rumale 0.22.2 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clang-format +149 -0
- data/.coveralls.yml +1 -0
- data/.github/workflows/build.yml +5 -2
- data/.github/workflows/coverage.yml +30 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +3 -2
- data/LICENSE.txt +1 -1
- data/README.md +45 -8
- data/Rakefile +2 -1
- data/ext/rumale/extconf.rb +1 -1
- data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
- data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
- data/ext/rumale/tree.c +76 -96
- data/ext/rumale/tree.h +2 -0
- data/lib/rumale.rb +6 -1
- data/lib/rumale/base/base_estimator.rb +5 -3
- data/lib/rumale/dataset.rb +7 -3
- data/lib/rumale/decomposition/fast_ica.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
- data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
- data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
- data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
- data/lib/rumale/ensemble/voting_classifier.rb +126 -0
- data/lib/rumale/ensemble/voting_regressor.rb +82 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +1 -1
- data/lib/rumale/linear_model/lasso.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +66 -35
- data/lib/rumale/linear_model/nnls.rb +137 -0
- data/lib/rumale/linear_model/ridge.rb +71 -34
- data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
- data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
- data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
- data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
- data/lib/rumale/tree/base_decision_tree.rb +15 -10
- data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
- data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
- data/lib/rumale/validation.rb +12 -0
- data/lib/rumale/version.rb +1 -1
- metadata +13 -6
- data/.travis.yml +0 -17
data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb

```diff
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/classifier'
+require 'rumale/preprocessing/label_binarizer'
+
+module Rumale
+  module KernelMachine
+    # KernelRidgeClassifier is a class that implements classifier based-on kernel ridge regression.
+    # It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
+    #
+    # @example
+    #   require 'numo/linalg/autoloader'
+    #   require 'rumale'
+    #
+    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
+    #   kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
+    #   kridge.fit(kernel_mat_train, traininig_values)
+    #
+    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
+    #   results = kridge.predict(kernel_mat_test)
+    class KernelRidgeClassifier
+      include Base::BaseEstimator
+      include Base::Classifier
+
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_training_sample, n_classes])
+      attr_reader :weight_vec
+
+      # Create a new regressor with kernel ridge classifier.
+      #
+      # @param reg_param [Float/Numo::DFloat] The regularization parameter.
+      def initialize(reg_param: 1.0)
+        @params = {}
+        @params[:reg_param] = reg_param
+        @classes = nil
+        @weight_vec = nil
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
+      #   The kernel matrix of the training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
+      # @return [KernelRidgeClassifier] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
+        raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+
+        @encoder = Rumale::Preprocessing::LabelBinarizer.new
+        y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
+        @classes = Numo::NArray[*@encoder.classes]
+
+        n_samples = x.shape[0]
+        reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
+        @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
+
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to predict values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        x.dot(@weight_vec)
+      end
+
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
+      def predict(x)
+        x = check_convert_sample_array(x)
+        scores = decision_function(x)
+        n_samples, n_classes = scores.shape
+        label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
+        @classes[label_ids].dup
+      end
+    end
+  end
+end
```
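The new classifier is driven entirely by precomputed kernel matrices, as in the class's own `@example`. A runnable sketch along those lines (the toy data below is illustrative, not part of the diff):

```ruby
require 'numo/linalg/autoloader' # KernelRidgeClassifier#fit raises without Numo::Linalg
require 'rumale'

# Illustrative two-class toy data.
training_samples = Numo::DFloat.new(20, 2).rand
training_labels  = Numo::Int32.cast(Array.new(10, 0) + Array.new(10, 1))

kernel_mat_train = Rumale::PairwiseMetric.rbf_kernel(training_samples)
kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
kridge.fit(kernel_mat_train, training_labels)

# Prediction needs the kernel matrix between test and training samples.
test_samples = Numo::DFloat.new(5, 2).rand
kernel_mat_test = Rumale::PairwiseMetric.rbf_kernel(test_samples, training_samples)
p kridge.predict(kernel_mat_test) # => Numo::Int32 of predicted labels
```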
data/lib/rumale/kernel_machine/kernel_svc.rb

```diff
@@ -152,7 +152,7 @@ module Rumale
 
        if @classes.size > 2
          probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
-          return (probs.transpose / probs.sum(axis: 1)).transpose
+          return (probs.transpose / probs.sum(axis: 1)).transpose.dup
        end
 
        n_samples, = x.shape
```
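The only change to `KernelSVC#predict_proba` is the trailing `.dup`. In Numo, `transpose` returns a view that shares memory with the original array, so the method previously handed back a non-contiguous view; `dup` materializes a fresh contiguous copy. A minimal standalone sketch of the distinction (not from the diff):

```ruby
require 'numo/narray'

a = Numo::DFloat.new(2, 3).seq
v = a.transpose # a view sharing memory with `a`
c = v.dup       # a contiguous, independent copy

a[0, 0] = 100.0
p v[0, 0] # => 100.0, the view reflects the mutation
p c[0, 0] # => 0.0, the copy does not
```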
data/lib/rumale/linear_model/elastic_net.rb

```diff
@@ -81,7 +81,7 @@ module Rumale
       # Fit the model with given training data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [ElasticNet] The learned regressor itself.
       def fit(x, y)
         x = check_convert_sample_array(x)
```
data/lib/rumale/linear_model/lasso.rb

```diff
@@ -77,7 +77,7 @@ module Rumale
       # Fit the model with given training data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [Lasso] The learned regressor itself.
       def fit(x, y)
         x = check_convert_sample_array(x)
```
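Both one-line changes complete the `@param y` documentation to spell out that these regressors accept multi-output targets. Assuming the documented shapes, usage looks like this (toy data, illustrative only):

```ruby
require 'rumale'

x = Numo::DFloat.new(100, 4).rand
y = Numo::DFloat.new(100, 2).rand # two target columns, i.e. n_outputs = 2

lasso = Rumale::LinearModel::Lasso.new(reg_param: 0.1)
lasso.fit(x, y)          # y may be [n_samples] or [n_samples, n_outputs]
p lasso.predict(x).shape # => [100, 2]
```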
data/lib/rumale/linear_model/linear_regression.rb

```diff
@@ -1,12 +1,15 @@
 # frozen_string_literal: true
 
+require 'lbfgsb'
+
 require 'rumale/linear_model/base_sgd'
 require 'rumale/base/regressor'
 
 module Rumale
   module LinearModel
     # LinearRegression is a class that implements ordinary least square linear regression
-    # with stochastic gradient descent (SGD) optimization
+    # with stochastic gradient descent (SGD) optimization,
+    # singular value decomposition (SVD), or L-BFGS optimization.
     #
     # @example
     #   estimator =
@@ -41,31 +44,32 @@ module Rumale
       #
       # @param learning_rate [Float] The initial value of learning rate.
       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
       #   If nil is given, the decay sets to 'learning_rate'.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param momentum [Float] The momentum factor.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is 'svd', this parameter is ignored.
       # @param batch_size [Integer] The size of the mini batches.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
-      #   If solver = 'svd', this parameter is ignored.
-      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd' or 'svd').
-      #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'sgd' solver.
+      #   If solver is 'svd', this parameter is ignored.
+      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd' or 'lbfgs').
+      #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
       #   'sgd' uses the stochastic gradient descent optimization.
       #   'svd' performs singular value decomposition of samples.
+      #   'lbfgs' uses the L-BFGS method for optimization.
       # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
       #   If nil is given, the method does not execute in parallel.
       #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded.
+      #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is 'svd', this parameter is ignored.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
@@ -80,9 +84,9 @@ module Rumale
         super()
         @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
         @params[:solver] = if solver == 'auto'
-                             enable_linalg? ? 'svd' : 'sgd'
+                             enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                            else
-                             solver
+                             solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
                            end
         @params[:decay] ||= @params[:learning_rate]
         @params[:random_seed] ||= srand
@@ -95,15 +99,17 @@ module Rumale
       # Fit the model with given training data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [LinearRegression] The learned regressor itself.
       def fit(x, y)
         x = check_convert_sample_array(x)
         y = check_convert_tvalue_array(y)
         check_sample_tvalue_size(x, y)
 
-        if @params[:solver] == 'svd' && enable_linalg?
+        if @params[:solver] == 'svd' && enable_linalg?(warning: false)
           fit_svd(x, y)
+        elsif @params[:solver] == 'lbfgs'
+          fit_lbfgs(x, y)
         else
           fit_sgd(x, y)
         end
@@ -124,24 +130,46 @@ module Rumale
 
       def fit_svd(x, y)
         x = expand_feature(x) if fit_bias?
-
         w = Numo::Linalg.pinv(x, driver: 'svd').dot(y)
+        @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+      end
 
-        is_single_target_vals = y.shape[1].nil?
-        if @params[:fit_bias]
-          @weight_vec = is_single_target_vals ? w[0...-1].dup : w[0...-1, true].dup
-          @bias_term = is_single_target_vals ? w[-1] : w[-1, true].dup
-        else
-          @weight_vec = w.dup
-          @bias_term = is_single_target_vals ? 0 : Numo::DFloat.zeros(y.shape[1])
+      def fit_lbfgs(x, y)
+        fnc = proc do |w, x, y| # rubocop:disable Lint/ShadowingOuterLocalVariable
+          n_samples, n_features = x.shape
+          w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+          z = x.dot(w.transpose)
+          d = z - y
+          loss = (d**2).sum.fdiv(n_samples)
+          gradient = 2.fdiv(n_samples) * d.transpose.dot(x)
+          [loss, gradient.flatten.dup]
         end
-      end
 
-      def fit_sgd(x, y)
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        x = expand_feature(x) if fit_bias?
+
         n_features = x.shape[1]
+        n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+        res = Lbfgsb.minimize(
+          fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+          verbose: @params[:verbose] ? 1 : -1
+        )
 
-        if n_outputs > 1
+        @weight_vec, @bias_term =
+          if single_target?(y)
+            split_weight(res[:x])
+          else
+            split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+          end
+      end
+
+      def fit_sgd(x, y)
+        if single_target?(y)
+          @weight_vec, @bias_term = partial_fit(x, y)
+        else
+          n_outputs = y.shape[1]
+          n_features = x.shape[1]
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
           if enable_parallel?
@@ -150,20 +178,23 @@ module Rumale
           else
             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
           end
-        else
-          @weight_vec, @bias_term = partial_fit(x, y)
         end
       end
 
-      def
-
+      def single_target?(y)
+        y.ndim == 1
       end
 
-      def
-
-
+      def init_weight(n_features, n_outputs)
+        Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+      end
 
-
+      def split_weight_mult(w)
+        if fit_bias?
+          [w[0...-1, true].dup, w[-1, true].dup]
+        else
+          [w.dup, Numo::DFloat.zeros(w.shape[1])]
+        end
       end
     end
   end
```
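Taken together, these hunks add an `'lbfgs'` solver that minimizes the mean squared error with L-BFGS, and change the `'auto'` fallback from `'sgd'` to `'lbfgs'` when Numo::Linalg is absent. The `factr: @params[:tol] / Lbfgsb::DBL_EPSILON` argument follows the L-BFGS-B convention that iteration stops once the relative loss decrease drops below `factr * eps` (machine epsilon), so the effective threshold is approximately `tol`. A usage sketch (toy data, illustrative only):

```ruby
require 'rumale'

x = Numo::DFloat.new(50, 3).rand
w_true = Numo::DFloat[1.0, -2.0, 0.5]
y = x.dot(w_true) + 0.01 * Numo::DFloat.new(50).rand_norm

reg = Rumale::LinearModel::LinearRegression.new(solver: 'lbfgs', max_iter: 500, tol: 1e-6)
reg.fit(x, y)
p reg.weight_vec # expected to be close to w_true
```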
data/lib/rumale/linear_model/nnls.rb

```diff
@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+
+require 'lbfgsb'
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/regressor'
+
+module Rumale
+  module LinearModel
+    # NNLS is a class that implements non-negative least squares regression.
+    # NNLS solves least squares problem under non-negative constraints on the coefficient using L-BFGS-B method.
+    #
+    # @example
+    #   estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    class NNLS
+      include Base::BaseEstimator
+      include Base::Regressor
+
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+      attr_reader :weight_vec
+
+      # Return the bias term (a.k.a. intercept).
+      # @return [Numo::DFloat] (shape: [n_outputs])
+      attr_reader :bias_term
+
+      # Returns the number of iterations when converged.
+      # @return [Integer]
+      attr_reader :n_iter
+
+      # Return the random generator for initializing weight.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new regressor with non-negative least squares method.
+      #
+      # @param reg_param [Float] The regularization parameter for L2 regularization term.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      #   If solver = 'svd', this parameter is ignored.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
+                     max_iter: 1000, tol: 1e-4, verbose: false, random_seed: nil)
+        check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, tol: tol)
+        check_params_boolean(fit_bias: fit_bias, verbose: verbose)
+        check_params_numeric_or_nil(random_seed: random_seed)
+        check_params_positive(reg_param: reg_param, max_iter: max_iter)
+        @params = method(:initialize).parameters.each_with_object({}) { |(_, prm), obj| obj[prm] = binding.local_variable_get(prm) }
+        @params[:random_seed] ||= srand
+        @n_iter = nil
+        @weight_vec = nil
+        @bias_term = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [NonneagtiveLeastSquare] The learned regressor itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+
+        x = expand_feature(x) if fit_bias?
+
+        n_features = x.shape[1]
+        n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+        w_init = Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+        w_init[w_init.lt(0)] = 0
+        bounds = Numo::DFloat.zeros(n_outputs * n_features, 2)
+        bounds.shape[0].times { |n| bounds[n, 1] = Float::INFINITY }
+
+        res = Lbfgsb.minimize(
+          fnc: method(:nnls_fnc), jcb: true, x_init: w_init, args: [x, y, @params[:reg_param]], bounds: bounds,
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: @params[:verbose] ? 1 : -1
+        )
+
+        @n_iter = res[:n_iter]
+        w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features).transpose
+
+        if fit_bias?
+          @weight_vec = single_target?(y) ? w[0...-1].dup : w[0...-1, true].dup
+          @bias_term = single_target?(y) ? w[-1] : w[-1, true].dup
+        else
+          @weight_vec = w.dup
+          @bias_term = single_target?(y) ? 0 : Numo::DFloat.zeros(y.shape[1])
+        end
+
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        x = check_convert_sample_array(x)
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+
+      private
+
+      def nnls_fnc(w, x, y, alpha)
+        n_samples, n_features = x.shape
+        w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+        z = x.dot(w.transpose)
+        d = z - y
+        loss = (d**2).sum.fdiv(n_samples) + alpha * (w * w).sum
+        gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * alpha * w
+        [loss, gradient.flatten.dup]
+      end
+
+      def expand_feature(x)
+        n_samples = x.shape[0]
+        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+      end
+
+      def fit_bias?
+        @params[:fit_bias] == true
+      end
+
+      def single_target?(y)
+        y.ndim == 1
+      end
+    end
+  end
+end
```
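Rather than an active-set NNLS algorithm, the class enforces non-negativity by giving L-BFGS-B a `[0, Float::INFINITY)` bound per coefficient (the `bounds` matrix above). A usage sketch (toy data, illustrative only):

```ruby
require 'rumale'

x = Numo::DFloat.new(80, 3).rand
w_true = Numo::DFloat[0.5, 0.0, 2.0] # non-negative ground truth
y = x.dot(w_true)

nnls = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
nnls.fit(x, y)
p nnls.weight_vec # every entry is >= 0
p nnls.n_iter     # iterations L-BFGS-B took to converge
```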
data/lib/rumale/linear_model/ridge.rb

```diff
@@ -1,12 +1,15 @@
 # frozen_string_literal: true
 
+require 'lbfgsb'
+
 require 'rumale/linear_model/base_sgd'
 require 'rumale/base/regressor'
 
 module Rumale
   module LinearModel
     # Ridge is a class that implements Ridge Regression
-    # with stochastic gradient descent (SGD) optimization
+    # with stochastic gradient descent (SGD) optimization,
+    # singular value decomposition (SVD), or L-BFGS optimization.
     #
     # @example
     #   estimator =
@@ -41,32 +44,33 @@ module Rumale
       #
       # @param learning_rate [Float] The initial value of learning rate.
       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
       #   If nil is given, the decay sets to 'reg_param * learning_rate'.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param momentum [Float] The momentum factor.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is 'svd', this parameter is ignored.
       # @param batch_size [Integer] The size of the mini batches.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is not 'sgd', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
-      #   If solver = 'svd', this parameter is ignored.
-      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd' or 'svd').
-      #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'sgd' solver.
+      #   If solver is 'svd', this parameter is ignored.
+      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd', or 'lbfgs').
+      #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
       #   'sgd' uses the stochastic gradient descent optimization.
       #   'svd' performs singular value decomposition of samples.
+      #   'lbfgs' uses the L-BFGS method for optimization.
       # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
       #   If nil is given, the method does not execute in parallel.
       #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded or solver = 'svd'.
+      #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-      #   If solver = 'svd', this parameter is ignored.
+      #   If solver is 'svd', this parameter is ignored.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
@@ -83,9 +87,9 @@ module Rumale
         super()
         @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
         @params[:solver] = if solver == 'auto'
-                             enable_linalg? ? 'svd' : 'sgd'
+                             enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                            else
-                             solver
+                             solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
                            end
         @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
         @params[:random_seed] ||= srand
@@ -99,15 +103,17 @@ module Rumale
       # Fit the model with given training data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [Ridge] The learned regressor itself.
       def fit(x, y)
         x = check_convert_sample_array(x)
         y = check_convert_tvalue_array(y)
         check_sample_tvalue_size(x, y)
 
-        if @params[:solver] == 'svd' && enable_linalg?
+        if @params[:solver] == 'svd' && enable_linalg?(warning: false)
           fit_svd(x, y)
+        elsif @params[:solver] == 'lbfgs'
+          fit_lbfgs(x, y)
         else
           fit_sgd(x, y)
         end
@@ -127,27 +133,51 @@ module Rumale
       private
 
       def fit_svd(x, y)
-
+        x = expand_feature(x) if fit_bias?
 
-        s, u, vt = Numo::Linalg.svd(
+        s, u, vt = Numo::Linalg.svd(x, driver: 'sdd', job: 'S')
         d = (s / (s**2 + @params[:reg_param])).diag
         w = vt.transpose.dot(d).dot(u.transpose).dot(y)
-
-        is_single_target_vals = y.shape[1].nil?
-        if @params[:fit_bias]
-          @weight_vec = is_single_target_vals ? w[0...-1].dup : w[0...-1, true].dup
-          @bias_term = is_single_target_vals ? w[-1] : w[-1, true].dup
-        else
-          @weight_vec = w.dup
-          @bias_term = is_single_target_vals ? 0 : Numo::DFloat.zeros(y.shape[1])
-        end
+
+        @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
       end
 
-      def fit_sgd(x, y)
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+      def fit_lbfgs(x, y)
+        fnc = proc do |w, x, y, a| # rubocop:disable Lint/ShadowingOuterLocalVariable
+          n_samples, n_features = x.shape
+          w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+          z = x.dot(w.transpose)
+          d = z - y
+          loss = (d**2).sum.fdiv(n_samples) + a * (w * w).sum
+          gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * a * w
+          [loss, gradient.flatten.dup]
+        end
+
+        x = expand_feature(x) if fit_bias?
+
         n_features = x.shape[1]
+        n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+        res = Lbfgsb.minimize(
+          fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y, @params[:reg_param]],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+          verbose: @params[:verbose] ? 1 : -1
+        )
+
+        @weight_vec, @bias_term =
+          if single_target?(y)
+            split_weight(res[:x])
+          else
+            split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+          end
+      end
 
-        if n_outputs > 1
+      def fit_sgd(x, y)
+        if single_target?(y)
+          @weight_vec, @bias_term = partial_fit(x, y)
+        else
+          n_outputs = y.shape[1]
+          n_features = x.shape[1]
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
           if enable_parallel?
@@ -156,16 +186,23 @@ module Rumale
           else
             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
           end
-        else
-          @weight_vec, @bias_term = partial_fit(x, y)
         end
       end
 
-      def
-
-
+      def single_target?(y)
+        y.ndim == 1
+      end
+
+      def init_weight(n_features, n_outputs)
+        Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+      end
 
-
+      def split_weight_mult(w)
+        if fit_bias?
+          [w[0...-1, true].dup, w[-1, true].dup]
+        else
+          [w.dup, Numo::DFloat.zeros(w.shape[1])]
+        end
       end
     end
   end
```
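Ridge mirrors the LinearRegression changes: the same solver fallback, and the same L-BFGS objective with the L2 penalty `reg_param * (w * w).sum` added to the loss and `2.0 * reg_param * w` to the gradient. A usage sketch (toy data, illustrative only):

```ruby
require 'rumale'

x = Numo::DFloat.new(60, 4).rand
y = x.dot(Numo::DFloat[1.0, 0.5, -1.5, 2.0])

# 'auto' picks 'svd' when Numo::Linalg is loaded and 'lbfgs' otherwise;
# an unrecognized solver string also falls back to 'lbfgs'.
ridge = Rumale::LinearModel::Ridge.new(reg_param: 0.1, solver: 'lbfgs')
ridge.fit(x, y)
p ridge.predict(x[0...5, true])
```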