rumale-linear_model 0.24.0

--- /dev/null
+++ b/lib/rumale/linear_model/linear_regression.rb
@@ -0,0 +1,202 @@
+ # frozen_string_literal: true
+
+ require 'lbfgsb'
+
+ require 'rumale/base/regressor'
+ require 'rumale/utils'
+ require 'rumale/validation'
+ require 'rumale/linear_model/base_sgd'
+
+ module Rumale
+   module LinearModel
+     # LinearRegression is a class that implements ordinary least squares linear regression
+     # with stochastic gradient descent (SGD) optimization,
+     # singular value decomposition (SVD), or L-BFGS optimization.
+     #
+     # @example
+     #   require 'rumale/linear_model/linear_regression'
+     #
+     #   estimator =
+     #     Rumale::LinearModel::LinearRegression.new(max_iter: 1000, batch_size: 20, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     #   # If Numo::Linalg is installed, you can specify 'svd' for the solver option.
+     #   require 'numo/linalg/autoloader'
+     #   require 'rumale/linear_model/linear_regression'
+     #
+     #   estimator = Rumale::LinearModel::LinearRegression.new(solver: 'svd')
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     # *Reference*
+     # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+     class LinearRegression < BaseSGD
+       include ::Rumale::Base::Regressor
+
+       # Return the weight vector.
+       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept).
+       # @return [Numo::DFloat] (shape: [n_outputs])
+       attr_reader :bias_term
+
+       # Return the random generator for random sampling.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new ordinary least squares linear regressor.
+       #
+       # @param learning_rate [Float] The initial value of the learning rate.
+       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+       #   If solver is not 'sgd', this parameter is ignored.
+       # @param decay [Float] The smoothing parameter for decreasing the learning rate as the iteration proceeds.
+       #   If nil is given, decay is set to 'learning_rate'.
+       #   If solver is not 'sgd', this parameter is ignored.
+       # @param momentum [Float] The momentum factor.
+       #   If solver is not 'sgd', this parameter is ignored.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       # @param max_iter [Integer] The maximum number of epochs that indicates
+       #   how many times the whole data is given to the training process.
+       #   If solver is 'svd', this parameter is ignored.
+       # @param batch_size [Integer] The size of the mini-batches.
+       #   If solver is not 'sgd', this parameter is ignored.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       #   If solver is 'svd', this parameter is ignored.
+       # @param solver [String] The algorithm to calculate weights ('auto', 'sgd', 'svd', or 'lbfgs').
+       #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
+       #   'sgd' uses stochastic gradient descent optimization.
+       #   'svd' performs singular value decomposition of the samples.
+       #   'lbfgs' uses the L-BFGS method for optimization.
+       # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+       #   If nil is given, the method does not execute in parallel.
+       #   If zero or less is given, it becomes equal to the number of processors.
+       #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       #   If solver is 'svd', this parameter is ignored.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
+                      fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
+                      solver: 'auto',
+                      n_jobs: nil, verbose: false, random_seed: nil)
+         super()
+         # store all of the given keyword arguments in @params
+         @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
+         @params[:solver] = if solver == 'auto'
+                              enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
+                            else
+                              solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
+                            end
+         @params[:decay] ||= @params[:learning_rate]
+         @params[:random_seed] ||= srand
+         @rng = Random.new(@params[:random_seed])
+         @loss_func = ::Rumale::LinearModel::Loss::MeanSquaredError.new
+       end
+
+       # Fit the model with the given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+       # @return [LinearRegression] The learned regressor itself.
+       def fit(x, y)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+         y = ::Rumale::Validation.check_convert_target_value_array(y)
+         ::Rumale::Validation.check_sample_size(x, y)
+
+         if @params[:solver] == 'svd' && enable_linalg?(warning: false)
+           fit_svd(x, y)
+         elsif @params[:solver] == 'lbfgs'
+           fit_lbfgs(x, y)
+         else
+           fit_sgd(x, y)
+         end
+
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+       def predict(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         x.dot(@weight_vec.transpose) + @bias_term
+       end
+
+       private
+
+       def fit_svd(x, y)
+         x = expand_feature(x) if fit_bias?
+         w = Numo::Linalg.pinv(x, driver: 'svd').dot(y)
+         @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+       end
+
+       def fit_lbfgs(x, y)
+         fnc = proc do |w, x, y| # rubocop:disable Lint/ShadowingOuterLocalVariable
+           n_samples, n_features = x.shape
+           w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+           # mean squared error loss and its gradient
+           z = x.dot(w.transpose)
+           d = z - y
+           loss = (d**2).sum.fdiv(n_samples)
+           gradient = 2.fdiv(n_samples) * d.transpose.dot(x)
+           [loss, gradient.flatten.dup]
+         end
+
+         x = expand_feature(x) if fit_bias?
+
+         n_features = x.shape[1]
+         n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+         res = Lbfgsb.minimize(
+           fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y],
+           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+           verbose: @params[:verbose] ? 1 : -1
+         )
+
+         @weight_vec, @bias_term =
+           if single_target?(y)
+             split_weight(res[:x])
+           else
+             split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+           end
+       end
+
+       def fit_sgd(x, y)
+         if single_target?(y)
+           @weight_vec, @bias_term = partial_fit(x, y)
+         else
+           n_outputs = y.shape[1]
+           n_features = x.shape[1]
+           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+           @bias_term = Numo::DFloat.zeros(n_outputs)
+           if enable_parallel?
+             models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+           else
+             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+           end
+         end
+       end
+
+       def single_target?(y)
+         y.ndim == 1
+       end
+
+       def init_weight(n_features, n_outputs)
+         Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+       end
+
+       def split_weight_mult(w)
+         if fit_bias?
+           [w[0...-1, true].dup, w[-1, true].dup]
+         else
+           [w.dup, Numo::DFloat.zeros(w.shape[1])]
+         end
+       end
+     end
+   end
+ end
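
The @example block in the class comment uses placeholder variables. As a concrete, editorial sketch (not part of the gem's diff): fitting the 'lbfgs' path above on noiseless synthetic data should recover the generating weights, since an exact least-squares solution exists. The data, seed, and expected values below are illustrative assumptions.

require 'numo/narray'
require 'rumale/linear_model/linear_regression'

# synthetic single-target data: y = 2 * x0 - x1 + 1
x = Numo::DFloat.new(100, 2).rand
y = 2.0 * x[true, 0] - x[true, 1] + 1.0

estimator = Rumale::LinearModel::LinearRegression.new(solver: 'lbfgs', random_seed: 1)
estimator.fit(x, y) # minimizes the mean squared error shown in fit_lbfgs

p estimator.weight_vec # expected to be close to [2.0, -1.0]
p estimator.bias_term  # expected to be close to 1.0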
--- /dev/null
+++ b/lib/rumale/linear_model/logistic_regression.rb
@@ -0,0 +1,267 @@
+ # frozen_string_literal: true
+
+ require 'lbfgsb'
+
+ require 'rumale/base/classifier'
+ require 'rumale/utils'
+ require 'rumale/validation'
+ require 'rumale/linear_model/base_sgd'
+
+ module Rumale
+   module LinearModel
+     # LogisticRegression is a class that implements Logistic Regression.
+     # In multiclass classification problems, it uses a one-vs-the-rest strategy for the sgd solver
+     # and multinomial logistic regression for the lbfgs solver.
+     #
+     # @note
+     #   Rumale::SVM provides Logistic Regression based on LIBLINEAR.
+     #   If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
+     #   https://github.com/yoshoku/rumale-svm
+     #
+     # @example
+     #   require 'rumale/linear_model/logistic_regression'
+     #
+     #   estimator =
+     #     Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_labels)
+     #   results = estimator.predict(testing_samples)
+     #
+     # *Reference*
+     # - Shalev-Shwartz, S., Singer, Y., Srebro, N., and Cotter, A., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
+     # - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+     # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+     class LogisticRegression < BaseSGD # rubocop:disable Metrics/ClassLength
+       include ::Rumale::Base::Classifier
+
+       # Return the weight vector for Logistic Regression.
+       # @return [Numo::DFloat] (shape: [n_classes, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept) for Logistic Regression.
+       # @return [Numo::DFloat] (shape: [n_classes])
+       attr_reader :bias_term
+
+       # Return the class labels.
+       # @return [Numo::Int32] (shape: [n_classes])
+       attr_reader :classes
+
+       # Return the random generator for performing random sampling.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new classifier with Logistic Regression.
+       #
+       # @param learning_rate [Float] The initial value of the learning rate.
+       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param decay [Float] The smoothing parameter for decreasing the learning rate as the iteration proceeds.
+       #   If nil is given, decay is set to 'reg_param * learning_rate'.
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param momentum [Float] The momentum factor.
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param penalty [String] The regularization type to be used ('l1', 'l2', or 'elasticnet').
+       #   If solver = 'lbfgs', only 'l2' can be selected for this parameter.
+       # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
+       #   If penalty is set to 'l2' or 'l1', this parameter is ignored.
+       #   If l1_ratio = 1, the regularization is similar to Lasso.
+       #   If l1_ratio = 0, the regularization is similar to Ridge.
+       #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param reg_param [Float] The regularization parameter.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
+       # @param max_iter [Integer] The maximum number of epochs that indicates
+       #   how many times the whole data is given to the training process.
+       # @param batch_size [Integer] The size of the mini-batches.
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       #   If solver = 'lbfgs', this value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of the Lbfgsb.minimize method.
+       # @param solver [String] The algorithm for optimization ('lbfgs' or 'sgd').
+       #   'lbfgs' uses the L-BFGS method via the lbfgsb.rb gem.
+       #   'sgd' uses stochastic gradient descent optimization.
+       # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+       #   If nil is given, the methods do not execute in parallel.
+       #   If zero or less is given, it becomes equal to the number of processors.
+       #   This parameter is ignored if the Parallel gem is not loaded or the solver is 'lbfgs'.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       #   If solver = 'lbfgs' and true is given, the 'iterate.dat' file is generated by lbfgsb.rb.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
+                      penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
+                      fit_bias: true, bias_scale: 1.0,
+                      max_iter: 1000, batch_size: 50, tol: 1e-4,
+                      solver: 'lbfgs',
+                      n_jobs: nil, verbose: false, random_seed: nil)
+         raise ArgumentError, "The 'lbfgs' solver supports only 'l2' penalties." if solver == 'lbfgs' && penalty != 'l2'
+
+         super()
+         # store all of the given keyword arguments in @params
+         @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
+         @params[:solver] = solver == 'sgd' ? 'sgd' : 'lbfgs'
+         @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+         @params[:random_seed] ||= srand
+         @rng = Random.new(@params[:random_seed])
+         @penalty_type = @params[:penalty]
+         @loss_func = ::Rumale::LinearModel::Loss::LogLoss.new
+       end
+
+       # Fit the model with the given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [LogisticRegression] The learned classifier itself.
+       def fit(x, y)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+         y = ::Rumale::Validation.check_convert_label_array(y)
+         ::Rumale::Validation.check_sample_size(x, y)
+
+         @classes = Numo::Int32[*y.to_a.uniq.sort]
+         if @params[:solver] == 'sgd'
+           fit_sgd(x, y)
+         else
+           fit_lbfgs(x, y)
+         end
+
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+       def decision_function(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         x.dot(@weight_vec.transpose) + @bias_term
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         n_samples, = x.shape
+         decision_values = predict_proba(x)
+         predicted = if enable_parallel?
+                       parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                     else
+                       Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                     end
+         Numo::Int32.asarray(predicted)
+       end
+
+       # Predict probabilities for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+         return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
+
+         n_samples, = x.shape
+         probs = Numo::DFloat.zeros(n_samples, 2)
+         probs[true, 1] = proba
+         probs[true, 0] = 1.0 - proba
+         probs
+       end
+
+       private
+
+       def multiclass_problem?
+         @classes.size > 2
+       end
+
+       def fit_lbfgs(base_x, base_y) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+         if multiclass_problem?
+           fnc = proc do |w, x, y, a|
+             n_features = x.shape[1]
+             n_classes = y.shape[1]
+             z = x.dot(w.reshape(n_classes, n_features).transpose)
+             # logsumexp and softmax
+             z_max = z.max(-1).expand_dims(-1).dup
+             z_max[~z_max.isfinite] = 0.0
+             lgsexp = Numo::NMath.log(Numo::NMath.exp(z - z_max).sum(axis: -1)).expand_dims(-1) + z_max
+             t = z - lgsexp
+             sftmax = Numo::NMath.exp(t)
+             # loss and gradient
+             loss = -(y * t).sum + 0.5 * a * w.dot(w)
+             grad = (sftmax - y).transpose.dot(x).flatten.dup + a * w
+             [loss, grad]
+           end
+
+           base_x = expand_feature(base_x) if fit_bias?
+           onehot_y = ::Rumale::Utils.binarize_labels(base_y)
+           n_classes = @classes.size
+           n_features = base_x.shape[1]
+           w_init = Numo::DFloat.zeros(n_classes * n_features)
+
+           verbose = @params[:verbose] ? 1 : -1
+           res = Lbfgsb.minimize(
+             fnc: fnc, jcb: true, x_init: w_init, args: [base_x, onehot_y, @params[:reg_param]],
+             maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+           )
+
+           if fit_bias?
+             weight = res[:x].reshape(n_classes, n_features)
+             @weight_vec = weight[true, 0...-1].dup
+             @bias_term = weight[true, -1].dup
+           else
+             @weight_vec = res[:x].reshape(n_classes, n_features)
+             @bias_term = Numo::DFloat.zeros(n_classes)
+           end
+         else
+           fnc = proc do |w, x, y, a|
+             z = 1 + Numo::NMath.exp(-y * x.dot(w))
+             loss = Numo::NMath.log(z).sum + 0.5 * a * w.dot(w)
+             grad = (y / z - y).dot(x) + a * w
+             [loss, grad]
+           end
+
+           base_x = expand_feature(base_x) if fit_bias?
+           negative_label = @classes[0]
+           bin_y = Numo::Int32.cast(base_y.ne(negative_label)) * 2 - 1
+           n_features = base_x.shape[1]
+           w_init = Numo::DFloat.zeros(n_features)
+
+           verbose = @params[:verbose] ? 1 : -1
+           res = Lbfgsb.minimize(
+             fnc: fnc, jcb: true, x_init: w_init, args: [base_x, bin_y, @params[:reg_param]],
+             maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+           )
+
+           @weight_vec, @bias_term = split_weight(res[:x])
+         end
+       end
+
+       def fit_sgd(x, y)
+         if multiclass_problem?
+           n_classes = @classes.size
+           n_features = x.shape[1]
+           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+           @bias_term = Numo::DFloat.zeros(n_classes)
+           if enable_parallel?
+             models = parallel_map(n_classes) do |n|
+               bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+               partial_fit(x, bin_y)
+             end
+             n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+           else
+             n_classes.times do |n|
+               bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+               @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+             end
+           end
+         else
+           negative_label = @classes[0]
+           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+           @weight_vec, @bias_term = partial_fit(x, bin_y)
+         end
+       end
+     end
+   end
+ end
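
A note on predict_proba above: for binary problems it returns the two columns [1 - p, p], where p is the sigmoid of decision_function; with more than two classes, the per-class sigmoid scores are renormalized to sum to one. Below is a small editorial sketch (not part of the gem's diff) on synthetic binary data; the data and seed are illustrative assumptions.

require 'numo/narray'
require 'rumale/linear_model/logistic_regression'

# binary toy problem: label 1 when the feature sum is positive
x = Numo::DFloat.new(200, 2).rand - 0.5
y = Numo::Int32.cast(x.sum(axis: 1).gt(0))

estimator = Rumale::LinearModel::LogisticRegression.new(reg_param: 0.01, random_seed: 1)
estimator.fit(x, y)

proba = estimator.predict_proba(x)
sigma = 1.0 / (Numo::NMath.exp(-estimator.decision_function(x)) + 1.0)
p((proba[true, 1] - sigma).abs.max)  # ~0.0: column 1 is the sigmoid of the decision function
p estimator.predict(x[0...5, true])  # labels drawn from estimator.classes, here 0 or 1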
--- /dev/null
+++ b/lib/rumale/linear_model/nnls.rb
@@ -0,0 +1,143 @@
+ # frozen_string_literal: true
+
+ require 'lbfgsb'
+
+ require 'rumale/base/estimator'
+ require 'rumale/base/regressor'
+ require 'rumale/utils'
+ require 'rumale/validation'
+
+ module Rumale
+   module LinearModel
+     # NNLS is a class that implements non-negative least squares regression.
+     # NNLS solves the least squares problem under non-negative constraints on the coefficients using the L-BFGS-B method.
+     #
+     # @example
+     #   require 'rumale/linear_model/nnls'
+     #
+     #   estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     class NNLS < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Regressor
+
+       # Return the weight vector.
+       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept).
+       # @return [Numo::DFloat] (shape: [n_outputs])
+       attr_reader :bias_term
+
+       # Return the number of iterations performed until convergence.
+       # @return [Integer]
+       attr_reader :n_iter
+
+       # Return the random generator for initializing the weights.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new regressor with the non-negative least squares method.
+       #
+       # @param reg_param [Float] The regularization parameter for the L2 regularization term.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       # @param max_iter [Integer] The maximum number of epochs that indicates
+       #   how many times the whole data is given to the training process.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
+                      max_iter: 1000, tol: 1e-4, verbose: false, random_seed: nil)
+         super()
+         @params = {
+           reg_param: reg_param,
+           fit_bias: fit_bias,
+           bias_scale: bias_scale,
+           max_iter: max_iter,
+           tol: tol,
+           verbose: verbose,
+           random_seed: random_seed || srand
+         }
+         @rng = Random.new(@params[:random_seed])
+       end
+
+       # Fit the model with the given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+       # @return [NNLS] The learned regressor itself.
+       def fit(x, y)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+         y = ::Rumale::Validation.check_convert_target_value_array(y)
+         ::Rumale::Validation.check_sample_size(x, y)
+
+         x = expand_feature(x) if fit_bias?
+
+         n_features = x.shape[1]
+         n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+         w_init = ::Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+         w_init[w_init.lt(0)] = 0
+         # a lower bound of 0 and an upper bound of +Inf on every weight enforce non-negativity
+         bounds = Numo::DFloat.zeros(n_outputs * n_features, 2)
+         bounds.shape[0].times { |n| bounds[n, 1] = Float::INFINITY }
+
+         res = Lbfgsb.minimize(
+           fnc: method(:nnls_fnc), jcb: true, x_init: w_init, args: [x, y, @params[:reg_param]], bounds: bounds,
+           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: @params[:verbose] ? 1 : -1
+         )
+
+         @n_iter = res[:n_iter]
+         w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features).transpose
+
+         if fit_bias?
+           @weight_vec = single_target?(y) ? w[0...-1].dup : w[0...-1, true].dup
+           @bias_term = single_target?(y) ? w[-1] : w[-1, true].dup
+         else
+           @weight_vec = w.dup
+           @bias_term = single_target?(y) ? 0 : Numo::DFloat.zeros(y.shape[1])
+         end
+
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+       def predict(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         x.dot(@weight_vec.transpose) + @bias_term
+       end
+
+       private
+
+       # L2-regularized mean squared error loss and its gradient
+       def nnls_fnc(w, x, y, alpha)
+         n_samples, n_features = x.shape
+         w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+         z = x.dot(w.transpose)
+         d = z - y
+         loss = (d**2).sum.fdiv(n_samples) + alpha * (w * w).sum
+         gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * alpha * w
+         [loss, gradient.flatten.dup]
+       end
+
+       def expand_feature(x)
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+       end
+
+       def fit_bias?
+         @params[:fit_bias] == true
+       end
+
+       def single_target?(y)
+         y.ndim == 1
+       end
+     end
+   end
+ end
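
Because fit above passes bounds of [0, +Inf) for every coefficient (including the expanded bias column) to Lbfgsb.minimize, the learned weights can never be negative, even when the data-generating weights are. Here is an editorial sketch (not part of the gem's diff) with illustrative synthetic data:

require 'numo/narray'
require 'rumale/linear_model/nnls'

# the generating weights include a negative coefficient (-2.0)
x = Numo::DFloat.new(100, 3).rand
y = 3.0 * x[true, 0] - 2.0 * x[true, 1] + 0.5 * x[true, 2]

estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
estimator.fit(x, y)

p estimator.weight_vec               # element-wise >= 0 due to the box constraints
p(estimator.weight_vec.min >= 0.0)   # true
p estimator.n_iter                   # iteration count reported by the solver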