rumale-linear_model 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lib/rumale/linear_model/linear_regression.rb
@@ -0,0 +1,199 @@
+ # frozen_string_literal: true
+
+ require 'lbfgsb'
+
+ require 'rumale/base/regressor'
+ require 'rumale/validation'
+ require 'rumale/linear_model/base_sgd'
+
+ module Rumale
+   module LinearModel
+     # LinearRegression is a class that implements ordinary least squares linear regression
+     # with stochastic gradient descent (SGD) optimization,
+     # singular value decomposition (SVD), or L-BFGS optimization.
+     #
+     # @example
+     #   require 'rumale/linear_model/linear_regression'
+     #
+     #   estimator =
+     #     Rumale::LinearModel::LinearRegression.new(max_iter: 1000, batch_size: 20, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     #   # If Numo::Linalg is installed, you can specify 'svd' for the solver option.
+     #   require 'numo/linalg/autoloader'
+     #   require 'rumale/linear_model/linear_regression'
+     #
+     #   estimator = Rumale::LinearModel::LinearRegression.new(solver: 'svd')
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     # *Reference*
+     # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+     class LinearRegression < BaseSGD
+       include ::Rumale::Base::Regressor
+
+       # Return the weight vector.
+       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept).
+       # @return [Numo::DFloat] (shape: [n_outputs])
+       attr_reader :bias_term
+
+       # Return the random generator for random sampling.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new ordinary least squares linear regressor.
+       #
+       # @param learning_rate [Float] The initial value of learning rate.
+       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+       #   If solver is not 'sgd', this parameter is ignored.
+       # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+       #   If nil is given, the decay is set to 'learning_rate'.
+       #   If solver is not 'sgd', this parameter is ignored.
+       # @param momentum [Float] The momentum factor.
+       #   If solver is not 'sgd', this parameter is ignored.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       # @param max_iter [Integer] The maximum number of epochs that indicates
+       #   how many times the whole data is given to the training process.
+       #   If solver is 'svd', this parameter is ignored.
+       # @param batch_size [Integer] The size of the mini batches.
+       #   If solver is not 'sgd', this parameter is ignored.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       #   If solver is 'svd', this parameter is ignored.
+       # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd' or 'lbfgs').
+       #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
+       #   'sgd' uses the stochastic gradient descent optimization.
+       #   'svd' performs singular value decomposition of samples.
+       #   'lbfgs' uses the L-BFGS method for optimization.
+       # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+       #   If nil is given, the method does not execute in parallel.
+       #   If zero or less is given, it becomes equal to the number of processors.
+       #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       #   If solver is 'svd', this parameter is ignored.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
+                      fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
+                      solver: 'auto',
+                      n_jobs: nil, verbose: false, random_seed: nil)
+         super()
+         @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
+         @params[:solver] = if solver == 'auto'
+                              enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
+                            else
+                              solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
+                            end
+         @params[:decay] ||= @params[:learning_rate]
+         @params[:random_seed] ||= srand
+         @rng = Random.new(@params[:random_seed])
+         @loss_func = ::Rumale::LinearModel::Loss::MeanSquaredError.new
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+       # @return [LinearRegression] The learned regressor itself.
+       def fit(x, y)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+         y = ::Rumale::Validation.check_convert_target_value_array(y)
+         ::Rumale::Validation.check_sample_size(x, y)
+
+         if @params[:solver] == 'svd' && enable_linalg?(warning: false)
+           fit_svd(x, y)
+         elsif @params[:solver] == 'lbfgs'
+           fit_lbfgs(x, y)
+         else
+           fit_sgd(x, y)
+         end
+
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+       def predict(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         x.dot(@weight_vec.transpose) + @bias_term
+       end
+
+       private
+
+       def fit_svd(x, y)
+         x = expand_feature(x) if fit_bias?
+         w = Numo::Linalg.pinv(x, driver: 'svd').dot(y)
+         @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+       end
+
+       def fit_lbfgs(x, y)
+         fnc = proc do |w, x, y| # rubocop:disable Lint/ShadowingOuterLocalVariable
+           n_samples, n_features = x.shape
+           w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+           z = x.dot(w.transpose)
+           d = z - y
+           loss = (d**2).sum.fdiv(n_samples)
+           gradient = 2.fdiv(n_samples) * d.transpose.dot(x)
+           [loss, gradient.flatten.dup]
+         end
+
+         x = expand_feature(x) if fit_bias?
+
+         n_features = x.shape[1]
+         n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+         res = Lbfgsb.minimize(
+           fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y],
+           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+           verbose: @params[:verbose] ? 1 : -1
+         )
+
+         @weight_vec, @bias_term =
+           if single_target?(y)
+             split_weight(res[:x])
+           else
+             split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+           end
+       end
+
+       def fit_sgd(x, y)
+         if single_target?(y)
+           @weight_vec, @bias_term = partial_fit(x, y)
+         else
+           n_outputs = y.shape[1]
+           n_features = x.shape[1]
+           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+           @bias_term = Numo::DFloat.zeros(n_outputs)
+           if enable_parallel?
+             models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+           else
+             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+           end
+         end
+       end
+
+       def single_target?(y)
+         y.ndim == 1
+       end
+
+       def init_weight(n_features, n_outputs)
+         Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+       end
+
+       def split_weight_mult(w)
+         if fit_bias?
+           [w[0...-1, true].dup, w[-1, true].dup]
+         else
+           [w.dup, Numo::DFloat.zeros(w.shape[1])]
+         end
+       end
+     end
+   end
+ end
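
For orientation when reading the hunk above, here is a minimal usage sketch of Rumale::LinearModel::LinearRegression with the 'lbfgs' solver. The toy data and the expected outputs in the comments are illustrative assumptions, not part of the package:

    require 'numo/narray'
    require 'rumale/utils'
    require 'rumale/linear_model/linear_regression'

    # Toy single-output data generated as y = 2 * x1 - 3 * x2 + 1 (made up for illustration).
    x = Numo::DFloat.new(100, 2).rand
    y = 2.0 * x[true, 0] - 3.0 * x[true, 1] + 1.0

    estimator = Rumale::LinearModel::LinearRegression.new(solver: 'lbfgs', max_iter: 500, random_seed: 1)
    estimator.fit(x, y)

    p estimator.weight_vec.to_a # should be close to [2.0, -3.0]
    p estimator.bias_term       # should be close to 1.0
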
lib/rumale/linear_model/logistic_regression.rb
@@ -0,0 +1,266 @@
+ # frozen_string_literal: true
+
+ require 'lbfgsb'
+
+ require 'rumale/base/classifier'
+ require 'rumale/utils'
+ require 'rumale/validation'
+ require 'rumale/linear_model/base_sgd'
+
+ module Rumale
+   module LinearModel
+     # LogisticRegression is a class that implements Logistic Regression.
+     # In multiclass classification problems, it uses the one-vs-the-rest strategy for the sgd solver
+     # and multinomial logistic regression for the lbfgs solver.
+     #
+     # @note
+     #   Rumale::SVM provides Logistic Regression based on LIBLINEAR.
+     #   If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
+     #   https://github.com/yoshoku/rumale-svm
+     #
+     # @example
+     #   require 'rumale/linear_model/logistic_regression'
+     #
+     #   estimator =
+     #     Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, random_seed: 1)
+     #   estimator.fit(training_samples, training_labels)
+     #   results = estimator.predict(testing_samples)
+     #
+     # *Reference*
+     # - Shalev-Shwartz, S., Singer, Y., Srebro, N., and Cotter, A., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
+     # - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+     # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+     class LogisticRegression < BaseSGD # rubocop:disable Metrics/ClassLength
+       include ::Rumale::Base::Classifier
+
+       # Return the weight vector for Logistic Regression.
+       # @return [Numo::DFloat] (shape: [n_classes, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept) for Logistic Regression.
+       # @return [Numo::DFloat] (shape: [n_classes])
+       attr_reader :bias_term
+
+       # Return the class labels.
+       # @return [Numo::Int32] (shape: [n_classes])
+       attr_reader :classes
+
+       # Return the random generator for performing random sampling.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new classifier with Logistic Regression.
+       #
+       # @param learning_rate [Float] The initial value of learning rate.
+       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+       #   If nil is given, the decay is set to 'reg_param * learning_rate'.
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param momentum [Float] The momentum factor.
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param penalty [String] The regularization type to be used ('l1', 'l2', or 'elasticnet').
+       #   If solver = 'lbfgs', only 'l2' can be selected for this parameter.
+       # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
+       #   If penalty is set to 'l2' or 'l1', this parameter is ignored.
+       #   If l1_ratio = 1, the regularization is similar to Lasso.
+       #   If l1_ratio = 0, the regularization is similar to Ridge.
+       #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param reg_param [Float] The regularization parameter.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
+       # @param max_iter [Integer] The maximum number of epochs that indicates
+       #   how many times the whole data is given to the training process.
+       # @param batch_size [Integer] The size of the mini batches.
+       #   If solver = 'lbfgs', this parameter is ignored.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       #   If solver = 'lbfgs', this value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of the Lbfgsb.minimize method.
+       # @param solver [String] The algorithm for optimization. ('lbfgs' or 'sgd').
+       #   'lbfgs' uses the L-BFGS method with the lbfgsb.rb gem.
+       #   'sgd' uses the stochastic gradient descent optimization.
+       # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+       #   If nil is given, the methods do not execute in parallel.
+       #   If zero or less is given, it becomes equal to the number of processors.
+       #   This parameter is ignored if the Parallel gem is not loaded or the solver is 'lbfgs'.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       #   If solver = 'lbfgs' and true is given, the 'iterate.dat' file is generated by lbfgsb.rb.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
+                      penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
+                      fit_bias: true, bias_scale: 1.0,
+                      max_iter: 1000, batch_size: 50, tol: 1e-4,
+                      solver: 'lbfgs',
+                      n_jobs: nil, verbose: false, random_seed: nil)
+         raise ArgumentError, "The 'lbfgs' solver supports only 'l2' penalties." if solver == 'lbfgs' && penalty != 'l2'
+
+         super()
+         @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
+         @params[:solver] = solver == 'sgd' ? 'sgd' : 'lbfgs'
+         @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+         @params[:random_seed] ||= srand
+         @rng = Random.new(@params[:random_seed])
+         @penalty_type = @params[:penalty]
+         @loss_func = ::Rumale::LinearModel::Loss::LogLoss.new
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [LogisticRegression] The learned classifier itself.
+       def fit(x, y)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+         y = ::Rumale::Validation.check_convert_label_array(y)
+         ::Rumale::Validation.check_sample_size(x, y)
+
+         @classes = Numo::Int32[*y.to_a.uniq.sort]
+         if @params[:solver] == 'sgd'
+           fit_sgd(x, y)
+         else
+           fit_lbfgs(x, y)
+         end
+
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+       def decision_function(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         x.dot(@weight_vec.transpose) + @bias_term
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         n_samples, = x.shape
+         decision_values = predict_proba(x)
+         predicted = if enable_parallel?
+                       parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                     else
+                       Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                     end
+         Numo::Int32.asarray(predicted)
+       end
+
+       # Predict probability for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+         return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
+
+         n_samples, = x.shape
+         probs = Numo::DFloat.zeros(n_samples, 2)
+         probs[true, 1] = proba
+         probs[true, 0] = 1.0 - proba
+         probs
+       end
+
+       private
+
+       def multiclass_problem?
+         @classes.size > 2
+       end
+
+       def fit_lbfgs(base_x, base_y) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+         if multiclass_problem?
+           fnc = proc do |w, x, y, a|
+             n_features = x.shape[1]
+             n_classes = y.shape[1]
+             z = x.dot(w.reshape(n_classes, n_features).transpose)
+             # logsumexp and softmax
+             z_max = z.max(-1).expand_dims(-1).dup
+             z_max[~z_max.isfinite] = 0.0
+             lgsexp = Numo::NMath.log(Numo::NMath.exp(z - z_max).sum(axis: -1)).expand_dims(-1) + z_max
+             t = z - lgsexp
+             sftmax = Numo::NMath.exp(t)
+             # loss and gradient
+             loss = -(y * t).sum + 0.5 * a * w.dot(w)
+             grad = (sftmax - y).transpose.dot(x).flatten.dup + a * w
+             [loss, grad]
+           end
+
+           base_x = expand_feature(base_x) if fit_bias?
+           onehot_y = ::Rumale::Utils.binarize_labels(base_y)
+           n_classes = @classes.size
+           n_features = base_x.shape[1]
+           w_init = Numo::DFloat.zeros(n_classes * n_features)
+
+           verbose = @params[:verbose] ? 1 : -1
+           res = Lbfgsb.minimize(
+             fnc: fnc, jcb: true, x_init: w_init, args: [base_x, onehot_y, @params[:reg_param]],
+             maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+           )
+
+           if fit_bias?
+             weight = res[:x].reshape(n_classes, n_features)
+             @weight_vec = weight[true, 0...-1].dup
+             @bias_term = weight[true, -1].dup
+           else
+             @weight_vec = res[:x].reshape(n_classes, n_features)
+             @bias_term = Numo::DFloat.zeros(n_classes)
+           end
+         else
+           fnc = proc do |w, x, y, a|
+             z = 1 + Numo::NMath.exp(-y * x.dot(w))
+             loss = Numo::NMath.log(z).sum + 0.5 * a * w.dot(w)
+             grad = (y / z - y).dot(x) + a * w
+             [loss, grad]
+           end
+
+           base_x = expand_feature(base_x) if fit_bias?
+           negative_label = @classes[0]
+           bin_y = Numo::Int32.cast(base_y.ne(negative_label)) * 2 - 1
+           n_features = base_x.shape[1]
+           w_init = Numo::DFloat.zeros(n_features)
+
+           verbose = @params[:verbose] ? 1 : -1
+           res = Lbfgsb.minimize(
+             fnc: fnc, jcb: true, x_init: w_init, args: [base_x, bin_y, @params[:reg_param]],
+             maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+           )
+
+           @weight_vec, @bias_term = split_weight(res[:x])
+         end
+       end
+
+       def fit_sgd(x, y)
+         if multiclass_problem?
+           n_classes = @classes.size
+           n_features = x.shape[1]
+           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+           @bias_term = Numo::DFloat.zeros(n_classes)
+           if enable_parallel?
+             models = parallel_map(n_classes) do |n|
+               bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+               partial_fit(x, bin_y)
+             end
+             n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+           else
+             n_classes.times do |n|
+               bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+               @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+             end
+           end
+         else
+           negative_label = @classes[0]
+           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+           @weight_vec, @bias_term = partial_fit(x, bin_y)
+         end
+       end
+     end
+   end
+ end
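
As a quick orientation for the class above, a minimal multiclass sketch with the default 'lbfgs' solver. The toy samples and labels are illustrative assumptions, not part of the package:

    require 'numo/narray'
    require 'rumale/linear_model/logistic_regression'

    # Three-class toy problem; samples and labels are made up for illustration.
    x = Numo::DFloat.new(90, 2).rand
    labels = Numo::Int32.asarray(Array.new(90) { |i| i / 30 })

    estimator = Rumale::LinearModel::LogisticRegression.new(solver: 'lbfgs', reg_param: 1.0, random_seed: 1)
    estimator.fit(x, labels)

    probs = estimator.predict_proba(x) # shape [90, 3]; each row sums to 1
    predicted = estimator.predict(x)   # Numo::Int32 labels taken from estimator.classes
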
lib/rumale/linear_model/nnls.rb
@@ -0,0 +1,141 @@
+ # frozen_string_literal: true
+
+ require 'lbfgsb'
+
+ require 'rumale/base/estimator'
+ require 'rumale/base/regressor'
+ require 'rumale/validation'
+
+ module Rumale
+   module LinearModel
+     # NNLS is a class that implements non-negative least squares regression.
+     # NNLS solves the least squares problem under non-negative constraints on the coefficients using the L-BFGS-B method.
+     #
+     # @example
+     #   require 'rumale/linear_model/nnls'
+     #
+     #   estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     class NNLS < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Regressor
+
+       # Return the weight vector.
+       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept).
+       # @return [Numo::DFloat] (shape: [n_outputs])
+       attr_reader :bias_term
+
+       # Return the number of iterations performed until convergence.
+       # @return [Integer]
+       attr_reader :n_iter
+
+       # Return the random generator for initializing the weights.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new regressor with the non-negative least squares method.
+       #
+       # @param reg_param [Float] The regularization parameter for the L2 regularization term.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       # @param max_iter [Integer] The maximum number of epochs that indicates
+       #   how many times the whole data is given to the training process.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       #   If solver = 'svd', this parameter is ignored.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
+                      max_iter: 1000, tol: 1e-4, verbose: false, random_seed: nil)
+         super()
+         @params = {
+           reg_param: reg_param,
+           fit_bias: fit_bias,
+           bias_scale: bias_scale,
+           max_iter: max_iter,
+           tol: tol,
+           verbose: verbose,
+           random_seed: random_seed || srand
+         }
+         @rng = Random.new(@params[:random_seed])
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+       # @return [NNLS] The learned regressor itself.
+       def fit(x, y)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+         y = ::Rumale::Validation.check_convert_target_value_array(y)
+         ::Rumale::Validation.check_sample_size(x, y)
+
+         x = expand_feature(x) if fit_bias?
+
+         n_features = x.shape[1]
+         n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+         w_init = ::Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+         w_init[w_init.lt(0)] = 0
+         bounds = Numo::DFloat.zeros(n_outputs * n_features, 2)
+         bounds.shape[0].times { |n| bounds[n, 1] = Float::INFINITY }
+
+         res = Lbfgsb.minimize(
+           fnc: method(:nnls_fnc), jcb: true, x_init: w_init, args: [x, y, @params[:reg_param]], bounds: bounds,
+           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: @params[:verbose] ? 1 : -1
+         )
+
+         @n_iter = res[:n_iter]
+         w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features).transpose
+
+         if fit_bias?
+           @weight_vec = single_target?(y) ? w[0...-1].dup : w[0...-1, true].dup
+           @bias_term = single_target?(y) ? w[-1] : w[-1, true].dup
+         else
+           @weight_vec = w.dup
+           @bias_term = single_target?(y) ? 0 : Numo::DFloat.zeros(y.shape[1])
+         end
+
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+       def predict(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         x.dot(@weight_vec.transpose) + @bias_term
+       end
+
+       private
+
+       def nnls_fnc(w, x, y, alpha)
+         n_samples, n_features = x.shape
+         w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+         z = x.dot(w.transpose)
+         d = z - y
+         loss = (d**2).sum.fdiv(n_samples) + alpha * (w * w).sum
+         gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * alpha * w
+         [loss, gradient.flatten.dup]
+       end
+
+       def expand_feature(x)
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+       end
+
+       def fit_bias?
+         @params[:fit_bias] == true
+       end
+
+       def single_target?(y)
+         y.ndim == 1
+       end
+     end
+   end
+ end
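
To round off the diff, a minimal usage sketch of the NNLS class above. The toy data and the comments about expected output are illustrative assumptions, not part of the package:

    require 'numo/narray'
    require 'rumale/utils' # provides the weight initializer used inside NNLS#fit
    require 'rumale/linear_model/nnls'

    # Toy data whose true coefficients are non-negative (made up for illustration).
    x = Numo::DFloat.new(80, 3).rand
    y = x.dot(Numo::DFloat[0.5, 0.0, 2.0]) + 0.25

    estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
    estimator.fit(x, y)

    p estimator.weight_vec.to_a # every entry is >= 0 because of the L-BFGS-B lower bounds
    p estimator.n_iter          # number of L-BFGS-B iterations that were run
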