rumale-linear_model 0.24.0 → 0.25.0

@@ -4,19 +4,18 @@ require 'lbfgsb'
 
 require 'rumale/base/regressor'
 require 'rumale/validation'
-require 'rumale/linear_model/base_sgd'
+
+require_relative 'base_estimator'
 
 module Rumale
   module LinearModel
     # Ridge is a class that implements Ridge Regression
-    # with stochastic gradient descent (SGD) optimization,
-    # singular value decomposition (SVD), or L-BFGS optimization.
+    # with singular value decomposition (SVD) or L-BFGS optimization.
     #
     # @example
     #   require 'rumale/linear_model/ridge'
     #
-    #   estimator =
-    #     Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
+    #   estimator = Rumale::LinearModel::Ridge.new(reg_param: 0.1)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -27,73 +26,40 @@ module Rumale
     #   estimator = Rumale::LinearModel::Ridge.new(reg_param: 0.1, solver: 'svd')
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    class Ridge < BaseSGD
-      include ::Rumale::Base::Regressor
-
-      # Return the weight vector.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept).
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
-
-      # Return the random generator for random sampling.
-      # @return [Random]
-      attr_reader :rng
+    class Ridge < Rumale::LinearModel::BaseEstimator
+      include Rumale::Base::Regressor
 
       # Create a new Ridge regressor.
       #
-      # @param learning_rate [Float] The initial value of learning rate.
-      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-      #   If solver is not 'sgd', this parameter is ignored.
-      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
-      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
-      #   If solver is not 'sgd', this parameter is ignored.
-      # @param momentum [Float] The momentum factor.
-      #   If solver is not 'sgd', this parameter is ignored.
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
       #   If solver is 'svd', this parameter is ignored.
-      # @param batch_size [Integer] The size of the mini batches.
-      #   If solver is not 'sgd', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
       #   If solver is 'svd', this parameter is ignored.
-      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd', or 'lbfgs').
+      # @param solver [String] The algorithm to calculate weights. ('auto', 'svd', or 'lbfgs').
       #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
-      #   'sgd' uses the stochastic gradient descent optimization.
       #   'svd' performs singular value decomposition of samples.
       #   'lbfgs' uses the L-BFGS method for optimization.
-      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
-      #   If nil is given, the method does not execute in parallel.
-      #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
       #   If solver is 'svd', this parameter is ignored.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
-                     reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 50, tol: 1e-4,
-                     solver: 'auto',
-                     n_jobs: nil, verbose: false, random_seed: nil)
+      def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0, max_iter: 1000, tol: 1e-4, solver: 'auto', verbose: false)
         super()
-        @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
+        @params = {
+          reg_param: reg_param,
+          fit_bias: fit_bias,
+          bias_scale: bias_scale,
+          max_iter: max_iter,
+          tol: tol,
+          verbose: verbose
+        }
         @params[:solver] = if solver == 'auto'
                              enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                            else
-                             solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
+                             solver.match?(/^svd$|^lbfgs$/) ? solver : 'lbfgs'
                            end
-        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
-        @params[:random_seed] ||= srand
-        @rng = Random.new(@params[:random_seed])
-        @penalty_type = L2_PENALTY
-        @loss_func = ::Rumale::LinearModel::Loss::MeanSquaredError.new
       end
 
       # Fit the model with given training data.
@@ -102,17 +68,15 @@ module Rumale
       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [Ridge] The learned regressor itself.
       def fit(x, y)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
-        y = ::Rumale::Validation.check_convert_target_value_array(y)
-        ::Rumale::Validation.check_sample_size(x, y)
-
-        if @params[:solver] == 'svd' && enable_linalg?(warning: false)
-          fit_svd(x, y)
-        elsif @params[:solver] == 'lbfgs'
-          fit_lbfgs(x, y)
-        else
-          fit_sgd(x, y)
-        end
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_target_value_array(y)
+        Rumale::Validation.check_sample_size(x, y)
+
+        @weight_vec, @bias_term = if @params[:solver] == 'svd' && enable_linalg?(warning: false)
+                                    partial_fit_svd(x, y)
+                                  else
+                                    partial_fit_lbfgs(x, y)
+                                  end
 
         self
       end
@@ -122,25 +86,24 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
 
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
       private
 
-      def fit_svd(x, y)
+      def partial_fit_svd(x, y)
         x = expand_feature(x) if fit_bias?
-
         s, u, vt = Numo::Linalg.svd(x, driver: 'sdd', job: 'S')
         d = (s / (s**2 + @params[:reg_param])).diag
         w = vt.transpose.dot(d).dot(u.transpose).dot(y)
-
-        @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+        w = w.transpose.dup unless single_target?(y)
+        split_weight(w)
       end
 
-      def fit_lbfgs(x, y)
-        fnc = proc do |w, x, y, a| # rubocop:disable Lint/ShadowingOuterLocalVariable
+      def partial_fit_lbfgs(base_x, base_y)
+        fnc = proc do |w, x, y, a|
           n_samples, n_features = x.shape
           w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
           z = x.dot(w.transpose)
@@ -150,57 +113,25 @@ module Rumale
           [loss, gradient.flatten.dup]
         end
 
-        x = expand_feature(x) if fit_bias?
+        base_x = expand_feature(base_x) if fit_bias?
 
-        n_features = x.shape[1]
-        n_outputs = single_target?(y) ? 1 : y.shape[1]
+        n_features = base_x.shape[1]
+        n_outputs = single_target?(base_y) ? 1 : base_y.shape[1]
+        w_init = Numo::DFloat.zeros(n_outputs * n_features)
 
         res = Lbfgsb.minimize(
-          fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y, @params[:reg_param]],
+          fnc: fnc, jcb: true, x_init: w_init, args: [base_x, base_y, @params[:reg_param]],
           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
           verbose: @params[:verbose] ? 1 : -1
         )
 
-        @weight_vec, @bias_term =
-          if single_target?(y)
-            split_weight(res[:x])
-          else
-            split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
-          end
-      end
-
-      def fit_sgd(x, y)
-        if single_target?(y)
-          @weight_vec, @bias_term = partial_fit(x, y)
-        else
-          n_outputs = y.shape[1]
-          n_features = x.shape[1]
-          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-          @bias_term = Numo::DFloat.zeros(n_outputs)
-          if enable_parallel?
-            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
-            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-          end
-        end
+        w = single_target?(base_y) ? res[:x] : res[:x].reshape(n_outputs, n_features)
+        split_weight(w)
       end
 
       def single_target?(y)
         y.ndim == 1
       end
-
-      def init_weight(n_features, n_outputs)
-        ::Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
-      end
-
-      def split_weight_mult(w)
-        if fit_bias?
-          [w[0...-1, true].dup, w[-1, true].dup]
-        else
-          [w.dup, Numo::DFloat.zeros(w.shape[1])]
-        end
-      end
     end
   end
 end
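For reference, a minimal usage sketch of the reworked Ridge API above. The sample data and variable names are invented for illustration and are not taken from the gem's own docs:

    require 'rumale/linear_model/ridge'

    # Made-up regression data: 100 samples with 3 features.
    x = Numo::DFloat.new(100, 3).rand
    y = x.dot(Numo::DFloat[2.0, -1.0, 0.5]) + 0.3

    # 'auto' selects the 'svd' solver when Numo::Linalg is loaded, otherwise 'lbfgs'.
    estimator = Rumale::LinearModel::Ridge.new(reg_param: 0.1, solver: 'auto')
    estimator.fit(x, y)
    predicted = estimator.predict(x)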
@@ -0,0 +1,262 @@
+# frozen_string_literal: true
+
+require 'rumale/base/classifier'
+require 'rumale/probabilistic_output'
+require 'rumale/validation'
+
+require_relative 'sgd_estimator'
+
+module Rumale
+  module LinearModel
+    # SGDClassifier is a class that implements linear classifier with stochastic gradient descent optimization.
+    #
+    # @example
+    #   require 'rumale/linear_model/sgd_classifier'
+    #
+    #   estimator =
+    #     Rumale::LinearModel::SGDClassifier.new(loss: 'hinge', reg_param: 1.0, max_iter: 1000, batch_size: 50, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Shalev-Shwartz, S., and Singer, Y., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+    # - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+    class SGDClassifier < Rumale::LinearModel::SGDEstimator # rubocop:disable Metrics/ClassLength
+      include Rumale::Base::Classifier
+
+      # Return the class labels.
+      # @return [Numo::Int32] (shape: [n_classes])
+      attr_reader :classes
+
+      # Return the random generator for performing random sampling.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new linear classifier with stochastic gradient descent optimization.
+      #
+      # @param loss [String] The loss function to be used ('hinge' and 'log_loss').
+      # @param learning_rate [Float] The initial value of learning rate.
+      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
+      # @param momentum [Float] The momentum factor.
+      # @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
+      # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
+      #   If penalty set to 'l2' or 'l1', this parameter is ignored.
+      #   If l1_ratio = 1, the regularization is similar to Lasso.
+      #   If l1_ratio = 0, the regularization is similar to Ridge.
+      #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+      # @param reg_param [Float] The regularization parameter.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(loss: 'hinge', learning_rate: 0.01, decay: nil, momentum: 0.9,
+                     penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
+                     fit_bias: true, bias_scale: 1.0,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
+                     n_jobs: nil, verbose: false, random_seed: nil)
+        super()
+        @params.merge!(
+          loss: loss,
+          learning_rate: learning_rate,
+          decay: decay,
+          momentum: momentum,
+          penalty: penalty,
+          reg_param: reg_param,
+          l1_ratio: l1_ratio,
+          fit_bias: fit_bias,
+          bias_scale: bias_scale,
+          max_iter: max_iter,
+          batch_size: batch_size,
+          tol: tol,
+          n_jobs: n_jobs,
+          verbose: verbose,
+          random_seed: random_seed
+        )
+        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+        @params[:random_seed] ||= srand
+        @rng = Random.new(@params[:random_seed])
+        @penalty_type = @params[:penalty]
+        @loss_func = case @params[:loss]
+                     when Rumale::LinearModel::Loss::HingeLoss::NAME
+                       Rumale::LinearModel::Loss::HingeLoss.new
+                     when Rumale::LinearModel::Loss::LogLoss::NAME
+                       Rumale::LinearModel::Loss::LogLoss.new
+                     else
+                       raise ArgumentError, "given loss '#{loss}' is not supported."
+                     end
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+      # @return [SGDClassifier] The learned classifier itself.
+      def fit(x, y)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_label_array(y)
+        Rumale::Validation.check_sample_size(x, y)
+
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+
+        send("fit_#{@loss_func.name}", x, y)
+
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+      def decision_function(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+      def predict(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        send("predict_#{@loss_func.name}", x)
+      end
+
+      # Predict probability for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+      def predict_proba(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        send("predict_proba_#{@loss_func.name}", x)
+      end
+
+      private
+
+      def fit_hinge(x, y)
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
+          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes)
+          @prob_param = Numo::DFloat.zeros(n_classes, 2)
+          models = if enable_parallel?
+                     parallel_map(n_classes) do |n|
+                       bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+                       w, b = partial_fit(x, bin_y)
+                       prb = Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w.transpose) + b, bin_y)
+                       [w, b, prb]
+                     end
+                   else
+                     Array.new(n_classes) do |n|
+                       bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+                       w, b = partial_fit(x, bin_y)
+                       prb = Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w.transpose) + b, bin_y)
+                       [w, b, prb]
+                     end
+                   end
+          # store model.
+          models.each_with_index { |model, n| @weight_vec[n, true], @bias_term[n], @prob_param[n, true] = model }
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+          @prob_param = Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose) + @bias_term, bin_y)
+        end
+      end
+
+      def fit_log_loss(x, y)
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
+          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes)
+          if enable_parallel?
+            models = parallel_map(n_classes) do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              partial_fit(x, bin_y)
+            end
+            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_classes.times do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+            end
+          end
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+        end
+      end
+
+      def predict_proba_hinge(x)
+        if multiclass_problem?
+          probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
+          (probs.transpose / probs.sum(axis: 1)).transpose.dup
+        else
+          n_samples = x.shape[0]
+          probs = Numo::DFloat.zeros(n_samples, 2)
+          probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
+          probs[true, 0] = 1.0 - probs[true, 1]
+          probs
+        end
+      end
+
+      def predict_proba_log_loss(x)
+        proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+        return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
+
+        n_samples = x.shape[0]
+        probs = Numo::DFloat.zeros(n_samples, 2)
+        probs[true, 1] = proba
+        probs[true, 0] = 1.0 - proba
+        probs
+      end
+
+      def predict_hinge(x)
+        n_samples = x.shape[0]
+        predicted = if multiclass_problem?
+                      decision_values = decision_function(x)
+                      if enable_parallel?
+                        parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      else
+                        Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      end
+                    else
+                      decision_values = decision_function(x).ge(0.0).to_a
+                      Array.new(n_samples) { |n| @classes[decision_values[n]] }
+                    end
+        Numo::Int32.asarray(predicted)
+      end
+
+      def predict_log_loss(x)
+        n_samples = x.shape[0]
+        decision_values = predict_proba_log_loss(x)
+        predicted = if enable_parallel?
+                      parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    else
+                      Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    end
+        Numo::Int32.asarray(predicted)
+      end
+
+      def multiclass_problem?
+        @classes.size > 2
+      end
+    end
+  end
+end
@@ -1,6 +1,6 @@
 # frozen_string_literal: true
 
-require 'rumale/base/estimator'
+require_relative 'base_estimator'
 
 module Rumale
   module LinearModel
@@ -96,6 +96,14 @@ module Rumale
         def dloss(out, y)
           2.fdiv(y.shape[0]) * (out - y)
         end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'squared_error'
       end
 
       # @!visibility private
@@ -110,6 +118,14 @@ module Rumale
         def dloss(out, y)
           y / (1 + Numo::NMath.exp(-y * out)) - y
         end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'log_loss'
       end
 
       # @!visibility private
@@ -127,6 +143,14 @@ module Rumale
           d[tids] = -y[tids] if tids.count.positive?
           d
         end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'hinge'
       end
 
       # @!visibility private
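The NAME constants and name methods added to each loss above let SGDClassifier dispatch to its loss-specific helpers by string; roughly:

    loss_func = Rumale::LinearModel::Loss::HingeLoss.new
    loss_func.name # => 'hinge'
    # Inside SGDClassifier#fit and #predict:
    #   send("fit_#{@loss_func.name}", x, y)  # calls fit_hinge
    #   send("predict_#{@loss_func.name}", x) # calls predict_hinge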
@@ -151,13 +175,21 @@ module Rumale
           d[tids] = -1 if tids.count.positive?
           d
         end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'epsilon_insensitive'
       end
     end
 
-    # BaseSGD is an abstract class for implementation of linear model with mini-batch stochastic gradient descent (SGD) optimization.
+    # SGDEstimator is an abstract class for implementation of linear model with mini-batch stochastic gradient descent (SGD) optimization.
     # This class is used internally.
-    class BaseSGD < ::Rumale::Base::Estimator
-      # Create an initial linear model.
+    class SGDEstimator < Rumale::LinearModel::BaseEstimator
+      # Create an initial linear model with SGD.
       def initialize
         super()
         @params = {
@@ -224,23 +256,6 @@ module Rumale
         split_weight(weight)
       end
 
-      def expand_feature(x)
-        n_samples = x.shape[0]
-        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
-      end
-
-      def split_weight(weight)
-        if fit_bias?
-          [weight[0...-1].dup, weight[-1]]
-        else
-          [weight, 0.0]
-        end
-      end
-
-      def fit_bias?
-        @params[:fit_bias] == true
-      end
-
       def apply_l2_penalty?
         @penalty_type == L2_PENALTY || @penalty_type == ELASTICNET_PENALTY
       end
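Ridge and SGDEstimator still call expand_feature, split_weight, and fit_bias? after this change, so these helpers have presumably moved into the shared BaseEstimator introduced in this release (its file is not shown in this diff). A hypothetical sketch of those helpers, reconstructed from the code removed above and from the fact that Ridge now passes a multi-output weight matrix directly to split_weight:

    # Sketch only: assumed shape of the shared helpers in BaseEstimator.
    def expand_feature(x)
      n_samples = x.shape[0]
      Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
    end

    def split_weight(w)
      return [w.dup, 0.0] unless fit_bias?

      if w.ndim == 1
        [w[0...-1].dup, w[-1]]                 # single output: last element is the bias
      else
        [w[true, 0...-1].dup, w[true, -1].dup] # multi-output: last column is the bias vector
      end
    end

    def fit_bias?
      @params[:fit_bias] == true
    end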