rumale-linear_model 0.24.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,19 +4,18 @@ require 'lbfgsb'
 
 require 'rumale/base/regressor'
 require 'rumale/validation'
-require 'rumale/linear_model/base_sgd'
+
+require_relative 'base_estimator'
 
 module Rumale
   module LinearModel
     # Ridge is a class that implements Ridge Regression
-    # with stochastic gradient descent (SGD) optimization,
-    # singular value decomposition (SVD), or L-BFGS optimization.
+    # with singular value decomposition (SVD) or L-BFGS optimization.
     #
     # @example
     #   require 'rumale/linear_model/ridge'
     #
-    #   estimator =
-    #     Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
+    #   estimator = Rumale::LinearModel::Ridge.new(reg_param: 0.1)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -27,73 +26,40 @@ module Rumale
     #   estimator = Rumale::LinearModel::Ridge.new(reg_param: 0.1, solver: 'svd')
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    class Ridge < BaseSGD
-      include ::Rumale::Base::Regressor
-
-      # Return the weight vector.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept).
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
-
-      # Return the random generator for random sampling.
-      # @return [Random]
-      attr_reader :rng
+    class Ridge < Rumale::LinearModel::BaseEstimator
+      include Rumale::Base::Regressor
 
       # Create a new Ridge regressor.
       #
-      # @param learning_rate [Float] The initial value of learning rate.
-      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-      #   If solver is not 'sgd', this parameter is ignored.
-      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
-      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
-      #   If solver is not 'sgd', this parameter is ignored.
-      # @param momentum [Float] The momentum factor.
-      #   If solver is not 'sgd', this parameter is ignored.
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
       #   If solver is 'svd', this parameter is ignored.
-      # @param batch_size [Integer] The size of the mini batches.
-      #   If solver is not 'sgd', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
       #   If solver is 'svd', this parameter is ignored.
-      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd', or 'lbfgs').
+      # @param solver [String] The algorithm to calculate weights. ('auto', 'svd', or 'lbfgs').
       #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
-      #   'sgd' uses the stochastic gradient descent optimization.
       #   'svd' performs singular value decomposition of samples.
       #   'lbfgs' uses the L-BFGS method for optimization.
-      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
-      #   If nil is given, the method does not execute in parallel.
-      #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
       #   If solver is 'svd', this parameter is ignored.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
-                     reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 50, tol: 1e-4,
-                     solver: 'auto',
-                     n_jobs: nil, verbose: false, random_seed: nil)
+      def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0, max_iter: 1000, tol: 1e-4, solver: 'auto', verbose: false)
         super()
-        @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
+        @params = {
+          reg_param: reg_param,
+          fit_bias: fit_bias,
+          bias_scale: bias_scale,
+          max_iter: max_iter,
+          tol: tol,
+          verbose: verbose
+        }
         @params[:solver] = if solver == 'auto'
                              enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                            else
-                             solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
+                             solver.match?(/^svd$|^lbfgs$/) ? solver : 'lbfgs'
                            end
-        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
-        @params[:random_seed] ||= srand
-        @rng = Random.new(@params[:random_seed])
-        @penalty_type = L2_PENALTY
-        @loss_func = ::Rumale::LinearModel::Loss::MeanSquaredError.new
       end
 
       # Fit the model with given training data.
@@ -102,17 +68,15 @@ module Rumale
       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [Ridge] The learned regressor itself.
       def fit(x, y)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
-        y = ::Rumale::Validation.check_convert_target_value_array(y)
-        ::Rumale::Validation.check_sample_size(x, y)
-
-        if @params[:solver] == 'svd' && enable_linalg?(warning: false)
-          fit_svd(x, y)
-        elsif @params[:solver] == 'lbfgs'
-          fit_lbfgs(x, y)
-        else
-          fit_sgd(x, y)
-        end
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_target_value_array(y)
+        Rumale::Validation.check_sample_size(x, y)
+
+        @weight_vec, @bias_term = if @params[:solver] == 'svd' && enable_linalg?(warning: false)
+                                    partial_fit_svd(x, y)
+                                  else
+                                    partial_fit_lbfgs(x, y)
+                                  end
 
         self
       end
@@ -122,25 +86,24 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
 
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
       private
 
-      def fit_svd(x, y)
+      def partial_fit_svd(x, y)
         x = expand_feature(x) if fit_bias?
-
         s, u, vt = Numo::Linalg.svd(x, driver: 'sdd', job: 'S')
         d = (s / (s**2 + @params[:reg_param])).diag
         w = vt.transpose.dot(d).dot(u.transpose).dot(y)
-
-        @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+        w = w.transpose.dup unless single_target?(y)
+        split_weight(w)
       end
 
-      def fit_lbfgs(x, y)
-        fnc = proc do |w, x, y, a| # rubocop:disable Lint/ShadowingOuterLocalVariable
+      def partial_fit_lbfgs(base_x, base_y)
+        fnc = proc do |w, x, y, a|
           n_samples, n_features = x.shape
           w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
           z = x.dot(w.transpose)
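For reference, the new partial_fit_svd above is the usual closed-form ridge solution expressed through the SVD of the (optionally bias-expanded) sample matrix: writing $X = U \Sigma V^{\mathsf{T}}$, the code's `d` and `w` compute

$$\hat{W} = V \,\mathrm{diag}\!\left(\frac{\sigma_i}{\sigma_i^{2} + \lambda}\right) U^{\mathsf{T}} y,$$

where $\lambda$ is reg_param; this equals $(X^{\mathsf{T}}X + \lambda I)^{-1} X^{\mathsf{T}} y$, so the SVD solver still returns the exact ridge estimate rather than an iterative approximation.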
@@ -150,57 +113,25 @@ module Rumale
           [loss, gradient.flatten.dup]
         end
 
-        x = expand_feature(x) if fit_bias?
+        base_x = expand_feature(base_x) if fit_bias?
 
-        n_features = x.shape[1]
-        n_outputs = single_target?(y) ? 1 : y.shape[1]
+        n_features = base_x.shape[1]
+        n_outputs = single_target?(base_y) ? 1 : base_y.shape[1]
+        w_init = Numo::DFloat.zeros(n_outputs * n_features)
 
         res = Lbfgsb.minimize(
-          fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y, @params[:reg_param]],
+          fnc: fnc, jcb: true, x_init: w_init, args: [base_x, base_y, @params[:reg_param]],
           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
           verbose: @params[:verbose] ? 1 : -1
         )
 
-        @weight_vec, @bias_term =
-          if single_target?(y)
-            split_weight(res[:x])
-          else
-            split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
-          end
-      end
-
-      def fit_sgd(x, y)
-        if single_target?(y)
-          @weight_vec, @bias_term = partial_fit(x, y)
-        else
-          n_outputs = y.shape[1]
-          n_features = x.shape[1]
-          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-          @bias_term = Numo::DFloat.zeros(n_outputs)
-          if enable_parallel?
-            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
-            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-          end
-        end
+        w = single_target?(base_y) ? res[:x] : res[:x].reshape(n_outputs, n_features)
+        split_weight(w)
       end
 
       def single_target?(y)
         y.ndim == 1
       end
-
-      def init_weight(n_features, n_outputs)
-        ::Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
-      end
-
-      def split_weight_mult(w)
-        if fit_bias?
-          [w[0...-1, true].dup, w[-1, true].dup]
-        else
-          [w.dup, Numo::DFloat.zeros(w.shape[1])]
-        end
-      end
     end
   end
 end
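Net effect of the Ridge changes above: the SGD solver and its keywords (learning_rate, decay, momentum, batch_size, n_jobs, random_seed) are gone, and fitting always goes through the SVD or L-BFGS path. A minimal before/after sketch of caller code; the x_train, y_train, and x_test variables are placeholders, not part of the gem:

    # 0.24.0 accepted SGD-related keywords (used when solver: 'sgd'):
    #   Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)

    # 0.26.0 keeps only the SVD/L-BFGS-related keywords:
    ridge = Rumale::LinearModel::Ridge.new(reg_param: 0.1, solver: 'lbfgs')
    ridge.fit(x_train, y_train)     # x_train: Numo::DFloat [n_samples, n_features]
    y_pred = ridge.predict(x_test)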
@@ -0,0 +1,262 @@
+# frozen_string_literal: true
+
+require 'rumale/base/classifier'
+require 'rumale/probabilistic_output'
+require 'rumale/validation'
+
+require_relative 'sgd_estimator'
+
+module Rumale
+  module LinearModel
+    # SGDClassifier is a class that implements linear classifier with stochastic gradient descent optimization.
+    #
+    # @example
+    #   require 'rumale/linear_model/sgd_classifier'
+    #
+    #   estimator =
+    #     Rumale::LinearModel::SGDClassifier.new(loss: 'hinge', reg_param: 1.0, max_iter: 1000, batch_size: 50, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Shalev-Shwartz, S., and Singer, Y., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+    # - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+    class SGDClassifier < Rumale::LinearModel::SGDEstimator # rubocop:disable Metrics/ClassLength
+      include Rumale::Base::Classifier
+
+      # Return the class labels.
+      # @return [Numo::Int32] (shape: [n_classes])
+      attr_reader :classes
+
+      # Return the random generator for performing random sampling.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new linear classifier with stochastic gradient descent optimization.
+      #
+      # @param loss [String] The loss function to be used ('hinge' and 'log_loss').
+      # @param learning_rate [Float] The initial value of learning rate.
+      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
+      # @param momentum [Float] The momentum factor.
+      # @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
+      # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
+      #   If penalty set to 'l2' or 'l1', this parameter is ignored.
+      #   If l1_ratio = 1, the regularization is similar to Lasso.
+      #   If l1_ratio = 0, the regularization is similar to Ridge.
+      #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+      # @param reg_param [Float] The regularization parameter.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(loss: 'hinge', learning_rate: 0.01, decay: nil, momentum: 0.9,
+                     penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
+                     fit_bias: true, bias_scale: 1.0,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
+                     n_jobs: nil, verbose: false, random_seed: nil)
+        super()
+        @params.merge!(
+          loss: loss,
+          learning_rate: learning_rate,
+          decay: decay,
+          momentum: momentum,
+          penalty: penalty,
+          reg_param: reg_param,
+          l1_ratio: l1_ratio,
+          fit_bias: fit_bias,
+          bias_scale: bias_scale,
+          max_iter: max_iter,
+          batch_size: batch_size,
+          tol: tol,
+          n_jobs: n_jobs,
+          verbose: verbose,
+          random_seed: random_seed
+        )
+        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+        @params[:random_seed] ||= srand
+        @rng = Random.new(@params[:random_seed])
+        @penalty_type = @params[:penalty]
+        @loss_func = case @params[:loss]
+                     when Rumale::LinearModel::Loss::HingeLoss::NAME
+                       Rumale::LinearModel::Loss::HingeLoss.new
+                     when Rumale::LinearModel::Loss::LogLoss::NAME
+                       Rumale::LinearModel::Loss::LogLoss.new
+                     else
+                       raise ArgumentError, "given loss '#{loss}' is not supported."
+                     end
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+      # @return [SGDClassifier] The learned classifier itself.
+      def fit(x, y)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_label_array(y)
+        Rumale::Validation.check_sample_size(x, y)
+
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+
+        send("fit_#{@loss_func.name}", x, y)
+
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+      def decision_function(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+      def predict(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        send("predict_#{@loss_func.name}", x)
+      end
+
+      # Predict probability for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+      def predict_proba(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        send("predict_proba_#{@loss_func.name}", x)
+      end
+
+      private
+
+      def fit_hinge(x, y)
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
+          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes)
+          @prob_param = Numo::DFloat.zeros(n_classes, 2)
+          models = if enable_parallel?
+                     parallel_map(n_classes) do |n|
+                       bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+                       w, b = partial_fit(x, bin_y)
+                       prb = Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w.transpose) + b, bin_y)
+                       [w, b, prb]
+                     end
+                   else
+                     Array.new(n_classes) do |n|
+                       bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+                       w, b = partial_fit(x, bin_y)
+                       prb = Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w.transpose) + b, bin_y)
+                       [w, b, prb]
+                     end
+                   end
+          # store model.
+          models.each_with_index { |model, n| @weight_vec[n, true], @bias_term[n], @prob_param[n, true] = model }
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+          @prob_param = Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose) + @bias_term, bin_y)
+        end
+      end
+
+      def fit_log_loss(x, y)
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
+          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes)
+          if enable_parallel?
+            models = parallel_map(n_classes) do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              partial_fit(x, bin_y)
+            end
+            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_classes.times do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+            end
+          end
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+        end
+      end
+
+      def predict_proba_hinge(x)
+        if multiclass_problem?
+          probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
+          (probs.transpose / probs.sum(axis: 1)).transpose.dup
+        else
+          n_samples = x.shape[0]
+          probs = Numo::DFloat.zeros(n_samples, 2)
+          probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
+          probs[true, 0] = 1.0 - probs[true, 1]
+          probs
+        end
+      end
+
+      def predict_proba_log_loss(x)
+        proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+        return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
+
+        n_samples = x.shape[0]
+        probs = Numo::DFloat.zeros(n_samples, 2)
+        probs[true, 1] = proba
+        probs[true, 0] = 1.0 - proba
+        probs
+      end
+
+      def predict_hinge(x)
+        n_samples = x.shape[0]
+        predicted = if multiclass_problem?
+                      decision_values = decision_function(x)
+                      if enable_parallel?
+                        parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      else
+                        Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                      end
+                    else
+                      decision_values = decision_function(x).ge(0.0).to_a
+                      Array.new(n_samples) { |n| @classes[decision_values[n]] }
+                    end
+        Numo::Int32.asarray(predicted)
+      end
+
+      def predict_log_loss(x)
+        n_samples = x.shape[0]
+        decision_values = predict_proba_log_loss(x)
+        predicted = if enable_parallel?
+                      parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    else
+                      Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    end
+        Numo::Int32.asarray(predicted)
+      end
+
+      def multiclass_problem?
+        @classes.size > 2
+      end
+    end
+  end
+end
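The new SGDClassifier keeps the SGD-specific options dropped from Ridge (learning_rate, decay, momentum, batch_size, n_jobs, random_seed) in a dedicated class. Based on its constructor and @example above, usage looks roughly like this; the x_train, y_train, and x_test arrays are placeholders:

    require 'rumale/linear_model/sgd_classifier'

    # 'hinge' trains a linear SVM-style classifier; 'log_loss' trains logistic regression.
    clf = Rumale::LinearModel::SGDClassifier.new(loss: 'log_loss', penalty: 'l2', reg_param: 1.0,
                                                 max_iter: 1000, batch_size: 50, random_seed: 1)
    clf.fit(x_train, y_train)          # y_train: Numo::Int32 labels
    probs  = clf.predict_proba(x_test) # supported for both losses via predict_proba_*
    labels = clf.predict(x_test)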
@@ -1,6 +1,6 @@
 # frozen_string_literal: true
 
-require 'rumale/base/estimator'
+require_relative 'base_estimator'
 
 module Rumale
   module LinearModel
@@ -96,6 +96,14 @@ module Rumale
        def dloss(out, y)
          2.fdiv(y.shape[0]) * (out - y)
        end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'squared_error'
      end
 
      # @!visibility private
@@ -110,6 +118,14 @@ module Rumale
        def dloss(out, y)
          y / (1 + Numo::NMath.exp(-y * out)) - y
        end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'log_loss'
      end
 
      # @!visibility private
@@ -127,6 +143,14 @@ module Rumale
          d[tids] = -y[tids] if tids.count.positive?
          d
        end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'hinge'
      end
 
      # @!visibility private
@@ -151,13 +175,21 @@ module Rumale
          d[tids] = -1 if tids.count.positive?
          d
        end
+
+        # @!visibility private
+        def name
+          NAME
+        end
+
+        # @!visibility private
+        NAME = 'epsilon_insensitive'
      end
    end
 
-    # BaseSGD is an abstract class for implementation of linear model with mini-batch stochastic gradient descent (SGD) optimization.
+    # SGDEstimator is an abstract class for implementation of linear model with mini-batch stochastic gradient descent (SGD) optimization.
     # This class is used internally.
-    class BaseSGD < ::Rumale::Base::Estimator
-      # Create an initial linear model.
+    class SGDEstimator < Rumale::LinearModel::BaseEstimator
+      # Create an initial linear model with SGD.
       def initialize
         super()
         @params = {
@@ -224,23 +256,6 @@ module Rumale
         split_weight(weight)
       end
 
-      def expand_feature(x)
-        n_samples = x.shape[0]
-        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
-      end
-
-      def split_weight(weight)
-        if fit_bias?
-          [weight[0...-1].dup, weight[-1]]
-        else
-          [weight, 0.0]
-        end
-      end
-
-      def fit_bias?
-        @params[:fit_bias] == true
-      end
-
       def apply_l2_penalty?
         @penalty_type == L2_PENALTY || @penalty_type == ELASTICNET_PENALTY
       end