rumale 0.20.2 → 0.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/regressor'
+
+ module Rumale
+   module Ensemble
+     # StackingRegressor is a class that implements a regressor with the stacking method.
+     #
+     # @example
+     #   estimators = {
+     #     las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
+     #     mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
+     #     rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
+     #   }
+     #   meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
+     #   regressor = Rumale::Ensemble::StackingRegressor.new(
+     #     estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
+     #   )
+     #   regressor.fit(training_samples, training_values)
+     #   results = regressor.predict(testing_samples)
+     #
+     # *Reference*
+     # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
+     class StackingRegressor
+       include Base::BaseEstimator
+       include Base::Regressor
+
+       # Return the base regressors.
+       # @return [Hash<Symbol,Regressor>]
+       attr_reader :estimators
+
+       # Return the meta regressor.
+       # @return [Regressor]
+       attr_reader :meta_estimator
+
+       # Create a new regressor with the stacking method.
+       #
+       # @param estimators [Hash<Symbol,Regressor>] The base regressors for extracting meta features.
+       # @param meta_estimator [Regressor/Nil] The meta regressor that predicts values.
+       #   If nil is given, Ridge is used.
+       # @param n_splits [Integer] The number of folds of the k-fold cross validation used for meta feature extraction in the training phase.
+       # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
+       # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta regressor.
+       # @param random_seed [Integer/Nil] The seed value used to initialize the random generator on cross validation.
+       def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, passthrough: false, random_seed: nil)
+         check_params_type(Hash, estimators: estimators)
+         check_params_numeric(n_splits: n_splits)
+         check_params_boolean(shuffle: shuffle, passthrough: passthrough)
+         check_params_numeric_or_nil(random_seed: random_seed)
+         @estimators = estimators
+         @meta_estimator = meta_estimator || Rumale::LinearModel::Ridge.new
+         @output_size = nil
+         @params = {}
+         @params[:n_splits] = n_splits
+         @params[:shuffle] = shuffle
+         @params[:passthrough] = passthrough
+         @params[:random_seed] = random_seed || srand
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
+       # @return [StackingRegressor] The learned regressor itself.
+       def fit(x, y)
+         x = check_convert_sample_array(x)
+         y = check_convert_tvalue_array(y)
+         check_sample_tvalue_size(x, y)
+
+         n_samples, n_features = x.shape
+         n_outputs = y.ndim == 1 ? 1 : y.shape[1]
+
+         # training base regressors with all training data.
+         @estimators.each_key { |name| @estimators[name].fit(x, y) }
+
+         # detecting the output size of each base regressor.
+         @output_size = detect_output_size(n_features)
+
+         # extracting meta features with base regressors.
+         n_components = @output_size.values.inject(:+)
+         z = Numo::DFloat.zeros(n_samples, n_components)
+
+         kf = Rumale::ModelSelection::KFold.new(
+           n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
+         )
+
+         kf.split(x, y).each do |train_ids, valid_ids|
+           x_train = x[train_ids, true]
+           y_train = n_outputs == 1 ? y[train_ids] : y[train_ids, true]
+           x_valid = x[valid_ids, true]
+           f_start = 0
+           @estimators.each_key do |name|
+             est_fold = Marshal.load(Marshal.dump(@estimators[name]))
+             f_last = f_start + @output_size[name]
+             f_position = @output_size[name] == 1 ? f_start : f_start...f_last
+             z[valid_ids, f_position] = est_fold.fit(x_train, y_train).predict(x_valid)
+             f_start = f_last
+           end
+         end
+
+         # concatenating the original features.
+         z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
+
+         # training the meta regressor.
+         @meta_estimator.fit(z, y)
+
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) The predicted values per sample.
+       def predict(x)
+         x = check_convert_sample_array(x)
+         z = transform(x)
+         @meta_estimator.predict(z)
+       end
+
+       # Transform the given data with the learned model.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
+       # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
+       def transform(x)
+         x = check_convert_sample_array(x)
+         n_samples = x.shape[0]
+         n_components = @output_size.values.inject(:+)
+         z = Numo::DFloat.zeros(n_samples, n_components)
+         f_start = 0
+         @estimators.each_key do |name|
+           f_last = f_start + @output_size[name]
+           f_position = @output_size[name] == 1 ? f_start : f_start...f_last
+           z[true, f_position] = @estimators[name].predict(x)
+           f_start = f_last
+         end
+         z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
+         z
+       end
+
+       # Fit the model with training data, and then transform them with the learned model.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
+       # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
+       def fit_transform(x, y)
+         x = check_convert_sample_array(x)
+         y = check_convert_tvalue_array(y)
+         fit(x, y).transform(x)
+       end
+
+       private
+
+       def detect_output_size(n_features)
+         x_dummy = Numo::DFloat.new(2, n_features).rand
+         @estimators.each_key.with_object({}) do |name, obj|
+           output_dummy = @estimators[name].predict(x_dummy)
+           obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
+         end
+       end
+     end
+   end
+ end
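
For orientation, here is a minimal end-to-end sketch of the new class (assuming rumale 0.22.x with numo-narray installed; the data below is synthetic and the Hash keys las/rnd are arbitrary):

    require 'rumale'

    # Synthetic regression data: 100 samples, 4 features, linear target plus noise.
    x = Numo::DFloat.new(100, 4).rand
    y = x.dot(Numo::DFloat[1.0, -2.0, 0.5, 3.0]) + Numo::DFloat.new(100).rand_norm * 0.1

    estimators = {
      las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
      rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
    }
    # meta_estimator is omitted, so the default Ridge meta regressor is used.
    regressor = Rumale::Ensemble::StackingRegressor.new(estimators: estimators, random_seed: 1)
    regressor.fit(x, y)

    meta_features = regressor.transform(x) # one column block per base regressor
    predictions = regressor.predict(x)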
@@ -75,9 +75,12 @@ module Rumale
   false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)
 
   if true_pos.size.zero? || false_pos[0] != 0 || true_pos[0] != 0
+    # NOTE: Numo::NArray#insert is not a destructive method.
+    # rubocop:disable Style/RedundantSelfAssignment
     true_pos = true_pos.insert(0, 0)
     false_pos = false_pos.insert(0, 0)
     thresholds = thresholds.insert(0, thresholds[0] + 1)
+    # rubocop:enable Style/RedundantSelfAssignment
   end
 
   tpr = true_pos / true_pos[-1].to_f
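
The NOTE added above is worth a one-line demonstration: Numo::NArray#insert returns a new array rather than mutating its receiver, so the self-assignments are not actually redundant (hence the RuboCop false positive). A minimal sketch, assuming only numo-narray:

    require 'numo/narray'

    a = Numo::Int32[1, 2, 3]
    a.insert(0, 0)     # => Numo::Int32[0, 1, 2, 3], a newly allocated array
    a                  # => Numo::Int32[1, 2, 3], the receiver is unchanged
    a = a.insert(0, 0) # reassignment is required to keep the result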
@@ -67,7 +67,7 @@ module Rumale
       def transform(x)
         raise 'FeatureHasher#transform requires Mmh3 but that is not loaded.' unless enable_mmh3?
 
-        x = [x] unless x.is_a?(Array) # rubocop:disable Style/ArrayCoercion
+        x = [x] unless x.is_a?(Array)
         n_samples = x.size
 
         z = Numo::DFloat.zeros(n_samples, n_features)
@@ -99,7 +99,7 @@ module Rumale
       # @param x [Array<Hash>] (shape: [n_samples]) The array of hashes consisting of feature names and values.
       # @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
       def transform(x)
-        x = [x] unless x.is_a?(Array) # rubocop:disable Style/ArrayCoercion
+        x = [x] unless x.is_a?(Array)
         n_samples = x.size
         n_features = @vocabulary.size
         z = Numo::DFloat.zeros(n_samples, n_features)
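
Both transform hunks drop the same RuboCop directive around an Array coercion that lets callers pass either one sample or a batch. A hedged usage sketch, assuming this second hunk is Rumale::FeatureExtraction::HashVectorizer#transform (suggested by the @vocabulary reference):

    require 'rumale'

    encoder = Rumale::FeatureExtraction::HashVectorizer.new
    encoder.fit([{ foo: 1, bar: 2 }, { foo: 3, baz: 1 }])
    encoder.transform([{ foo: 1, bar: 2 }]) # a batch: Array of Hashes
    encoder.transform({ foo: 1, bar: 2 })   # a single Hash, wrapped internally by `x = [x]`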
@@ -11,9 +11,10 @@ module Rumale
     # with stochastic gradient descent (SGD) optimization.
     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
     #
-    # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
-    # If you prefer execution speed, you should use Rumale::SVM::SVC.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides a kernel support vector classifier based on LIBSVM.
+    #   If you prefer execution speed, you should use Rumale::SVM::SVC.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
@@ -171,7 +171,7 @@ module Rumale
         @params[:fit_bias] = true
         @params[:reg_param] = 0.0
         @params[:l1_ratio] = 0.0
-        @params[:max_iter] = 200
+        @params[:max_iter] = 1000
         @params[:batch_size] = 50
         @params[:tol] = 0.0001
         @params[:verbose] = false
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 1000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -59,7 +59,7 @@ module Rumale
       # @param random_seed [Integer] The seed value used to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, l1_ratio: 0.5, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 500, batch_size: 20, random_seed: 1)
+    #     Rumale::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -55,7 +55,7 @@ module Rumale
       # @param random_seed [Integer] The seed value used to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, bias_scale: bias_scale,
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::LinearRegression.new(max_iter: 500, batch_size: 20, random_seed: 1)
+    #     Rumale::LinearModel::LinearRegression.new(max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -68,7 +68,7 @@ module Rumale
       #   If solver = 'svd', this parameter is ignored.
       # @param random_seed [Integer] The seed value used to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
-                     fit_bias: true, bias_scale: 1.0, max_iter: 200, batch_size: 50, tol: 1e-4,
+                     fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
                      solver: 'auto',
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
@@ -82,7 +82,7 @@ module Rumale
         @params[:solver] = if solver == 'auto'
                              load_linalg? ? 'svd' : 'sgd'
                            else
-                             solver != 'svd' ? 'sgd' : 'svd'
+                             solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
                            end
         @params[:decay] ||= @params[:learning_rate]
         @params[:random_seed] ||= srand
@@ -1,21 +1,24 @@
 # frozen_string_literal: true
 
- require 'rumale/linear_model/base_sgd'
+ require 'lbfgsb'
 require 'rumale/base/classifier'
+ require 'rumale/linear_model/base_sgd'
+ require 'rumale/preprocessing/label_binarizer'
 
 module Rumale
   module LinearModel
-    # LogisticRegression is a class that implements Logistic Regression
-    # with stochastic gradient descent optimization.
-    # For multiclass classification problem, it uses one-vs-the-rest strategy.
+    # LogisticRegression is a class that implements Logistic Regression.
+    # For multiclass classification problems, it uses the one-vs-the-rest strategy with the sgd solver
+    # and multinomial logistic regression with the lbfgs solver.
     #
-    # Rumale::SVM provides Logistic Regression based on LIBLINEAR.
-    # If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides Logistic Regression based on LIBLINEAR.
+    #   If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, random_seed: 1)
     #   estimator.fit(training_samples, training_labels)
     #   results = estimator.predict(testing_samples)
     #
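
A short sketch of the two solver paths described in the rewritten comment (synthetic data, for illustration only): the default now exercises the L-BFGS path, while solver: 'sgd' restores the previous one-vs-the-rest SGD behavior.

    require 'rumale'

    x = Numo::DFloat.new(60, 2).rand
    y = Numo::Int32.cast(x.sum(axis: 1) > 1.0) # synthetic binary labels (0/1)

    # New default: 'lbfgs' (penalty must be 'l2').
    lbfgs_clf = Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, random_seed: 1)
    lbfgs_clf.fit(x, y)

    # Previous behavior: stochastic gradient descent with mini batches.
    sgd_clf = Rumale::LinearModel::LogisticRegression.new(
      solver: 'sgd', reg_param: 1.0, max_iter: 1000, batch_size: 50, random_seed: 1
    )
    sgd_clf.fit(x, y)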
@@ -42,19 +45,24 @@ module Rumale
       # @return [Random]
       attr_reader :rng
 
-      # Create a new classifier with Logisitc Regression by the SGD optimization.
+      # Create a new classifier with Logistic Regression.
       #
       # @param learning_rate [Float] The initial value of learning rate.
       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
       #   If nil is given, the decay is set to 'reg_param * learning_rate'.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param momentum [Float] The momentum factor.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
+      #   If solver = 'lbfgs', only 'l2' can be selected for this parameter.
       # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
       #   If penalty is set to 'l2' or 'l1', this parameter is ignored.
       #   If l1_ratio = 1, the regularization is similar to Lasso.
       #   If l1_ratio = 0, the regularization is similar to Ridge.
       #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
@@ -62,28 +70,38 @@ module Rumale
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
       # @param batch_size [Integer] The size of the mini batches.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
+      #   If solver = 'lbfgs', this value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of the Lbfgsb.minimize method.
+      # @param solver [String] The algorithm for optimization ('lbfgs' or 'sgd').
+      #   'lbfgs' uses the L-BFGS method with the lbfgsb.rb gem.
+      #   'sgd' uses stochastic gradient descent optimization.
       # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
       #   If nil is given, the methods do not execute in parallel.
       #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded.
+      #   This parameter is ignored if the Parallel gem is not loaded or the solver is 'lbfgs'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If solver = 'lbfgs' and true is given, an 'iterate.dat' file is generated by lbfgsb.rb.
       # @param random_seed [Integer] The seed value used to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
                      fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
+                     solver: 'lbfgs',
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
                              max_iter: max_iter, batch_size: batch_size, tol: tol)
         check_params_boolean(fit_bias: fit_bias, verbose: verbose)
-        check_params_string(penalty: penalty)
+        check_params_string(solver: solver, penalty: penalty)
         check_params_numeric_or_nil(decay: decay, n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(learning_rate: learning_rate, reg_param: reg_param,
                               bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
+        raise ArgumentError, "The 'lbfgs' solver supports only 'l2' penalties." if solver == 'lbfgs' && penalty != 'l2'
+
         super()
         @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
+        @params[:solver] = solver == 'sgd' ? 'sgd' : 'lbfgs'
         @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
         @params[:random_seed] ||= srand
         @rng = Random.new(@params[:random_seed])
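
Two consequences of the new parameter handling, sketched below (the params reader is assumed to come from Base::BaseEstimator). Note also the factr conversion: with Lbfgsb::DBL_EPSILON ≈ 2.22e-16, the default tol of 1e-4 maps to factr ≈ 4.5e11.

    # The new guard: 'lbfgs' accepts only the 'l2' penalty.
    Rumale::LinearModel::LogisticRegression.new(solver: 'lbfgs', penalty: 'l1')
    # => ArgumentError: The 'lbfgs' solver supports only 'l2' penalties.

    # Any solver string other than 'sgd' falls back to 'lbfgs'.
    clf = Rumale::LinearModel::LogisticRegression.new(solver: 'typo')
    clf.params[:solver] # => 'lbfgs'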
@@ -105,30 +123,10 @@ module Rumale
         check_sample_label_size(x, y)
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
-
-        if multiclass_problem?
-          n_classes = @classes.size
-          n_features = x.shape[1]
-          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-          @bias_term = Numo::DFloat.zeros(n_classes)
-          if enable_parallel?
-            # :nocov:
-            models = parallel_map(n_classes) do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              partial_fit(x, bin_y)
-            end
-            # :nocov:
-            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_classes.times do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-            end
-          end
+        if @params[:solver] == 'sgd'
+          fit_sgd(x, y)
         else
-          negative_label = @classes[0]
-          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @weight_vec, @bias_term = partial_fit(x, bin_y)
+          fit_lbfgs(x, y)
         end
 
         self
@@ -182,6 +180,96 @@ module Rumale
       def multiclass_problem?
         @classes.size > 2
       end
+
+      def fit_lbfgs(base_x, base_y) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+        if multiclass_problem?
+          fnc = proc do |w, x, y, a|
+            n_features = x.shape[1]
+            n_classes = y.shape[1]
+            z = x.dot(w.reshape(n_classes, n_features).transpose)
+            # logsumexp and softmax
+            z_max = z.max(-1).expand_dims(-1).dup
+            z_max[~z_max.isfinite] = 0.0
+            lgsexp = Numo::NMath.log(Numo::NMath.exp(z - z_max).sum(-1)).expand_dims(-1) + z_max
+            t = z - lgsexp
+            sftmax = Numo::NMath.exp(t)
+            # loss and gradient
+            loss = -(y * t).sum + 0.5 * a * w.dot(w)
+            grad = (sftmax - y).transpose.dot(x).flatten.dup + a * w
+            [loss, grad]
+          end
+
+          base_x = expand_feature(base_x) if fit_bias?
+          encoder = Rumale::Preprocessing::LabelBinarizer.new
+          onehot_y = encoder.fit_transform(base_y)
+          n_classes = @classes.size
+          n_features = base_x.shape[1]
+          w_init = Numo::DFloat.zeros(n_classes * n_features)
+
+          verbose = @params[:verbose] ? 1 : -1
+          res = Lbfgsb.minimize(
+            fnc: fnc, jcb: true, x_init: w_init, args: [base_x, onehot_y, @params[:reg_param]],
+            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+          )
+
+          if fit_bias?
+            weight = res[:x].reshape(n_classes, n_features)
+            @weight_vec = weight[true, 0...-1].dup
+            @bias_term = weight[true, -1].dup
+          else
+            @weight_vec = res[:x].reshape(n_classes, n_features)
+            @bias_term = Numo::DFloat.zeros(n_classes)
+          end
+        else
+          fnc = proc do |w, x, y, a|
+            z = 1 + Numo::NMath.exp(-y * x.dot(w))
+            loss = Numo::NMath.log(z).sum + 0.5 * a * w.dot(w)
+            grad = (y / z - y).dot(x) + a * w
+            [loss, grad]
+          end
+
+          base_x = expand_feature(base_x) if fit_bias?
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(base_y.ne(negative_label)) * 2 - 1
+          n_features = base_x.shape[1]
+          w_init = Numo::DFloat.zeros(n_features)
+
+          verbose = @params[:verbose] ? 1 : -1
+          res = Lbfgsb.minimize(
+            fnc: fnc, jcb: true, x_init: w_init, args: [base_x, bin_y, @params[:reg_param]],
+            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+          )
+
+          @weight_vec, @bias_term = split_weight(res[:x])
+        end
+      end
+
+      def fit_sgd(x, y)
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
+          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes)
+          if enable_parallel?
+            # :nocov:
+            models = parallel_map(n_classes) do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              partial_fit(x, bin_y)
+            end
+            # :nocov:
+            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_classes.times do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+            end
+          end
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+        end
+      end
     end
   end
 end
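
The binary branch of fit_lbfgs packs the loss and its gradient into one proc for Lbfgsb.minimize. A self-contained check (assuming only numo-narray) that the analytic gradient matches a central finite-difference estimate:

    require 'numo/narray'

    # Binary logistic loss with an L2 term, mirroring the proc in fit_lbfgs.
    loss = lambda do |w, x, y, a|
      z = 1 + Numo::NMath.exp(-y * x.dot(w))
      Numo::NMath.log(z).sum + 0.5 * a * w.dot(w)
    end

    x = Numo::DFloat.new(5, 3).rand - 0.5
    y = Numo::DFloat[1, -1, 1, -1, 1] # bipolar labels, as built by bin_y above
    a = 1.0
    w = Numo::DFloat.new(3).rand

    # Analytic gradient, exactly as in the proc: (y / z - y).dot(x) + a * w
    z = 1 + Numo::NMath.exp(-y * x.dot(w))
    analytic = (y / z - y).dot(x) + a * w

    # Central finite differences, one coordinate at a time.
    eps = 1e-6
    numeric = Numo::DFloat.zeros(3)
    3.times do |i|
      dw = Numo::DFloat.zeros(3)
      dw[i] = eps
      numeric[i] = (loss.call(w + dw, x, y, a) - loss.call(w - dw, x, y, a)) / (2 * eps)
    end

    puts((analytic - numeric).abs.max) # ~1e-9, the two gradients agree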