rumale 0.22.2 → 0.23.1

Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.clang-format +149 -0
  3. data/.coveralls.yml +1 -0
  4. data/.github/workflows/build.yml +5 -2
  5. data/.github/workflows/coverage.yml +30 -0
  6. data/.gitignore +1 -0
  7. data/CHANGELOG.md +38 -0
  8. data/Gemfile +3 -2
  9. data/LICENSE.txt +1 -1
  10. data/README.md +45 -8
  11. data/Rakefile +2 -1
  12. data/ext/rumale/extconf.rb +1 -1
  13. data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
  14. data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
  15. data/ext/rumale/tree.c +76 -96
  16. data/ext/rumale/tree.h +2 -0
  17. data/lib/rumale.rb +6 -1
  18. data/lib/rumale/base/base_estimator.rb +5 -3
  19. data/lib/rumale/dataset.rb +7 -3
  20. data/lib/rumale/decomposition/fast_ica.rb +1 -1
  21. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
  22. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
  23. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  24. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  25. data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
  26. data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
  27. data/lib/rumale/ensemble/voting_classifier.rb +126 -0
  28. data/lib/rumale/ensemble/voting_regressor.rb +82 -0
  29. data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
  30. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
  31. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  32. data/lib/rumale/linear_model/elastic_net.rb +1 -1
  33. data/lib/rumale/linear_model/lasso.rb +1 -1
  34. data/lib/rumale/linear_model/linear_regression.rb +66 -35
  35. data/lib/rumale/linear_model/nnls.rb +137 -0
  36. data/lib/rumale/linear_model/ridge.rb +71 -34
  37. data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
  38. data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
  39. data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
  40. data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
  41. data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
  42. data/lib/rumale/tree/base_decision_tree.rb +15 -10
  43. data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
  44. data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
  45. data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
  46. data/lib/rumale/validation.rb +12 -0
  47. data/lib/rumale/version.rb +1 -1
  48. metadata +13 -6
  49. data/.travis.yml +0 -17
data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb
@@ -0,0 +1,92 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/classifier'
+ require 'rumale/preprocessing/label_binarizer'
+
+ module Rumale
+   module KernelMachine
+     # KernelRidgeClassifier is a class that implements a classifier based on kernel ridge regression.
+     # It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
+     #
+     # @example
+     #   require 'numo/linalg/autoloader'
+     #   require 'rumale'
+     #
+     #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
+     #   kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
+     #   kridge.fit(kernel_mat_train, training_values)
+     #
+     #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
+     #   results = kridge.predict(kernel_mat_test)
+     class KernelRidgeClassifier
+       include Base::BaseEstimator
+       include Base::Classifier
+
+       # Return the class labels.
+       # @return [Numo::Int32] (size: n_classes)
+       attr_reader :classes
+
+       # Return the weight vector.
+       # @return [Numo::DFloat] (shape: [n_training_samples, n_classes])
+       attr_reader :weight_vec
+
+       # Create a new classifier based on kernel ridge regression.
+       #
+       # @param reg_param [Float/Numo::DFloat] The regularization parameter.
+       def initialize(reg_param: 1.0)
+         @params = {}
+         @params[:reg_param] = reg_param
+         @classes = nil
+         @weight_vec = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
+       #   The kernel matrix of the training data to be used for fitting the model.
+       # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
+       # @return [KernelRidgeClassifier] The learned classifier itself.
+       def fit(x, y)
+         x = check_convert_sample_array(x)
+         y = check_convert_label_array(y)
+         check_sample_label_size(x, y)
+         raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
+         raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+
+         @encoder = Rumale::Preprocessing::LabelBinarizer.new
+         y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
+         @classes = Numo::NArray[*@encoder.classes]
+
+         n_samples = x.shape[0]
+         reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
+         @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
+
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
+       #   The kernel matrix between testing samples and training samples to predict values.
+       # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) The confidence score per sample.
+       def decision_function(x)
+         x = check_convert_sample_array(x)
+         x.dot(@weight_vec)
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
+       #   The kernel matrix between testing samples and training samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
+       def predict(x)
+         x = check_convert_sample_array(x)
+         scores = decision_function(x)
+         n_samples, n_classes = scores.shape
+         label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
+         @classes[label_ids].dup
+       end
+     end
+   end
+ end
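Note on predict above: Numo's max_index(axis: 1) returns indices into the flattened score matrix, so the row offset (sample index times n_classes) is subtracted to recover the per-row class index before looking up @classes.

A minimal end-to-end sketch of the new class (the synthetic data and variable names are illustrative, not part of the gem; Numo::Linalg must be installed since fit calls Numo::Linalg.solve):

require 'numo/linalg/autoloader'
require 'rumale'

# Synthetic data: 100 training samples, 4 features, 3 classes.
x_train = Numo::DFloat.new(100, 4).rand
y_train = Numo::Int32.cast(Array.new(100) { rand(3) })

# The classifier consumes precomputed kernel matrices.
kernel_mat_train = Rumale::PairwiseMetric.rbf_kernel(x_train)
kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
kridge.fit(kernel_mat_train, y_train)

# At prediction time, the kernel is computed between test and training samples.
x_test = Numo::DFloat.new(20, 4).rand
kernel_mat_test = Rumale::PairwiseMetric.rbf_kernel(x_test, x_train)
results = kridge.predict(kernel_mat_test) # Numo::Int32, shape [20]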
data/lib/rumale/kernel_machine/kernel_svc.rb
@@ -152,7 +152,7 @@ module Rumale

      if @classes.size > 2
        probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
-       return (probs.transpose / probs.sum(axis: 1)).transpose
+       return (probs.transpose / probs.sum(axis: 1)).transpose.dup
      end

      n_samples, = x.shape
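In the multi-class branch above, each class's decision values are calibrated through a sigmoid and each row is then normalized to sum to one; the newly appended .dup materializes the broadcast view into a fresh array before returning it. A tiny illustration of the normalization with made-up scores:

require 'numo/narray'

probs = Numo::DFloat[[0.2, 0.5, 0.3],
                     [0.1, 0.1, 0.4]] # illustrative per-class sigmoid scores
normalized = (probs.transpose / probs.sum(axis: 1)).transpose.dup
p normalized.sum(axis: 1) # => every row sums to 1.0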
data/lib/rumale/linear_model/elastic_net.rb
@@ -81,7 +81,7 @@ module Rumale
    # Fit the model with given training data.
    #
    # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-   # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+   # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
    # @return [ElasticNet] The learned regressor itself.
    def fit(x, y)
      x = check_convert_sample_array(x)
data/lib/rumale/linear_model/lasso.rb
@@ -77,7 +77,7 @@ module Rumale
    # Fit the model with given training data.
    #
    # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-   # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+   # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
    # @return [Lasso] The learned regressor itself.
    def fit(x, y)
      x = check_convert_sample_array(x)
data/lib/rumale/linear_model/linear_regression.rb
@@ -1,12 +1,15 @@
  # frozen_string_literal: true

+ require 'lbfgsb'
+
  require 'rumale/linear_model/base_sgd'
  require 'rumale/base/regressor'

  module Rumale
    module LinearModel
      # LinearRegression is a class that implements ordinary least squares linear regression
-     # with stochastic gradient descent (SGD) optimization or singular value decomposition (SVD).
+     # with stochastic gradient descent (SGD) optimization,
+     # singular value decomposition (SVD), or L-BFGS optimization.
      #
      # @example
      #   estimator =
@@ -41,31 +44,32 @@ module Rumale
  #
  # @param learning_rate [Float] The initial value of learning rate.
  #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is not 'sgd', this parameter is ignored.
  # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
  #   If nil is given, the decay is set to 'learning_rate'.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is not 'sgd', this parameter is ignored.
  # @param momentum [Float] The momentum factor.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is not 'sgd', this parameter is ignored.
  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
  # @param bias_scale [Float] The scale of the bias term.
  # @param max_iter [Integer] The maximum number of epochs that indicates
  #   how many times the whole data is given to the training process.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is 'svd', this parameter is ignored.
  # @param batch_size [Integer] The size of the mini batches.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is not 'sgd', this parameter is ignored.
  # @param tol [Float] The tolerance of loss for terminating optimization.
- #   If solver = 'svd', this parameter is ignored.
- # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd' or 'svd').
- #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'sgd' solver.
+ #   If solver is 'svd', this parameter is ignored.
+ # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd' or 'lbfgs').
+ #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
  #   'sgd' uses the stochastic gradient descent optimization.
  #   'svd' performs singular value decomposition of samples.
+ #   'lbfgs' uses the L-BFGS method for optimization.
  # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
  #   If nil is given, the method does not execute in parallel.
  #   If zero or less is given, it becomes equal to the number of processors.
- #   This parameter is ignored if the Parallel gem is not loaded.
+ #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
  # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is 'svd', this parameter is ignored.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                 fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
@@ -80,9 +84,9 @@ module Rumale
    super()
    @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
    @params[:solver] = if solver == 'auto'
-                        load_linalg? ? 'svd' : 'sgd'
+                        enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                       else
-                        solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
+                        solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
                       end
    @params[:decay] ||= @params[:learning_rate]
    @params[:random_seed] ||= srand
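With this normalization, any unrecognized solver string now falls back to 'lbfgs' instead of 'sgd'. A hypothetical construction call using the new option (parameter values are illustrative; the keyword names come from the docs above):

require 'rumale'

# 'auto' picks 'svd' when Numo::Linalg is loaded and 'lbfgs' otherwise.
reg = Rumale::LinearModel::LinearRegression.new(solver: 'lbfgs', max_iter: 500, tol: 1e-6)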
@@ -95,15 +99,17 @@ module Rumale
    # Fit the model with given training data.
    #
    # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-   # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+   # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
    # @return [LinearRegression] The learned regressor itself.
    def fit(x, y)
      x = check_convert_sample_array(x)
      y = check_convert_tvalue_array(y)
      check_sample_tvalue_size(x, y)

-     if @params[:solver] == 'svd' && enable_linalg?
+     if @params[:solver] == 'svd' && enable_linalg?(warning: false)
        fit_svd(x, y)
+     elsif @params[:solver] == 'lbfgs'
+       fit_lbfgs(x, y)
      else
        fit_sgd(x, y)
      end
@@ -124,24 +130,46 @@

    def fit_svd(x, y)
      x = expand_feature(x) if fit_bias?
-
      w = Numo::Linalg.pinv(x, driver: 'svd').dot(y)
+     @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+   end

-     is_single_target_vals = y.shape[1].nil?
-     if @params[:fit_bias]
-       @weight_vec = is_single_target_vals ? w[0...-1].dup : w[0...-1, true].dup
-       @bias_term = is_single_target_vals ? w[-1] : w[-1, true].dup
-     else
-       @weight_vec = w.dup
-       @bias_term = is_single_target_vals ? 0 : Numo::DFloat.zeros(y.shape[1])
+   def fit_lbfgs(x, y)
+     fnc = proc do |w, x, y| # rubocop:disable Lint/ShadowingOuterLocalVariable
+       n_samples, n_features = x.shape
+       w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+       z = x.dot(w.transpose)
+       d = z - y
+       loss = (d**2).sum.fdiv(n_samples)
+       gradient = 2.fdiv(n_samples) * d.transpose.dot(x)
+       [loss, gradient.flatten.dup]
      end
-   end

-   def fit_sgd(x, y)
-     n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+     x = expand_feature(x) if fit_bias?
+
      n_features = x.shape[1]
+     n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+     res = Lbfgsb.minimize(
+       fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y],
+       maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+       verbose: @params[:verbose] ? 1 : -1
+     )

-     if n_outputs > 1
+     @weight_vec, @bias_term =
+       if single_target?(y)
+         split_weight(res[:x])
+       else
+         split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+       end
+   end
+
+   def fit_sgd(x, y)
+     if single_target?(y)
+       @weight_vec, @bias_term = partial_fit(x, y)
+     else
+       n_outputs = y.shape[1]
+       n_features = x.shape[1]
        @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
        @bias_term = Numo::DFloat.zeros(n_outputs)
        if enable_parallel?
@@ -150,20 +178,23 @@ module Rumale
      else
        n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
      end
-     else
-       @weight_vec, @bias_term = partial_fit(x, y)
    end
  end

- def fit_bias?
-   @params[:fit_bias] == true
+ def single_target?(y)
+   y.ndim == 1
  end

- def load_linalg?
-   return false if defined?(Numo::Linalg).nil?
-   return false if Numo::Linalg::VERSION < '0.1.4'
+ def init_weight(n_features, n_outputs)
+   Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+ end

-   true
+ def split_weight_mult(w)
+   if fit_bias?
+     [w[0...-1, true].dup, w[-1, true].dup]
+   else
+     [w.dup, Numo::DFloat.zeros(w.shape[1])]
+   end
  end
 end
end
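Both fit_lbfgs above and its Ridge counterpart below follow the same pattern: hand Lbfgsb.minimize a proc that returns the [loss, gradient] pair and read the solution from res[:x]. A self-contained single-target sketch of that pattern (synthetic data; variable names are illustrative):

require 'numo/narray'
require 'lbfgsb'

x = Numo::DFloat.new(50, 3).rand
w_true = Numo::DFloat[1.5, -2.0, 0.5]
y = x.dot(w_true)

# Mean squared error and its gradient with respect to the weights.
fnc = proc do |w, samples, targets|
  n = samples.shape[0]
  d = samples.dot(w) - targets
  [(d**2).sum.fdiv(n), 2.fdiv(n) * samples.transpose.dot(d)]
end

res = Lbfgsb.minimize(fnc: fnc, jcb: true, x_init: Numo::DFloat.zeros(3), args: [x, y])
p res[:x] # approximately [1.5, -2.0, 0.5]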
data/lib/rumale/linear_model/nnls.rb
@@ -0,0 +1,137 @@
+ # frozen_string_literal: true
+
+ require 'lbfgsb'
+
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/regressor'
+
+ module Rumale
+   module LinearModel
+     # NNLS is a class that implements non-negative least squares regression.
+     # NNLS solves the least squares problem under non-negativity constraints on the coefficients using the L-BFGS-B method.
+     #
+     # @example
+     #   estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     class NNLS
+       include Base::BaseEstimator
+       include Base::Regressor
+
+       # Return the weight vector.
+       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept).
+       # @return [Numo::DFloat] (shape: [n_outputs])
+       attr_reader :bias_term
+
+       # Return the number of iterations when converged.
+       # @return [Integer]
+       attr_reader :n_iter
+
+       # Return the random generator for initializing weight.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new regressor with the non-negative least squares method.
+       #
+       # @param reg_param [Float] The regularization parameter for the L2 regularization term.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       # @param max_iter [Integer] The maximum number of epochs that indicates
+       #   how many times the whole data is given to the training process.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
+                      max_iter: 1000, tol: 1e-4, verbose: false, random_seed: nil)
+         check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, tol: tol)
+         check_params_boolean(fit_bias: fit_bias, verbose: verbose)
+         check_params_numeric_or_nil(random_seed: random_seed)
+         check_params_positive(reg_param: reg_param, max_iter: max_iter)
+         @params = method(:initialize).parameters.each_with_object({}) { |(_, prm), obj| obj[prm] = binding.local_variable_get(prm) }
+         @params[:random_seed] ||= srand
+         @n_iter = nil
+         @weight_vec = nil
+         @bias_term = nil
+         @rng = Random.new(@params[:random_seed])
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+       # @return [NNLS] The learned regressor itself.
+       def fit(x, y)
+         x = check_convert_sample_array(x)
+         y = check_convert_tvalue_array(y)
+         check_sample_tvalue_size(x, y)
+
+         x = expand_feature(x) if fit_bias?
+
+         n_features = x.shape[1]
+         n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+         w_init = Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+         w_init[w_init.lt(0)] = 0
+         bounds = Numo::DFloat.zeros(n_outputs * n_features, 2)
+         bounds.shape[0].times { |n| bounds[n, 1] = Float::INFINITY }
+
+         res = Lbfgsb.minimize(
+           fnc: method(:nnls_fnc), jcb: true, x_init: w_init, args: [x, y, @params[:reg_param]], bounds: bounds,
+           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: @params[:verbose] ? 1 : -1
+         )
+
+         @n_iter = res[:n_iter]
+         w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features).transpose
+
+         if fit_bias?
+           @weight_vec = single_target?(y) ? w[0...-1].dup : w[0...-1, true].dup
+           @bias_term = single_target?(y) ? w[-1] : w[-1, true].dup
+         else
+           @weight_vec = w.dup
+           @bias_term = single_target?(y) ? 0 : Numo::DFloat.zeros(y.shape[1])
+         end
+
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+       def predict(x)
+         x = check_convert_sample_array(x)
+         x.dot(@weight_vec.transpose) + @bias_term
+       end
+
+       private
+
+       def nnls_fnc(w, x, y, alpha)
+         n_samples, n_features = x.shape
+         w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+         z = x.dot(w.transpose)
+         d = z - y
+         loss = (d**2).sum.fdiv(n_samples) + alpha * (w * w).sum
+         gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * alpha * w
+         [loss, gradient.flatten.dup]
+       end
+
+       def expand_feature(x)
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+       end
+
+       def fit_bias?
+         @params[:fit_bias] == true
+       end
+
+       def single_target?(y)
+         y.ndim == 1
+       end
+     end
+   end
+ end
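The non-negativity constraint is enforced entirely through the bounds array passed to Lbfgsb.minimize: column 0 holds the lower bound (zero) and column 1 the upper bound (infinity) for every coefficient. A minimal usage sketch with synthetic data (values and names are illustrative):

require 'rumale'

x = Numo::DFloat.new(80, 3).rand
y = x.dot(Numo::DFloat[0.5, 2.0, 1.0]) # non-negative true coefficients

nnls = Rumale::LinearModel::NNLS.new(reg_param: 1e-4, random_seed: 1)
nnls.fit(x, y)
p nnls.weight_vec # every coefficient is >= 0 by construction of the bounds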
data/lib/rumale/linear_model/ridge.rb
@@ -1,12 +1,15 @@
  # frozen_string_literal: true

+ require 'lbfgsb'
+
  require 'rumale/linear_model/base_sgd'
  require 'rumale/base/regressor'

  module Rumale
    module LinearModel
      # Ridge is a class that implements Ridge Regression
-     # with stochastic gradient descent (SGD) optimization or singular value decomposition (SVD).
+     # with stochastic gradient descent (SGD) optimization,
+     # singular value decomposition (SVD), or L-BFGS optimization.
      #
      # @example
      #   estimator =
@@ -41,32 +44,33 @@ module Rumale
  #
  # @param learning_rate [Float] The initial value of learning rate.
  #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is not 'sgd', this parameter is ignored.
  # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
  #   If nil is given, the decay is set to 'reg_param * learning_rate'.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is not 'sgd', this parameter is ignored.
  # @param momentum [Float] The momentum factor.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is not 'sgd', this parameter is ignored.
  # @param reg_param [Float] The regularization parameter.
  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
  # @param bias_scale [Float] The scale of the bias term.
  # @param max_iter [Integer] The maximum number of epochs that indicates
  #   how many times the whole data is given to the training process.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is 'svd', this parameter is ignored.
  # @param batch_size [Integer] The size of the mini batches.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is not 'sgd', this parameter is ignored.
  # @param tol [Float] The tolerance of loss for terminating optimization.
- #   If solver = 'svd', this parameter is ignored.
- # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd' or 'svd').
- #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'sgd' solver.
+ #   If solver is 'svd', this parameter is ignored.
+ # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd', or 'lbfgs').
+ #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
  #   'sgd' uses the stochastic gradient descent optimization.
  #   'svd' performs singular value decomposition of samples.
+ #   'lbfgs' uses the L-BFGS method for optimization.
  # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
  #   If nil is given, the method does not execute in parallel.
  #   If zero or less is given, it becomes equal to the number of processors.
- #   This parameter is ignored if the Parallel gem is not loaded or the solver is 'svd'.
+ #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
  # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
- #   If solver = 'svd', this parameter is ignored.
+ #   If solver is 'svd', this parameter is ignored.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                 reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
@@ -83,9 +87,9 @@ module Rumale
    super()
    @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
    @params[:solver] = if solver == 'auto'
-                        load_linalg? ? 'svd' : 'sgd'
+                        enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                       else
-                        solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
+                        solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
                       end
    @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
    @params[:random_seed] ||= srand
@@ -99,15 +103,17 @@ module Rumale
    # Fit the model with given training data.
    #
    # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-   # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+   # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
    # @return [Ridge] The learned regressor itself.
    def fit(x, y)
      x = check_convert_sample_array(x)
      y = check_convert_tvalue_array(y)
      check_sample_tvalue_size(x, y)

-     if @params[:solver] == 'svd' && enable_linalg?
+     if @params[:solver] == 'svd' && enable_linalg?(warning: false)
        fit_svd(x, y)
+     elsif @params[:solver] == 'lbfgs'
+       fit_lbfgs(x, y)
      else
        fit_sgd(x, y)
      end
@@ -127,27 +133,51 @@
    private

    def fit_svd(x, y)
-     samples = @params[:fit_bias] ? expand_feature(x) : x
+     x = expand_feature(x) if fit_bias?

-     s, u, vt = Numo::Linalg.svd(samples, driver: 'sdd', job: 'S')
+     s, u, vt = Numo::Linalg.svd(x, driver: 'sdd', job: 'S')
      d = (s / (s**2 + @params[:reg_param])).diag
      w = vt.transpose.dot(d).dot(u.transpose).dot(y)

-     is_single_target_vals = y.shape[1].nil?
-     if @params[:fit_bias]
-       @weight_vec = is_single_target_vals ? w[0...-1].dup : w[0...-1, true].dup
-       @bias_term = is_single_target_vals ? w[-1] : w[-1, true].dup
-     else
-       @weight_vec = w.dup
-       @bias_term = is_single_target_vals ? 0 : Numo::DFloat.zeros(y.shape[1])
-     end
+     @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
    end

-   def fit_sgd(x, y)
-     n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+   def fit_lbfgs(x, y)
+     fnc = proc do |w, x, y, a| # rubocop:disable Lint/ShadowingOuterLocalVariable
+       n_samples, n_features = x.shape
+       w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+       z = x.dot(w.transpose)
+       d = z - y
+       loss = (d**2).sum.fdiv(n_samples) + a * (w * w).sum
+       gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * a * w
+       [loss, gradient.flatten.dup]
+     end
+
+     x = expand_feature(x) if fit_bias?
+
      n_features = x.shape[1]
+     n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+     res = Lbfgsb.minimize(
+       fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y, @params[:reg_param]],
+       maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+       verbose: @params[:verbose] ? 1 : -1
+     )

-     if n_outputs > 1
+     @weight_vec, @bias_term =
+       if single_target?(y)
+         split_weight(res[:x])
+       else
+         split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+       end
+   end
+
+   def fit_sgd(x, y)
+     if single_target?(y)
+       @weight_vec, @bias_term = partial_fit(x, y)
+     else
+       n_outputs = y.shape[1]
+       n_features = x.shape[1]
        @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
        @bias_term = Numo::DFloat.zeros(n_outputs)
        if enable_parallel?
@@ -156,16 +186,23 @@
      else
        n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
      end
-     else
-       @weight_vec, @bias_term = partial_fit(x, y)
    end
  end

- def load_linalg?
-   return false if defined?(Numo::Linalg).nil?
-   return false if Numo::Linalg::VERSION < '0.1.4'
+ def single_target?(y)
+   y.ndim == 1
+ end
+
+ def init_weight(n_features, n_outputs)
+   Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+ end

-   true
+ def split_weight_mult(w)
+   if fit_bias?
+     [w[0...-1, true].dup, w[-1, true].dup]
+   else
+     [w.dup, Numo::DFloat.zeros(w.shape[1])]
+   end
  end
 end
end
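For reference, the closed-form path kept in fit_svd computes the ridge solution w = V diag(s / (s^2 + reg_param)) U^T y from the SVD of the (bias-expanded) sample matrix. A standalone sketch of that calculation (synthetic data; requires Numo::Linalg):

require 'numo/linalg/autoloader'

x = Numo::DFloat.new(40, 5).rand
y = x.dot(Numo::DFloat.new(5).rand)
reg_param = 1.0

# Shrink each singular direction by s / (s**2 + reg_param) instead of 1 / s.
s, u, vt = Numo::Linalg.svd(x, driver: 'sdd', job: 'S')
d = (s / (s**2 + reg_param)).diag
w = vt.transpose.dot(d).dot(u.transpose).dot(y) # ridge weight vector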