rumale 0.22.2 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.clang-format +149 -0
  3. data/.coveralls.yml +1 -0
  4. data/.github/workflows/build.yml +5 -2
  5. data/.github/workflows/coverage.yml +30 -0
  6. data/.gitignore +1 -0
  7. data/CHANGELOG.md +38 -0
  8. data/Gemfile +3 -2
  9. data/LICENSE.txt +1 -1
  10. data/README.md +45 -8
  11. data/Rakefile +2 -1
  12. data/ext/rumale/extconf.rb +1 -1
  13. data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
  14. data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
  15. data/ext/rumale/tree.c +76 -96
  16. data/ext/rumale/tree.h +2 -0
  17. data/lib/rumale.rb +6 -1
  18. data/lib/rumale/base/base_estimator.rb +5 -3
  19. data/lib/rumale/dataset.rb +7 -3
  20. data/lib/rumale/decomposition/fast_ica.rb +1 -1
  21. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
  22. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
  23. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  24. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  25. data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
  26. data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
  27. data/lib/rumale/ensemble/voting_classifier.rb +126 -0
  28. data/lib/rumale/ensemble/voting_regressor.rb +82 -0
  29. data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
  30. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
  31. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  32. data/lib/rumale/linear_model/elastic_net.rb +1 -1
  33. data/lib/rumale/linear_model/lasso.rb +1 -1
  34. data/lib/rumale/linear_model/linear_regression.rb +66 -35
  35. data/lib/rumale/linear_model/nnls.rb +137 -0
  36. data/lib/rumale/linear_model/ridge.rb +71 -34
  37. data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
  38. data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
  39. data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
  40. data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
  41. data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
  42. data/lib/rumale/tree/base_decision_tree.rb +15 -10
  43. data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
  44. data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
  45. data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
  46. data/lib/rumale/validation.rb +12 -0
  47. data/lib/rumale/version.rb +1 -1
  48. metadata +13 -6
  49. data/.travis.yml +0 -17
data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb
@@ -0,0 +1,92 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/classifier'
+ require 'rumale/preprocessing/label_binarizer'
+
+ module Rumale
+   module KernelMachine
+     # KernelRidgeClassifier is a class that implements a classifier based on kernel ridge regression.
+     # It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
+     #
+     # @example
+     #   require 'numo/linalg/autoloader'
+     #   require 'rumale'
+     #
+     #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
+     #   kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
+     #   kridge.fit(kernel_mat_train, training_values)
+     #
+     #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
+     #   results = kridge.predict(kernel_mat_test)
+     class KernelRidgeClassifier
+       include Base::BaseEstimator
+       include Base::Classifier
+
+       # Return the class labels.
+       # @return [Numo::Int32] (size: n_classes)
+       attr_reader :classes
+
+       # Return the weight vector.
+       # @return [Numo::DFloat] (shape: [n_training_samples, n_classes])
+       attr_reader :weight_vec
+
+       # Create a new classifier based on kernel ridge regression.
+       #
+       # @param reg_param [Float/Numo::DFloat] The regularization parameter.
+       def initialize(reg_param: 1.0)
+         @params = {}
+         @params[:reg_param] = reg_param
+         @classes = nil
+         @weight_vec = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
+       #   The kernel matrix of the training data to be used for fitting the model.
+       # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
+       # @return [KernelRidgeClassifier] The learned classifier itself.
+       def fit(x, y)
+         x = check_convert_sample_array(x)
+         y = check_convert_label_array(y)
+         check_sample_label_size(x, y)
+         raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
+         raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+
+         @encoder = Rumale::Preprocessing::LabelBinarizer.new
+         y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
+         @classes = Numo::NArray[*@encoder.classes]
+
+         n_samples = x.shape[0]
+         reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
+         @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
+
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
+       #   The kernel matrix between testing samples and training samples to predict values.
+       # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) The confidence score per sample.
+       def decision_function(x)
+         x = check_convert_sample_array(x)
+         x.dot(@weight_vec)
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
+       #   The kernel matrix between testing samples and training samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
+       def predict(x)
+         x = check_convert_sample_array(x)
+         scores = decision_function(x)
+         n_samples, n_classes = scores.shape
+         label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
+         @classes[label_ids].dup
+       end
+     end
+   end
+ end
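The heart of `fit` above is one symmetric linear solve, (K + reg_param * I) W = Y. A minimal standalone sketch of that step, with a made-up 2x2 kernel matrix purely for illustration (assumes Numo::Linalg is installed):

  require 'numo/narray'
  require 'numo/linalg/autoloader'

  # Labels binarized to {-1, 1}, one column per class, as LabelBinarizer does above.
  kernel_mat = Numo::DFloat[[2.0, 0.5], [0.5, 1.5]] # toy training kernel matrix
  y_encoded  = Numo::DFloat[[1, -1], [-1, 1]]       # 2 samples, 2 classes
  reg_param  = 0.5

  # Solve (K + reg_param * I) W = Y with the symmetric driver, as in #fit.
  w = Numo::Linalg.solve(kernel_mat + Numo::DFloat.eye(2) * reg_param,
                         y_encoded, driver: 'sym')
  scores = kernel_mat.dot(w) # decision_function applied to the training samples

In `predict`, `max_index(axis: 1)` returns indices into the flattened scores array, which is why each row's offset (`row index * n_classes`) is subtracted to recover per-row class ids.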
data/lib/rumale/kernel_machine/kernel_svc.rb
@@ -152,7 +152,7 @@ module Rumale
 
       if @classes.size > 2
         probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
-        return (probs.transpose / probs.sum(axis: 1)).transpose
+        return (probs.transpose / probs.sum(axis: 1)).transpose.dup
       end
 
       n_samples, = x.shape
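The only change here is the trailing `.dup`. In Numo, `transpose` returns a view onto the original array, so the extra `dup` most likely serves to hand callers an independent, contiguous copy of the normalized probabilities. A tiny illustration of the distinction (my own example, not part of the diff):

  a = Numo::DFloat.new(2, 3).seq
  v = a.transpose # a view sharing a's memory, with axes swapped
  c = v.dup       # an independent contiguous copy, safe to return to callers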
data/lib/rumale/linear_model/elastic_net.rb
@@ -81,7 +81,7 @@ module Rumale
     # Fit the model with given training data.
     #
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-    # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+    # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
     # @return [ElasticNet] The learned regressor itself.
     def fit(x, y)
       x = check_convert_sample_array(x)
data/lib/rumale/linear_model/lasso.rb
@@ -77,7 +77,7 @@ module Rumale
     # Fit the model with given training data.
     #
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-    # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+    # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
     # @return [Lasso] The learned regressor itself.
     def fit(x, y)
       x = check_convert_sample_array(x)
data/lib/rumale/linear_model/linear_regression.rb
@@ -1,12 +1,15 @@
  # frozen_string_literal: true
 
+ require 'lbfgsb'
+
  require 'rumale/linear_model/base_sgd'
  require 'rumale/base/regressor'
 
  module Rumale
    module LinearModel
      # LinearRegression is a class that implements ordinary least squares linear regression
-     # with stochastic gradient descent (SGD) optimization or singular value decomposition (SVD).
+     # with stochastic gradient descent (SGD) optimization,
+     # singular value decomposition (SVD), or L-BFGS optimization.
      #
      # @example
      #   estimator =
@@ -41,31 +44,32 @@ module Rumale
     #
     # @param learning_rate [Float] The initial value of learning rate.
     #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
     #   If nil is given, the decay is set to 'learning_rate'.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param momentum [Float] The momentum factor.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
     # @param bias_scale [Float] The scale of the bias term.
     # @param max_iter [Integer] The maximum number of epochs that indicates
     #   how many times the whole data is given to the training process.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is 'svd', this parameter is ignored.
     # @param batch_size [Integer] The size of the mini batches.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param tol [Float] The tolerance of loss for terminating optimization.
-    #   If solver = 'svd', this parameter is ignored.
-    # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd' or 'svd').
-    #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'sgd' solver.
+    #   If solver is 'svd', this parameter is ignored.
+    # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd' or 'lbfgs').
+    #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
     #   'sgd' uses the stochastic gradient descent optimization.
     #   'svd' performs singular value decomposition of samples.
+    #   'lbfgs' uses the L-BFGS method for optimization.
     # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
     #   If nil is given, the method does not execute in parallel.
     #   If zero or less is given, it becomes equal to the number of processors.
-    #   This parameter is ignored if the Parallel gem is not loaded.
+    #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
     # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is 'svd', this parameter is ignored.
     # @param random_seed [Integer] The seed value used to initialize the random generator.
     def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                    fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
@@ -80,9 +84,9 @@ module Rumale
       super()
       @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
       @params[:solver] = if solver == 'auto'
-                           load_linalg? ? 'svd' : 'sgd'
+                           enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                          else
-                           solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
+                           solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
                          end
       @params[:decay] ||= @params[:learning_rate]
       @params[:random_seed] ||= srand
@@ -95,15 +99,17 @@ module Rumale
     # Fit the model with given training data.
     #
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-    # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+    # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
     # @return [LinearRegression] The learned regressor itself.
     def fit(x, y)
       x = check_convert_sample_array(x)
       y = check_convert_tvalue_array(y)
       check_sample_tvalue_size(x, y)
 
-      if @params[:solver] == 'svd' && enable_linalg?
+      if @params[:solver] == 'svd' && enable_linalg?(warning: false)
         fit_svd(x, y)
+      elsif @params[:solver] == 'lbfgs'
+        fit_lbfgs(x, y)
       else
         fit_sgd(x, y)
       end
@@ -124,24 +130,46 @@ module Rumale
 
     def fit_svd(x, y)
       x = expand_feature(x) if fit_bias?
-
       w = Numo::Linalg.pinv(x, driver: 'svd').dot(y)
+      @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+    end
 
-      is_single_target_vals = y.shape[1].nil?
-      if @params[:fit_bias]
-        @weight_vec = is_single_target_vals ? w[0...-1].dup : w[0...-1, true].dup
-        @bias_term = is_single_target_vals ? w[-1] : w[-1, true].dup
-      else
-        @weight_vec = w.dup
-        @bias_term = is_single_target_vals ? 0 : Numo::DFloat.zeros(y.shape[1])
+    def fit_lbfgs(x, y)
+      fnc = proc do |w, x, y| # rubocop:disable Lint/ShadowingOuterLocalVariable
+        n_samples, n_features = x.shape
+        w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+        z = x.dot(w.transpose)
+        d = z - y
+        loss = (d**2).sum.fdiv(n_samples)
+        gradient = 2.fdiv(n_samples) * d.transpose.dot(x)
+        [loss, gradient.flatten.dup]
       end
-    end
 
-    def fit_sgd(x, y)
-      n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+      x = expand_feature(x) if fit_bias?
+
       n_features = x.shape[1]
+      n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+      res = Lbfgsb.minimize(
+        fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y],
+        maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+        verbose: @params[:verbose] ? 1 : -1
+      )
 
-      if n_outputs > 1
+      @weight_vec, @bias_term =
+        if single_target?(y)
+          split_weight(res[:x])
+        else
+          split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+        end
+    end
+
+    def fit_sgd(x, y)
+      if single_target?(y)
+        @weight_vec, @bias_term = partial_fit(x, y)
+      else
+        n_outputs = y.shape[1]
+        n_features = x.shape[1]
         @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
         @bias_term = Numo::DFloat.zeros(n_outputs)
         if enable_parallel?
@@ -150,20 +178,23 @@ module Rumale
         else
           n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
         end
-      else
-        @weight_vec, @bias_term = partial_fit(x, y)
       end
     end
 
-    def fit_bias?
-      @params[:fit_bias] == true
+    def single_target?(y)
+      y.ndim == 1
     end
 
-    def load_linalg?
-      return false if defined?(Numo::Linalg).nil?
-      return false if Numo::Linalg::VERSION < '0.1.4'
+    def init_weight(n_features, n_outputs)
+      Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+    end
 
-      true
+    def split_weight_mult(w)
+      if fit_bias?
+        [w[0...-1, true].dup, w[-1, true].dup]
+      else
+        [w.dup, Numo::DFloat.zeros(w.shape[1])]
+      end
     end
   end
 end
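A short usage sketch of the new solver switch; the data below is synthetic, but the constructor options are the ones documented in the hunks above:

  require 'rumale'

  x = Numo::DFloat.new(100, 4).rand
  y = x.dot(Numo::DFloat[1.0, -0.5, 2.0, 0.0]) + 3.0

  # 'lbfgs' works without Numo::Linalg; note that 'auto' now falls back to
  # 'lbfgs' rather than 'sgd' when Numo::Linalg is not loaded.
  est = Rumale::LinearModel::LinearRegression.new(solver: 'lbfgs', tol: 1e-8)
  est.fit(x, y)
  est.weight_vec # should approach [1.0, -0.5, 2.0, 0.0], with bias_term near 3.0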
data/lib/rumale/linear_model/nnls.rb
@@ -0,0 +1,137 @@
+ # frozen_string_literal: true
+
+ require 'lbfgsb'
+
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/regressor'
+
+ module Rumale
+   module LinearModel
+     # NNLS is a class that implements non-negative least squares regression.
+     # NNLS solves the least squares problem under non-negative constraints on the coefficients using the L-BFGS-B method.
+     #
+     # @example
+     #   estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     class NNLS
+       include Base::BaseEstimator
+       include Base::Regressor
+
+       # Return the weight vector.
+       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept).
+       # @return [Numo::DFloat] (shape: [n_outputs])
+       attr_reader :bias_term
+
+       # Return the number of iterations performed until convergence.
+       # @return [Integer]
+       attr_reader :n_iter
+
+       # Return the random generator for initializing weights.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new regressor with the non-negative least squares method.
+       #
+       # @param reg_param [Float] The regularization parameter for the L2 regularization term.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       # @param max_iter [Integer] The maximum number of epochs that indicates
+       #   how many times the whole data is given to the training process.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
+                      max_iter: 1000, tol: 1e-4, verbose: false, random_seed: nil)
+         check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, tol: tol)
+         check_params_boolean(fit_bias: fit_bias, verbose: verbose)
+         check_params_numeric_or_nil(random_seed: random_seed)
+         check_params_positive(reg_param: reg_param, max_iter: max_iter)
+         @params = method(:initialize).parameters.each_with_object({}) { |(_, prm), obj| obj[prm] = binding.local_variable_get(prm) }
+         @params[:random_seed] ||= srand
+         @n_iter = nil
+         @weight_vec = nil
+         @bias_term = nil
+         @rng = Random.new(@params[:random_seed])
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+       # @return [NNLS] The learned regressor itself.
+       def fit(x, y)
+         x = check_convert_sample_array(x)
+         y = check_convert_tvalue_array(y)
+         check_sample_tvalue_size(x, y)
+
+         x = expand_feature(x) if fit_bias?
+
+         n_features = x.shape[1]
+         n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+         w_init = Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+         w_init[w_init.lt(0)] = 0
+         bounds = Numo::DFloat.zeros(n_outputs * n_features, 2)
+         bounds.shape[0].times { |n| bounds[n, 1] = Float::INFINITY }
+
+         res = Lbfgsb.minimize(
+           fnc: method(:nnls_fnc), jcb: true, x_init: w_init, args: [x, y, @params[:reg_param]], bounds: bounds,
+           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: @params[:verbose] ? 1 : -1
+         )
+
+         @n_iter = res[:n_iter]
+         w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features).transpose
+
+         if fit_bias?
+           @weight_vec = single_target?(y) ? w[0...-1].dup : w[0...-1, true].dup
+           @bias_term = single_target?(y) ? w[-1] : w[-1, true].dup
+         else
+           @weight_vec = w.dup
+           @bias_term = single_target?(y) ? 0 : Numo::DFloat.zeros(y.shape[1])
+         end
+
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+       def predict(x)
+         x = check_convert_sample_array(x)
+         x.dot(@weight_vec.transpose) + @bias_term
+       end
+
+       private
+
+       def nnls_fnc(w, x, y, alpha)
+         n_samples, n_features = x.shape
+         w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+         z = x.dot(w.transpose)
+         d = z - y
+         loss = (d**2).sum.fdiv(n_samples) + alpha * (w * w).sum
+         gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * alpha * w
+         [loss, gradient.flatten.dup]
+       end
+
+       def expand_feature(x)
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+       end
+
+       def fit_bias?
+         @params[:fit_bias] == true
+       end
+
+       def single_target?(y)
+         y.ndim == 1
+       end
+     end
+   end
+ end
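A usage sketch mirroring the class's own @example with synthetic data; the bounds passed to Lbfgsb.minimize (0 to +Infinity per coefficient) are what keep the fitted weights non-negative:

  require 'rumale'

  x = Numo::DFloat.new(50, 3).rand
  y = x.dot(Numo::DFloat[2.0, 0.0, 1.5]) # true coefficients are non-negative

  est = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
  est.fit(x, y)
  est.weight_vec.min >= 0 # => true, by construction of the box constraints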
data/lib/rumale/linear_model/ridge.rb
@@ -1,12 +1,15 @@
  # frozen_string_literal: true
 
+ require 'lbfgsb'
+
  require 'rumale/linear_model/base_sgd'
  require 'rumale/base/regressor'
 
  module Rumale
    module LinearModel
      # Ridge is a class that implements Ridge Regression
-     # with stochastic gradient descent (SGD) optimization or singular value decomposition (SVD).
+     # with stochastic gradient descent (SGD) optimization,
+     # singular value decomposition (SVD), or L-BFGS optimization.
      #
      # @example
      #   estimator =
@@ -41,32 +44,33 @@ module Rumale
     #
     # @param learning_rate [Float] The initial value of learning rate.
     #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
     #   If nil is given, the decay is set to 'reg_param * learning_rate'.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param momentum [Float] The momentum factor.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param reg_param [Float] The regularization parameter.
     # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
     # @param bias_scale [Float] The scale of the bias term.
     # @param max_iter [Integer] The maximum number of epochs that indicates
     #   how many times the whole data is given to the training process.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is 'svd', this parameter is ignored.
     # @param batch_size [Integer] The size of the mini batches.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param tol [Float] The tolerance of loss for terminating optimization.
-    #   If solver = 'svd', this parameter is ignored.
-    # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd' or 'svd').
-    #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'sgd' solver.
+    #   If solver is 'svd', this parameter is ignored.
+    # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd', or 'lbfgs').
+    #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
     #   'sgd' uses the stochastic gradient descent optimization.
     #   'svd' performs singular value decomposition of samples.
+    #   'lbfgs' uses the L-BFGS method for optimization.
     # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
     #   If nil is given, the method does not execute in parallel.
     #   If zero or less is given, it becomes equal to the number of processors.
-    #   This parameter is ignored if the Parallel gem is not loaded or the solver is 'svd'.
+    #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
     # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is 'svd', this parameter is ignored.
     # @param random_seed [Integer] The seed value used to initialize the random generator.
     def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                    reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
@@ -83,9 +87,9 @@ module Rumale
       super()
       @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
       @params[:solver] = if solver == 'auto'
-                           load_linalg? ? 'svd' : 'sgd'
+                           enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                          else
-                           solver != 'svd' ? 'sgd' : 'svd' # rubocop:disable Style/NegatedIfElseCondition
+                           solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
                          end
       @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
       @params[:random_seed] ||= srand
@@ -99,15 +103,17 @@ module Rumale
     # Fit the model with given training data.
     #
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-    # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+    # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
     # @return [Ridge] The learned regressor itself.
     def fit(x, y)
       x = check_convert_sample_array(x)
       y = check_convert_tvalue_array(y)
       check_sample_tvalue_size(x, y)
 
-      if @params[:solver] == 'svd' && enable_linalg?
+      if @params[:solver] == 'svd' && enable_linalg?(warning: false)
         fit_svd(x, y)
+      elsif @params[:solver] == 'lbfgs'
+        fit_lbfgs(x, y)
       else
         fit_sgd(x, y)
       end
@@ -127,27 +133,51 @@ module Rumale
     private
 
     def fit_svd(x, y)
-      samples = @params[:fit_bias] ? expand_feature(x) : x
+      x = expand_feature(x) if fit_bias?
 
-      s, u, vt = Numo::Linalg.svd(samples, driver: 'sdd', job: 'S')
+      s, u, vt = Numo::Linalg.svd(x, driver: 'sdd', job: 'S')
       d = (s / (s**2 + @params[:reg_param])).diag
       w = vt.transpose.dot(d).dot(u.transpose).dot(y)
 
-      is_single_target_vals = y.shape[1].nil?
-      if @params[:fit_bias]
-        @weight_vec = is_single_target_vals ? w[0...-1].dup : w[0...-1, true].dup
-        @bias_term = is_single_target_vals ? w[-1] : w[-1, true].dup
-      else
-        @weight_vec = w.dup
-        @bias_term = is_single_target_vals ? 0 : Numo::DFloat.zeros(y.shape[1])
-      end
+      @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
     end
 
-    def fit_sgd(x, y)
-      n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+    def fit_lbfgs(x, y)
+      fnc = proc do |w, x, y, a| # rubocop:disable Lint/ShadowingOuterLocalVariable
+        n_samples, n_features = x.shape
+        w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+        z = x.dot(w.transpose)
+        d = z - y
+        loss = (d**2).sum.fdiv(n_samples) + a * (w * w).sum
+        gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * a * w
+        [loss, gradient.flatten.dup]
+      end
+
+      x = expand_feature(x) if fit_bias?
+
       n_features = x.shape[1]
+      n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+      res = Lbfgsb.minimize(
+        fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y, @params[:reg_param]],
+        maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+        verbose: @params[:verbose] ? 1 : -1
+      )
+
+      @weight_vec, @bias_term =
+        if single_target?(y)
+          split_weight(res[:x])
+        else
+          split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+        end
+    end
 
-      if n_outputs > 1
+    def fit_sgd(x, y)
+      if single_target?(y)
+        @weight_vec, @bias_term = partial_fit(x, y)
+      else
+        n_outputs = y.shape[1]
+        n_features = x.shape[1]
         @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
         @bias_term = Numo::DFloat.zeros(n_outputs)
         if enable_parallel?
@@ -156,16 +186,23 @@ module Rumale
         else
           n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
         end
-      else
-        @weight_vec, @bias_term = partial_fit(x, y)
       end
     end
 
-    def load_linalg?
-      return false if defined?(Numo::Linalg).nil?
-      return false if Numo::Linalg::VERSION < '0.1.4'
+    def single_target?(y)
+      y.ndim == 1
+    end
+
+    def init_weight(n_features, n_outputs)
+      Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+    end
 
-      true
+    def split_weight_mult(w)
+      if fit_bias?
+        [w[0...-1, true].dup, w[-1, true].dup]
+      else
+        [w.dup, Numo::DFloat.zeros(w.shape[1])]
+      end
     end
   end
 end
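As with LinearRegression, the three Ridge solvers can be swapped on the same data; a sketch with synthetic inputs (option names taken from the hunks above):

  require 'rumale'

  x = Numo::DFloat.new(100, 5).rand
  y = x.dot(Numo::DFloat[1.0, -2.0, 0.5, 0.0, 3.0])

  %w[sgd lbfgs svd].each do |slv|
    est = Rumale::LinearModel::Ridge.new(reg_param: 0.1, solver: slv, random_seed: 1)
    # Per fit above, 'svd' silently falls back to SGD unless Numo::Linalg is loaded.
    est.fit(x, y)
    puts format('%s: %.4f', slv, est.score(x, y)) # R^2 on the training data
  end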