rumale 0.22.2 → 0.23.1
- checksums.yaml +4 -4
- data/.clang-format +149 -0
- data/.coveralls.yml +1 -0
- data/.github/workflows/build.yml +5 -2
- data/.github/workflows/coverage.yml +30 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +3 -2
- data/LICENSE.txt +1 -1
- data/README.md +45 -8
- data/Rakefile +2 -1
- data/ext/rumale/extconf.rb +1 -1
- data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
- data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
- data/ext/rumale/tree.c +76 -96
- data/ext/rumale/tree.h +2 -0
- data/lib/rumale.rb +6 -1
- data/lib/rumale/base/base_estimator.rb +5 -3
- data/lib/rumale/dataset.rb +7 -3
- data/lib/rumale/decomposition/fast_ica.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
- data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
- data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
- data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
- data/lib/rumale/ensemble/voting_classifier.rb +126 -0
- data/lib/rumale/ensemble/voting_regressor.rb +82 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +1 -1
- data/lib/rumale/linear_model/lasso.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +66 -35
- data/lib/rumale/linear_model/nnls.rb +137 -0
- data/lib/rumale/linear_model/ridge.rb +71 -34
- data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
- data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
- data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
- data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
- data/lib/rumale/tree/base_decision_tree.rb +15 -10
- data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
- data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
- data/lib/rumale/validation.rb +12 -0
- data/lib/rumale/version.rb +1 -1
- metadata +13 -6
- data/.travis.yml +0 -17
data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb (new file):

```diff
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/classifier'
+require 'rumale/preprocessing/label_binarizer'
+
+module Rumale
+  module KernelMachine
+    # KernelRidgeClassifier is a class that implements a classifier based on kernel ridge regression.
+    # It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
+    #
+    # @example
+    #   require 'numo/linalg/autoloader'
+    #   require 'rumale'
+    #
+    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
+    #   kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
+    #   kridge.fit(kernel_mat_train, training_values)
+    #
+    #   kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
+    #   results = kridge.predict(kernel_mat_test)
+    class KernelRidgeClassifier
+      include Base::BaseEstimator
+      include Base::Classifier
+
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_training_sample, n_classes])
+      attr_reader :weight_vec
+
+      # Create a new classifier based on kernel ridge regression.
+      #
+      # @param reg_param [Float/Numo::DFloat] The regularization parameter.
+      def initialize(reg_param: 1.0)
+        @params = {}
+        @params[:reg_param] = reg_param
+        @classes = nil
+        @weight_vec = nil
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
+      #   The kernel matrix of the training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
+      # @return [KernelRidgeClassifier] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
+        raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+
+        @encoder = Rumale::Preprocessing::LabelBinarizer.new
+        y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
+        @classes = Numo::NArray[*@encoder.classes]
+
+        n_samples = x.shape[0]
+        reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
+        @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
+
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to predict values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        x.dot(@weight_vec)
+      end
+
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
+      def predict(x)
+        x = check_convert_sample_array(x)
+        scores = decision_function(x)
+        n_samples, n_classes = scores.shape
+        label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
+        @classes[label_ids].dup
+      end
+    end
+  end
+end
```
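Taken together, the new class is a thin wrapper around a linear solve: `fit` solves the regularized kernel system once, and `predict` returns the class with the largest ridge score. Below is a minimal end-to-end sketch of the intended call pattern; the toy data and variable names are illustrative, not part of the gem:

```ruby
require 'numo/linalg/autoloader' # fit raises unless Numo::Linalg is loaded
require 'rumale'

# Tiny synthetic two-class problem (data and names are ours).
pos = Numo::DFloat.new(10, 2).rand + 2.0
neg = Numo::DFloat.new(10, 2).rand - 2.0
samples = Numo::NArray.vstack([pos, neg])
labels  = Numo::Int32[*([1] * 10 + [0] * 10)]

# The estimator consumes a precomputed kernel matrix, not raw samples.
kernel_train = Rumale::PairwiseMetric.rbf_kernel(samples)
clf = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5).fit(kernel_train, labels)

# At prediction time, each row pairs a test sample with every training sample.
kernel_test = Rumale::PairwiseMetric.rbf_kernel(samples, samples)
p clf.predict(kernel_test)
```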
data/lib/rumale/kernel_machine/kernel_svc.rb:

```diff
@@ -152,7 +152,7 @@ module Rumale
 
       if @classes.size > 2
         probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
-        return (probs.transpose / probs.sum(axis: 1)).transpose
+        return (probs.transpose / probs.sum(axis: 1)).transpose.dup
       end
 
       n_samples, = x.shape
```
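The only change is the trailing `.dup`. In Numo, `transpose` returns a view that shares memory with its source, so the normalized probability matrix used to be handed back as a non-contiguous view; `.dup` materializes an independent copy. A two-line sketch of the distinction:

```ruby
a = Numo::DFloat.new(2, 3).seq
view = a.transpose      # a view sharing a's memory
copy = a.transpose.dup  # an independent, contiguous copy (what the fix returns)
```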
data/lib/rumale/linear_model/elastic_net.rb:

```diff
@@ -81,7 +81,7 @@ module Rumale
       # Fit the model with given training data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [ElasticNet] The learned regressor itself.
       def fit(x, y)
         x = check_convert_sample_array(x)
```
data/lib/rumale/linear_model/lasso.rb:

```diff
@@ -77,7 +77,7 @@ module Rumale
       # Fit the model with given training data.
       #
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [Lasso] The learned regressor itself.
       def fit(x, y)
         x = check_convert_sample_array(x)
```
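Both one-line doc fixes bring the `y` tag in line with multi-target support: `y` may carry one column per output. A minimal multi-output sketch with made-up data (shapes follow the updated docs; the expected `weight_vec` shape is per the class's own attribute docs):

```ruby
require 'rumale'

x = Numo::DFloat.new(100, 5).rand
# Two target columns, each a different linear function of the features.
y = Numo::NArray.hstack([x.sum(axis: 1).expand_dims(1),
                         (x * 2.0).sum(axis: 1).expand_dims(1)])

lasso = Rumale::LinearModel::Lasso.new(reg_param: 0.01).fit(x, y)
p lasso.weight_vec.shape # => [2, 5], one weight row per output
```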
data/lib/rumale/linear_model/linear_regression.rb:

```diff
@@ -1,12 +1,15 @@
 # frozen_string_literal: true
 
+require 'lbfgsb'
+
 require 'rumale/linear_model/base_sgd'
 require 'rumale/base/regressor'
 
 module Rumale
   module LinearModel
     # LinearRegression is a class that implements ordinary least square linear regression
-    # with stochastic gradient descent (SGD) optimization.
+    # with stochastic gradient descent (SGD) optimization,
+    # singular value decomposition (SVD), or L-BFGS optimization.
     #
     # @example
     #   estimator =
```
data/lib/rumale/linear_model/linear_regression.rb:

```diff
@@ -41,31 +44,32 @@ module Rumale
     #
     # @param learning_rate [Float] The initial value of learning rate.
     #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
     #   If nil is given, the decay is set to 'learning_rate'.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param momentum [Float] The momentum factor.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
     # @param bias_scale [Float] The scale of the bias term.
     # @param max_iter [Integer] The maximum number of epochs that indicates
     #   how many times the whole data is given to the training process.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is 'svd', this parameter is ignored.
     # @param batch_size [Integer] The size of the mini batches.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param tol [Float] The tolerance of loss for terminating optimization.
-    #   If solver = 'svd', this parameter is ignored.
-    # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd' or 'svd').
-    #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'sgd' solver.
+    #   If solver is 'svd', this parameter is ignored.
+    # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd' or 'lbfgs').
+    #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
     #   'sgd' uses the stochastic gradient descent optimization.
     #   'svd' performs singular value decomposition of samples.
+    #   'lbfgs' uses the L-BFGS method for optimization.
     # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
     #   If nil is given, the method does not execute in parallel.
     #   If zero or less is given, it becomes equal to the number of processors.
-    #   This parameter is ignored if the Parallel gem is not loaded.
+    #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
     # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is 'svd', this parameter is ignored.
     # @param random_seed [Integer] The seed value used to initialize the random generator.
     def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                    fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
```
data/lib/rumale/linear_model/linear_regression.rb:

```diff
@@ -80,9 +84,9 @@ module Rumale
       super()
       @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
       @params[:solver] = if solver == 'auto'
-                           enable_linalg? ? 'svd' : 'sgd'
+                           enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                          else
-                           solver
+                           solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
                          end
       @params[:decay] ||= @params[:learning_rate]
       @params[:random_seed] ||= srand
```
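The net effect of the new resolution logic: 'auto' prefers 'svd' when Numo::Linalg is available and otherwise falls back to 'lbfgs', and an unrecognized solver string now degrades to 'lbfgs' instead of being stored verbatim. A quick illustration of what the regexp above implies (a sketch, not gem documentation):

```ruby
require 'rumale'

Rumale::LinearModel::LinearRegression.new(solver: 'sgd').params[:solver]    # => "sgd"
Rumale::LinearModel::LinearRegression.new(solver: 'newton').params[:solver] # => "lbfgs"
```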
data/lib/rumale/linear_model/linear_regression.rb:

```diff
@@ -95,15 +99,17 @@ module Rumale
     # Fit the model with given training data.
     #
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-    # @param y [Numo::
+    # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
     # @return [LinearRegression] The learned regressor itself.
     def fit(x, y)
       x = check_convert_sample_array(x)
       y = check_convert_tvalue_array(y)
       check_sample_tvalue_size(x, y)
 
-      if @params[:solver] == 'svd' && enable_linalg?
+      if @params[:solver] == 'svd' && enable_linalg?(warning: false)
         fit_svd(x, y)
+      elsif @params[:solver] == 'lbfgs'
+        fit_lbfgs(x, y)
       else
         fit_sgd(x, y)
       end
```
data/lib/rumale/linear_model/linear_regression.rb:

```diff
@@ -124,24 +130,46 @@ module Rumale
 
       def fit_svd(x, y)
         x = expand_feature(x) if fit_bias?
-
         w = Numo::Linalg.pinv(x, driver: 'svd').dot(y)
+        @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+      end
 
-        is_single_target_vals = y.shape[1].nil?
-        if @params[:fit_bias]
-          @weight_vec = is_single_target_vals ? w[0...-1].dup : w[0...-1, true].dup
-          @bias_term = is_single_target_vals ? w[-1] : w[-1, true].dup
-        else
-          @weight_vec = w.dup
-          @bias_term = is_single_target_vals ? 0 : Numo::DFloat.zeros(y.shape[1])
+      def fit_lbfgs(x, y)
+        fnc = proc do |w, x, y| # rubocop:disable Lint/ShadowingOuterLocalVariable
+          n_samples, n_features = x.shape
+          w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+          z = x.dot(w.transpose)
+          d = z - y
+          loss = (d**2).sum.fdiv(n_samples)
+          gradient = 2.fdiv(n_samples) * d.transpose.dot(x)
+          [loss, gradient.flatten.dup]
         end
-      end
 
-      def fit_sgd(x, y)
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        x = expand_feature(x) if fit_bias?
+
         n_features = x.shape[1]
+        n_outputs = single_target?(y) ? 1 : y.shape[1]
 
+        res = Lbfgsb.minimize(
+          fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+          verbose: @params[:verbose] ? 1 : -1
+        )
 
-        if n_outputs > 1
+        @weight_vec, @bias_term =
+          if single_target?(y)
+            split_weight(res[:x])
+          else
+            split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+          end
+      end
+
+      def fit_sgd(x, y)
+        if single_target?(y)
+          @weight_vec, @bias_term = partial_fit(x, y)
+        else
+          n_outputs = y.shape[1]
+          n_features = x.shape[1]
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
           if enable_parallel?
```
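The proc handed to `Lbfgsb.minimize` evaluates plain mean squared error and its gradient, flattened because L-BFGS-B optimizes over a flat parameter vector. In our notation (not the gem's), with X the bias-expanded samples and W the weight matrix, the returned pair is:

```latex
L(W) = \frac{1}{n}\,\lVert X W^{\mathsf{T}} - Y \rVert_F^{2},
\qquad
\nabla_W L = \frac{2}{n}\,\left(X W^{\mathsf{T}} - Y\right)^{\mathsf{T}} X
```

The `factr` argument follows L-BFGS-B's convention of expressing the stopping tolerance in units of machine epsilon, which is why `tol` is divided by `Lbfgsb::DBL_EPSILON`.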
data/lib/rumale/linear_model/linear_regression.rb:

```diff
@@ -150,20 +178,23 @@ module Rumale
           else
             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
           end
-        else
-          @weight_vec, @bias_term = partial_fit(x, y)
         end
       end
 
-      def
-
+      def single_target?(y)
+        y.ndim == 1
       end
 
-      def
-
-
+      def init_weight(n_features, n_outputs)
+        Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+      end
 
-
+      def split_weight_mult(w)
+        if fit_bias?
+          [w[0...-1, true].dup, w[-1, true].dup]
+        else
+          [w.dup, Numo::DFloat.zeros(w.shape[1])]
+        end
       end
     end
   end
```
data/lib/rumale/linear_model/nnls.rb (new file):

```diff
@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+
+require 'lbfgsb'
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/regressor'
+
+module Rumale
+  module LinearModel
+    # NNLS is a class that implements non-negative least squares regression.
+    # NNLS solves the least squares problem under non-negative constraints on the coefficients using the L-BFGS-B method.
+    #
+    # @example
+    #   estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
+    #   estimator.fit(training_samples, training_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    class NNLS
+      include Base::BaseEstimator
+      include Base::Regressor
+
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+      attr_reader :weight_vec
+
+      # Return the bias term (a.k.a. intercept).
+      # @return [Numo::DFloat] (shape: [n_outputs])
+      attr_reader :bias_term
+
+      # Return the number of iterations when converged.
+      # @return [Integer]
+      attr_reader :n_iter
+
+      # Return the random generator for initializing weight.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new regressor with the non-negative least squares method.
+      #
+      # @param reg_param [Float] The regularization parameter for the L2 regularization term.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      #   If solver = 'svd', this parameter is ignored.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value used to initialize the random generator.
+      def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
+                     max_iter: 1000, tol: 1e-4, verbose: false, random_seed: nil)
+        check_params_numeric(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, tol: tol)
+        check_params_boolean(fit_bias: fit_bias, verbose: verbose)
+        check_params_numeric_or_nil(random_seed: random_seed)
+        check_params_positive(reg_param: reg_param, max_iter: max_iter)
+        @params = method(:initialize).parameters.each_with_object({}) { |(_, prm), obj| obj[prm] = binding.local_variable_get(prm) }
+        @params[:random_seed] ||= srand
+        @n_iter = nil
+        @weight_vec = nil
+        @bias_term = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [NNLS] The learned regressor itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+
+        x = expand_feature(x) if fit_bias?
+
+        n_features = x.shape[1]
+        n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+        w_init = Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+        w_init[w_init.lt(0)] = 0
+        bounds = Numo::DFloat.zeros(n_outputs * n_features, 2)
+        bounds.shape[0].times { |n| bounds[n, 1] = Float::INFINITY }
+
+        res = Lbfgsb.minimize(
+          fnc: method(:nnls_fnc), jcb: true, x_init: w_init, args: [x, y, @params[:reg_param]], bounds: bounds,
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: @params[:verbose] ? 1 : -1
+        )
+
+        @n_iter = res[:n_iter]
+        w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features).transpose
+
+        if fit_bias?
+          @weight_vec = single_target?(y) ? w[0...-1].dup : w[0...-1, true].dup
+          @bias_term = single_target?(y) ? w[-1] : w[-1, true].dup
+        else
+          @weight_vec = w.dup
+          @bias_term = single_target?(y) ? 0 : Numo::DFloat.zeros(y.shape[1])
+        end
+
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        x = check_convert_sample_array(x)
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+
+      private
+
+      def nnls_fnc(w, x, y, alpha)
+        n_samples, n_features = x.shape
+        w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+        z = x.dot(w.transpose)
+        d = z - y
+        loss = (d**2).sum.fdiv(n_samples) + alpha * (w * w).sum
+        gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * alpha * w
+        [loss, gradient.flatten.dup]
+      end
+
+      def expand_feature(x)
+        n_samples = x.shape[0]
+        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+      end
+
+      def fit_bias?
+        @params[:fit_bias] == true
+      end
+
+      def single_target?(y)
+        y.ndim == 1
+      end
+    end
+  end
+end
```
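Usage mirrors the other linear regressors; the non-negativity comes solely from the `bounds` matrix, which clamps every coefficient to [0, ∞). A small sketch with synthetic data (names and values are ours):

```ruby
require 'rumale'

# Data generated from known non-negative coefficients.
x = Numo::DFloat.new(200, 3).rand
true_w = Numo::DFloat[0.0, 1.5, 3.0]
y = x.dot(true_w) + 0.01 * Numo::DFloat.new(200).rand_norm

nnls = Rumale::LinearModel::NNLS.new(reg_param: 1e-4, random_seed: 1).fit(x, y)
p nnls.weight_vec # every coefficient is >= 0 because of the L-BFGS-B bounds
p nnls.n_iter     # iteration count reported back by Lbfgsb.minimize
```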
data/lib/rumale/linear_model/ridge.rb:

```diff
@@ -1,12 +1,15 @@
 # frozen_string_literal: true
 
+require 'lbfgsb'
+
 require 'rumale/linear_model/base_sgd'
 require 'rumale/base/regressor'
 
 module Rumale
   module LinearModel
     # Ridge is a class that implements Ridge Regression
-    # with stochastic gradient descent (SGD) optimization.
+    # with stochastic gradient descent (SGD) optimization,
+    # singular value decomposition (SVD), or L-BFGS optimization.
     #
     # @example
     #   estimator =
```
data/lib/rumale/linear_model/ridge.rb:

```diff
@@ -41,32 +44,33 @@ module Rumale
     #
     # @param learning_rate [Float] The initial value of learning rate.
     #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
     #   If nil is given, the decay is set to 'reg_param * learning_rate'.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param momentum [Float] The momentum factor.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param reg_param [Float] The regularization parameter.
     # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
     # @param bias_scale [Float] The scale of the bias term.
     # @param max_iter [Integer] The maximum number of epochs that indicates
     #   how many times the whole data is given to the training process.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is 'svd', this parameter is ignored.
     # @param batch_size [Integer] The size of the mini batches.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is not 'sgd', this parameter is ignored.
     # @param tol [Float] The tolerance of loss for terminating optimization.
-    #   If solver = 'svd', this parameter is ignored.
-    # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd' or '
-    #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'sgd' solver.
+    #   If solver is 'svd', this parameter is ignored.
+    # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd', or 'lbfgs').
+    #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
     #   'sgd' uses the stochastic gradient descent optimization.
     #   'svd' performs singular value decomposition of samples.
+    #   'lbfgs' uses the L-BFGS method for optimization.
     # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
     #   If nil is given, the method does not execute in parallel.
     #   If zero or less is given, it becomes equal to the number of processors.
-    #   This parameter is ignored if the Parallel gem is not loaded or
+    #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
     # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-    #   If solver = 'svd', this parameter is ignored.
+    #   If solver is 'svd', this parameter is ignored.
     # @param random_seed [Integer] The seed value used to initialize the random generator.
     def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                    reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
```
data/lib/rumale/linear_model/ridge.rb:

```diff
@@ -83,9 +87,9 @@ module Rumale
       super()
       @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
       @params[:solver] = if solver == 'auto'
-                           enable_linalg? ? 'svd' : 'sgd'
+                           enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                          else
-                           solver
+                           solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
                          end
       @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
       @params[:random_seed] ||= srand
```
data/lib/rumale/linear_model/ridge.rb:

```diff
@@ -99,15 +103,17 @@ module Rumale
     # Fit the model with given training data.
     #
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-    # @param y [Numo::
+    # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
     # @return [Ridge] The learned regressor itself.
     def fit(x, y)
       x = check_convert_sample_array(x)
       y = check_convert_tvalue_array(y)
       check_sample_tvalue_size(x, y)
 
-      if @params[:solver] == 'svd' && enable_linalg?
+      if @params[:solver] == 'svd' && enable_linalg?(warning: false)
         fit_svd(x, y)
+      elsif @params[:solver] == 'lbfgs'
+        fit_lbfgs(x, y)
       else
         fit_sgd(x, y)
       end
```
data/lib/rumale/linear_model/ridge.rb:

```diff
@@ -127,27 +133,51 @@ module Rumale
       private
 
       def fit_svd(x, y)
-        samples = @params[:fit_bias] ? expand_feature(x) : x
+        x = expand_feature(x) if fit_bias?
 
-        s, u, vt = Numo::Linalg.svd(samples, driver: 'sdd', job: 'S')
+        s, u, vt = Numo::Linalg.svd(x, driver: 'sdd', job: 'S')
         d = (s / (s**2 + @params[:reg_param])).diag
         w = vt.transpose.dot(d).dot(u.transpose).dot(y)
 
-        is_single_target_vals = y.shape[1].nil?
-        if @params[:fit_bias]
-          @weight_vec = is_single_target_vals ? w[0...-1].dup : w[0...-1, true].dup
-          @bias_term = is_single_target_vals ? w[-1] : w[-1, true].dup
-        else
-          @weight_vec = w.dup
-          @bias_term = is_single_target_vals ? 0 : Numo::DFloat.zeros(y.shape[1])
-        end
+        @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
       end
 
-      def fit_sgd(x, y)
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+      def fit_lbfgs(x, y)
+        fnc = proc do |w, x, y, a| # rubocop:disable Lint/ShadowingOuterLocalVariable
+          n_samples, n_features = x.shape
+          w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
+          z = x.dot(w.transpose)
+          d = z - y
+          loss = (d**2).sum.fdiv(n_samples) + a * (w * w).sum
+          gradient = 2.fdiv(n_samples) * d.transpose.dot(x) + 2.0 * a * w
+          [loss, gradient.flatten.dup]
+        end
+
+        x = expand_feature(x) if fit_bias?
+
         n_features = x.shape[1]
+        n_outputs = single_target?(y) ? 1 : y.shape[1]
+
+        res = Lbfgsb.minimize(
+          fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y, @params[:reg_param]],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+          verbose: @params[:verbose] ? 1 : -1
+        )
+
+        @weight_vec, @bias_term =
+          if single_target?(y)
+            split_weight(res[:x])
+          else
+            split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
+          end
+      end
 
-        if n_outputs > 1
+      def fit_sgd(x, y)
+        if single_target?(y)
+          @weight_vec, @bias_term = partial_fit(x, y)
+        else
+          n_outputs = y.shape[1]
+          n_features = x.shape[1]
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
           if enable_parallel?
```
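The rewritten `fit_svd` is the closed-form ridge estimator evaluated through a thin SVD: with X = U diag(s) Vᵀ, the filter factors sᵢ/(sᵢ² + λ) shrink low-variance directions, where λ is `reg_param` in our notation:

```latex
\hat{w}
= (X^{\mathsf{T}}X + \lambda I)^{-1} X^{\mathsf{T}} y
= V \,\mathrm{diag}\!\left(\frac{s_i}{s_i^{2}+\lambda}\right) U^{\mathsf{T}} y
```

This is exactly what the shown code computes: `d = (s / (s**2 + reg_param)).diag` followed by `vt.transpose.dot(d).dot(u.transpose).dot(y)`.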
data/lib/rumale/linear_model/ridge.rb:

```diff
@@ -156,16 +186,23 @@ module Rumale
           else
             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
           end
-        else
-          @weight_vec, @bias_term = partial_fit(x, y)
         end
       end
 
-      def
-
-
+      def single_target?(y)
+        y.ndim == 1
+      end
+
+      def init_weight(n_features, n_outputs)
+        Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
+      end
 
-
+      def split_weight_mult(w)
+        if fit_bias?
+          [w[0...-1, true].dup, w[-1, true].dup]
+        else
+          [w.dup, Numo::DFloat.zeros(w.shape[1])]
+        end
       end
     end
   end
```