rumale-linear_model 0.24.0 → 0.26.0
This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/lib/rumale/linear_model/base_estimator.rb +46 -0
- data/lib/rumale/linear_model/elastic_net.rb +112 -61
- data/lib/rumale/linear_model/lasso.rb +107 -61
- data/lib/rumale/linear_model/linear_regression.rb +39 -104
- data/lib/rumale/linear_model/logistic_regression.rb +36 -119
- data/lib/rumale/linear_model/nnls.rb +14 -46
- data/lib/rumale/linear_model/ridge.rb +39 -108
- data/lib/rumale/linear_model/sgd_classifier.rb +262 -0
- data/lib/rumale/linear_model/{base_sgd.rb → sgd_estimator.rb} +36 -21
- data/lib/rumale/linear_model/sgd_regressor.rb +138 -0
- data/lib/rumale/linear_model/svc.rb +60 -68
- data/lib/rumale/linear_model/svr.rb +54 -52
- data/lib/rumale/linear_model/version.rb +1 -1
- data/lib/rumale/linear_model.rb +2 -1
- metadata +8 -5
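
The headline change in this release is structural: per-estimator stochastic gradient descent training moves into the new `sgd_classifier.rb` and `sgd_regressor.rb` (with `base_sgd.rb` renamed to `sgd_estimator.rb`), while the new shared `base_estimator.rb` takes over the `weight_vec`/`bias_term` plumbing. A hypothetical migration sketch follows; the `SGDRegressor` constructor keywords are assumptions based on the file list, not shown in this diff:

```ruby
require 'rumale/linear_model'

# 0.24.0: SGD was one of LinearRegression's solvers.
# estimator = Rumale::LinearModel::LinearRegression.new(solver: 'sgd', random_seed: 1)

# 0.26.0: LinearRegression keeps only 'svd' and 'lbfgs'; SGD training lives in
# the class added by sgd_regressor.rb (keywords assumed for illustration).
estimator = Rumale::LinearModel::SGDRegressor.new(max_iter: 1000, random_seed: 1)
estimator.fit(training_samples, training_values)
results = estimator.predict(testing_samples)
```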
data/lib/rumale/linear_model/linear_regression.rb:

```diff
@@ -2,21 +2,21 @@
 
 require 'lbfgsb'
 
+require 'rumale/base/estimator'
 require 'rumale/base/regressor'
 require 'rumale/validation'
-require 'rumale/linear_model/base_sgd'
+
+require_relative 'base_estimator'
 
 module Rumale
   module LinearModel
     # LinearRegression is a class that implements ordinary least square linear regression
-    # with mini-batch stochastic gradient descent optimization,
-    # singular value decomposition (SVD), or L-BFGS optimization.
+    # with singular value decomposition (SVD) or L-BFGS optimization.
     #
     # @example
     #   require 'rumale/linear_model/linear_regression'
     #
-    #   estimator =
-    #     Rumale::LinearModel::LinearRegression.new(max_iter: 1000, batch_size: 20, random_seed: 1)
+    #   estimator = Rumale::LinearModel::LinearRegression.new
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
```
```diff
@@ -27,70 +27,38 @@ module Rumale
     #   estimator = Rumale::LinearModel::LinearRegression.new(solver: 'svd')
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    class LinearRegression < BaseSGD
-      include ::Rumale::Base::Regressor
-
-      # Return the weight vector.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept).
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
-
-      # Return the random generator for random sampling.
-      # @return [Random]
-      attr_reader :rng
+    class LinearRegression < Rumale::LinearModel::BaseEstimator
+      include Rumale::Base::Regressor
 
       # Create a new ordinary least square linear regressor.
       #
-      # @param learning_rate [Float] The initial value of learning rate.
-      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-      #   If solver is not 'sgd', this parameter is ignored.
-      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
-      #   If nil is given, the decay sets to 'learning_rate'.
-      #   If solver is not 'sgd', this parameter is ignored.
-      # @param momentum [Float] The momentum factor.
-      #   If solver is not 'sgd', this parameter is ignored.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
       #   If solver is 'svd', this parameter is ignored.
-      # @param batch_size [Integer] The size of the mini batches.
-      #   If solver is not 'sgd', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
       #   If solver is 'svd', this parameter is ignored.
-      # @param solver [String] The algorithm to calculate weights. ('auto', 'sgd', 'svd' or 'lbfgs').
+      # @param solver [String] The algorithm to calculate weights. ('auto', 'svd' or 'lbfgs').
       #   'auto' chooses the 'svd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'lbfgs' solver.
-      #   'sgd' uses the stochastic gradient descent optimization.
       #   'svd' performs singular value decomposition of samples.
       #   'lbfgs' uses the L-BFGS method for optimization.
-      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
-      #   If nil is given, the method does not execute in parallel.
-      #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded or solver is not 'sgd'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
       #   If solver is 'svd', this parameter is ignored.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
-                     fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
-                     solver: 'auto',
-                     n_jobs: nil, verbose: false, random_seed: nil)
+      def initialize(fit_bias: true, bias_scale: 1.0, max_iter: 1000, tol: 1e-4, solver: 'auto', verbose: false)
         super()
-        @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
+        @params = {
+          fit_bias: fit_bias,
+          bias_scale: bias_scale,
+          max_iter: max_iter,
+          tol: tol,
+          verbose: verbose
+        }
         @params[:solver] = if solver == 'auto'
                              enable_linalg?(warning: false) ? 'svd' : 'lbfgs'
                            else
-                             solver.match?(/^svd$|^sgd$|^lbfgs$/) ? solver : 'lbfgs'
+                             solver.match?(/^svd$|^lbfgs$/) ? solver : 'lbfgs'
                            end
-        @params[:decay] ||= @params[:learning_rate]
-        @params[:random_seed] ||= srand
-        @rng = Random.new(@params[:random_seed])
-        @loss_func = ::Rumale::LinearModel::Loss::MeanSquaredError.new
       end
 
       # Fit the model with given training data.
```
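
One behavioral consequence of the solver whitelist above is worth noting: the constructor no longer recognizes 'sgd' and silently falls back to 'lbfgs' rather than raising. An illustrative sketch using only constructor keywords shown in the hunk:

```ruby
require 'rumale/linear_model/linear_regression'

# 'auto' resolves to 'svd' when Numo::Linalg is loaded, otherwise to 'lbfgs'.
reg = Rumale::LinearModel::LinearRegression.new(solver: 'auto', max_iter: 1000, tol: 1e-4)

# Unknown solver names, including the removed 'sgd', now fall through the
# match?(/^svd$|^lbfgs$/) check and train with 'lbfgs' instead of raising.
legacy = Rumale::LinearModel::LinearRegression.new(solver: 'sgd')
```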
```diff
@@ -99,17 +67,15 @@ module Rumale
       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [LinearRegression] The learned regressor itself.
       def fit(x, y)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
-        y = ::Rumale::Validation.check_convert_target_value_array(y)
-        ::Rumale::Validation.check_sample_size(x, y)
-
-        if @params[:solver] == 'svd' && enable_linalg?(warning: false)
-          fit_svd(x, y)
-        elsif @params[:solver] == 'lbfgs'
-          fit_lbfgs(x, y)
-        else
-          fit_sgd(x, y)
-        end
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_target_value_array(y)
+        Rumale::Validation.check_sample_size(x, y)
+
+        @weight_vec, @bias_term = if @params[:solver] == 'svd' && enable_linalg?(warning: false)
+                                    partial_fit_svd(x, y)
+                                  else
+                                    partial_fit_lbfgs(x, y)
+                                  end
 
         self
       end
```
```diff
@@ -119,21 +85,22 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
 
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
       private
 
-      def fit_svd(x, y)
+      def partial_fit_svd(x, y)
         x = expand_feature(x) if fit_bias?
         w = Numo::Linalg.pinv(x, driver: 'svd').dot(y)
-        @weight_vec, @bias_term = single_target?(y) ? split_weight(w) : split_weight_mult(w)
+        w = w.transpose.dup unless single_target?(y)
+        split_weight(w)
       end
 
-      def fit_lbfgs(x, y)
-        fnc = proc do |w, x, y|
+      def partial_fit_lbfgs(base_x, base_y)
+        fnc = proc do |w, x, y|
           n_samples, n_features = x.shape
           w = w.reshape(y.shape[1], n_features) unless y.shape[1].nil?
           z = x.dot(w.transpose)
```
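
The renamed `partial_fit_svd` is the textbook pseudo-inverse solution w = X⁺y, with the bias folded in as an extra constant feature column. A standalone sketch of the same computation in plain Numo, assuming Numo::Linalg is installed (not part of this gem's API):

```ruby
require 'numo/narray'
require 'numo/linalg/autoloader'

x = Numo::DFloat.new(100, 3).rand
y = x.dot(Numo::DFloat[0.5, -1.2, 2.0]) + 0.7

# expand_feature: append a constant column so the bias is learned as one more weight.
xb = Numo::NArray.hstack([x, Numo::DFloat.ones(100, 1)])

w = Numo::Linalg.pinv(xb, driver: 'svd').dot(y) # minimum-norm least-squares solution
weight_vec = w[0...-1]                          # split_weight, as in the hunk above
bias_term  = w[-1]                              # recovers roughly 0.7
```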
```diff
@@ -143,57 +110,25 @@ module Rumale
           [loss, gradient.flatten.dup]
         end
 
-        x = expand_feature(x) if fit_bias?
+        base_x = expand_feature(base_x) if fit_bias?
 
-        n_features = x.shape[1]
-        n_outputs = single_target?(y) ? 1 : y.shape[1]
+        n_features = base_x.shape[1]
+        n_outputs = single_target?(base_y) ? 1 : base_y.shape[1]
+        w_init = Numo::DFloat.zeros(n_outputs * n_features)
 
         res = Lbfgsb.minimize(
-          fnc: fnc, jcb: true, x_init: init_weight(n_features, n_outputs), args: [x, y],
+          fnc: fnc, jcb: true, x_init: w_init, args: [base_x, base_y],
           maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
           verbose: @params[:verbose] ? 1 : -1
         )
 
-        @weight_vec, @bias_term =
-          if single_target?(y)
-            split_weight(res[:x])
-          else
-            split_weight_mult(res[:x].reshape(n_outputs, n_features).transpose)
-          end
-      end
-
-      def fit_sgd(x, y)
-        if single_target?(y)
-          @weight_vec, @bias_term = partial_fit(x, y)
-        else
-          n_outputs = y.shape[1]
-          n_features = x.shape[1]
-          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-          @bias_term = Numo::DFloat.zeros(n_outputs)
-          if enable_parallel?
-            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
-            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-          end
-        end
+        w = single_target?(base_y) ? res[:x] : res[:x].reshape(n_outputs, n_features)
+        split_weight(w)
       end
 
       def single_target?(y)
         y.ndim == 1
       end
-
-      def init_weight(n_features, n_outputs)
-        Rumale::Utils.rand_normal([n_outputs, n_features], @rng.dup).flatten.dup
-      end
-
-      def split_weight_mult(w)
-        if fit_bias?
-          [w[0...-1, true].dup, w[-1, true].dup]
-        else
-          [w.dup, Numo::DFloat.zeros(w.shape[1])]
-        end
-      end
     end
   end
 end
```
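
`partial_fit_lbfgs` optimizes a single flat parameter vector even for multi-output targets: the objective proc reshapes `w` to `[n_outputs, n_features]` on every call and flattens the gradient on the way back, because `Lbfgsb.minimize` operates on 1-D vectors. The same pattern in miniature (a sketch, not the gem's code):

```ruby
require 'numo/narray'
require 'lbfgsb'

x = Numo::DFloat.new(40, 3).rand
y = x.dot(Numo::DFloat[[1.0, -2.0, 0.5], [0.0, 1.0, 1.0]].transpose) # two outputs

fnc = proc do |w, data, target|
  w = w.reshape(2, 3)              # un-flatten, as in the diff's proc
  z = data.dot(w.transpose) - target
  loss = (z**2).sum
  grad = 2 * z.transpose.dot(data) # shape [2, 3]
  [loss, grad.flatten.dup]         # L-BFGS-B sees a flat gradient
end

res = Lbfgsb.minimize(fnc: fnc, jcb: true, x_init: Numo::DFloat.zeros(6), args: [x, y])
w = res[:x].reshape(2, 3)          # final reshape, as in partial_fit_lbfgs
```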
data/lib/rumale/linear_model/logistic_regression.rb:

```diff
@@ -2,16 +2,15 @@
 
 require 'lbfgsb'
 
-require 'rumale/base/classifier'
 require 'rumale/utils'
 require 'rumale/validation'
-require 'rumale/linear_model/base_sgd'
+require 'rumale/base/classifier'
+
+require_relative 'base_estimator'
 
 module Rumale
   module LinearModel
-    # LogisticRegression is a class that implements Logistic Regression.
-    # In multiclass classification problem, it uses one-vs-the-rest strategy for the sgd solver
-    # and multinomial logistic regression for the lbfgs solver.
+    # LogisticRegression is a class that implements (multinomial) Logistic Regression.
     #
     # @note
     #   Rumale::SVM provides Logistic Regression based on LIBLINEAR.
```
```diff
@@ -21,88 +20,42 @@ module Rumale
     # @example
     #   require 'rumale/linear_model/logistic_regression'
     #
-    #   estimator =
-    #     Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, random_seed: 1)
+    #   estimator = Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - Shalev-Shwartz, S., Singer, Y., Srebro, N., and Cotter, A., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
-    # - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
-    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    class LogisticRegression < BaseSGD # rubocop:disable Metrics/ClassLength
-      include ::Rumale::Base::Classifier
-
-      # Return the weight vector for Logistic Regression.
-      # @return [Numo::DFloat] (shape: [n_classes, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept) for Logistic Regression.
-      # @return [Numo::DFloat] (shape: [n_classes])
-      attr_reader :bias_term
+    class LogisticRegression < Rumale::LinearModel::BaseEstimator
+      include Rumale::Base::Classifier
 
       # Return the class labels.
       # @return [Numo::Int32] (shape: [n_classes])
       attr_reader :classes
 
-      # Return the random generator for performing random sampling.
-      # @return [Random]
-      attr_reader :rng
-
       # Create a new classifier with Logisitc Regression.
       #
-      # @param learning_rate [Float] The initial value of learning rate.
-      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
-      #   If solver = 'lbfgs', this parameter is ignored.
-      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
-      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
-      #   If solver = 'lbfgs', this parameter is ignored.
-      # @param momentum [Float] The momentum factor.
-      #   If solver = 'lbfgs', this parameter is ignored.
-      # @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
-      #   If solver = 'lbfgs', only 'l2' can be selected for this parameter.
-      # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
-      #   If penalty set to 'l2' or 'l1', this parameter is ignored.
-      #   If l1_ratio = 1, the regularization is similar to Lasso.
-      #   If l1_ratio = 0, the regularization is similar to Ridge.
-      #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
-      #   If solver = 'lbfgs', this parameter is ignored.
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
       #   If fit_bias is true, the feature vector v becoms [v; bias_scale].
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
-      # @param batch_size [Integer] The size of the mini batches.
-      #   If solver = 'lbfgs', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
-      #
-      # @param solver [String] The algorithm for optimization. ('lbfgs' or 'sgd').
-      #   'lbfgs' uses the L-BFGS with lbfgs.rb gem.
-      #   'sgd' uses the stochastic gradient descent optimization.
-      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      # @param n_jobs [Integer] The number of jobs for running the predict methods in parallel.
       #   If nil is given, the methods do not execute in parallel.
       #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-      #   If solver = 'lbfgs' and true is given, 'iterate.dat' file is generated by lbfgsb.rb.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
-                     penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
-                     fit_bias: true, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 50, tol: 1e-4,
-                     solver: 'lbfgs',
-                     n_jobs: nil, verbose: false, random_seed: nil)
-        raise ArgumentError, "The 'lbfgs' solver supports only 'l2' penalties." if solver == 'lbfgs' && penalty != 'l2'
-
+      #   'iterate.dat' file is generated by lbfgsb.rb.
+      def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0, max_iter: 1000, tol: 1e-4, n_jobs: nil, verbose: false)
         super()
-        @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
-        @params[:solver] = solver == 'sgd' ? 'sgd' : 'lbfgs'
-        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
-        @params[:random_seed] ||= srand
-        @rng = Random.new(@params[:random_seed])
-        @penalty_type = @params[:penalty]
-        @loss_func = ::Rumale::LinearModel::Loss::LogLoss.new
+        @params = {
+          reg_param: reg_param,
+          fit_bias: fit_bias,
+          bias_scale: bias_scale,
+          max_iter: max_iter,
+          tol: tol,
+          n_jobs: n_jobs,
+          verbose: verbose
+        }
       end
 
       # Fit the model with given training data.
```
```diff
@@ -111,16 +64,12 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [LogisticRegression] The learned classifier itself.
       def fit(x, y)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
-        y = ::Rumale::Validation.check_convert_label_array(y)
-        ::Rumale::Validation.check_sample_size(x, y)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_label_array(y)
+        Rumale::Validation.check_sample_size(x, y)
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
-        if @params[:solver] == 'sgd'
-          fit_sgd(x, y)
-        else
-          fit_lbfgs(x, y)
-        end
+        @weight_vec, @bias_term = partial_fit(x, y)
 
         self
       end
```
```diff
@@ -130,7 +79,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
       def decision_function(x)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
 
         x.dot(@weight_vec.transpose) + @bias_term
       end
```
```diff
@@ -140,7 +89,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
 
         n_samples, = x.shape
         decision_values = predict_proba(x)
```
```diff
@@ -157,7 +106,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
 
         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
         return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
```
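
`predict_proba` above applies an element-wise sigmoid to the decision values and, in the multiclass case, renormalizes each row so the class probabilities sum to one. A minimal Numo illustration of those two lines:

```ruby
require 'numo/narray'

scores = Numo::DFloat[[2.0, -1.0, 0.5]]                  # decision_function output, one sample
proba = 1.0 / (Numo::NMath.exp(-scores) + 1.0)           # element-wise sigmoid
proba = (proba.transpose / proba.sum(axis: 1)).transpose # rows now sum to 1.0
```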
```diff
@@ -171,11 +120,7 @@ module Rumale
 
       private
 
-      def multiclass_problem?
-        @classes.size > 2
-      end
-
-      def fit_lbfgs(base_x, base_y) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+      def partial_fit(base_x, base_y) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
         if multiclass_problem?
           fnc = proc do |w, x, y, a|
             n_features = x.shape[1]
```
```diff
@@ -199,20 +144,13 @@ module Rumale
           n_features = base_x.shape[1]
           w_init = Numo::DFloat.zeros(n_classes * n_features)
 
-          verbose = @params[:verbose] ? 1 : -1
           res = Lbfgsb.minimize(
             fnc: fnc, jcb: true, x_init: w_init, args: [base_x, onehot_y, @params[:reg_param]],
-            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+            verbose: @params[:verbose] ? 1 : -1
           )
 
-          if fit_bias?
-            weight = res[:x].reshape(n_classes, n_features)
-            @weight_vec = weight[true, 0...-1].dup
-            @bias_term = weight[true, -1].dup
-          else
-            @weight_vec = res[:x].reshape(n_classes, n_features)
-            @bias_term = Numo::DFloat.zeros(n_classes)
-          end
+          split_weight(res[:x].reshape(n_classes, n_features))
         else
           fnc = proc do |w, x, y, a|
             z = 1 + Numo::NMath.exp(-y * x.dot(w))
```
```diff
@@ -227,39 +165,18 @@ module Rumale
           n_features = base_x.shape[1]
           w_init = Numo::DFloat.zeros(n_features)
 
-          verbose = @params[:verbose] ? 1 : -1
           res = Lbfgsb.minimize(
             fnc: fnc, jcb: true, x_init: w_init, args: [base_x, bin_y, @params[:reg_param]],
-            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
+            verbose: @params[:verbose] ? 1 : -1
           )
 
-          @weight_vec, @bias_term = split_weight(res[:x])
+          split_weight(res[:x])
         end
       end
 
-      def fit_sgd(x, y)
-        if multiclass_problem?
-          n_classes = @classes.size
-          n_features = x.shape[1]
-          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-          @bias_term = Numo::DFloat.zeros(n_classes)
-          if enable_parallel?
-            models = parallel_map(n_classes) do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              partial_fit(x, bin_y)
-            end
-            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_classes.times do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-            end
-          end
-        else
-          negative_label = @classes[0]
-          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @weight_vec, @bias_term = partial_fit(x, bin_y)
-        end
+      def multiclass_problem?
+        @classes.size > 2
       end
     end
   end
```
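
The removed `fit_sgd` also documents the ±1 label encoding that the remaining binary branch of `partial_fit` relies on; in miniature:

```ruby
require 'numo/narray'

classes = Numo::Int32[0, 1]
y = Numo::Int32[0, 1, 1, 0]

# Map {negative_label, everything else} to {-1, +1}, the idiom from fit_sgd.
bin_y = Numo::Int32.cast(y.ne(classes[0])) * 2 - 1 # => Numo::Int32[-1, 1, 1, -1]
```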
data/lib/rumale/linear_model/nnls.rb:

```diff
@@ -2,10 +2,11 @@
 
 require 'lbfgsb'
 
-require 'rumale/base/estimator'
 require 'rumale/base/regressor'
 require 'rumale/validation'
 
+require_relative 'base_estimator'
+
 module Rumale
   module LinearModel
     # NNLS is a class that implements non-negative least squares regression.
```
```diff
@@ -14,29 +15,17 @@ module Rumale
     # @example
     #   require 'rumale/linear_model/nnls'
     #
-    #   estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01, random_seed: 1)
+    #   estimator = Rumale::LinearModel::NNLS.new(reg_param: 0.01)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
-    class NNLS < Rumale::Base::Estimator
-      include ::Rumale::Base::Regressor
-
-      # Return the weight vector.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term (a.k.a. intercept).
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
+    class NNLS < Rumale::LinearModel::BaseEstimator
+      include Rumale::Base::Regressor
 
       # Returns the number of iterations when converged.
       # @return [Integer]
       attr_reader :n_iter
 
-      # Return the random generator for initializing weight.
-      # @return [Random]
-      attr_reader :rng
-
       # Create a new regressor with non-negative least squares method.
       #
       # @param reg_param [Float] The regularization parameter for L2 regularization term.
```
```diff
@@ -47,9 +36,7 @@ module Rumale
       # @param tol [Float] The tolerance of loss for terminating optimization.
       #   If solver = 'svd', this parameter is ignored.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 1000, tol: 1e-4, verbose: false, random_seed: nil)
+      def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0, max_iter: 1000, tol: 1e-4, verbose: false)
         super()
         @params = {
           reg_param: reg_param,
```
```diff
@@ -57,10 +44,8 @@ module Rumale
           bias_scale: bias_scale,
           max_iter: max_iter,
           tol: tol,
-          verbose: verbose,
-          random_seed: random_seed || srand
+          verbose: verbose
         }
-        @rng = Random.new(@params[:random_seed])
       end
 
       # Fit the model with given training data.
```
```diff
@@ -69,17 +54,16 @@ module Rumale
       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [NonneagtiveLeastSquare] The learned regressor itself.
       def fit(x, y)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
-        y = ::Rumale::Validation.check_convert_target_value_array(y)
-        ::Rumale::Validation.check_sample_size(x, y)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        y = Rumale::Validation.check_convert_target_value_array(y)
+        Rumale::Validation.check_sample_size(x, y)
 
         x = expand_feature(x) if fit_bias?
 
         n_features = x.shape[1]
         n_outputs = single_target?(y) ? 1 : y.shape[1]
 
-        w_init = ::Rumale::Utils.rand_normal([n_outputs * n_features], @rng.dup)
-        w_init[w_init.lt(0)] = 0
+        w_init = Numo::DFloat.zeros(n_outputs * n_features)
         bounds = Numo::DFloat.zeros(n_outputs * n_features, 2)
         bounds.shape[0].times { |n| bounds[n, 1] = Float::INFINITY }
 
```
```diff
@@ -89,15 +73,8 @@ module Rumale
         )
 
         @n_iter = res[:n_iter]
-        w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features)
-
-        if fit_bias?
-          @weight_vec = single_target?(y) ? w[0...-1].dup : w[0...-1, true].dup
-          @bias_term = single_target?(y) ? w[-1] : w[-1, true].dup
-        else
-          @weight_vec = w.dup
-          @bias_term = single_target?(y) ? 0 : Numo::DFloat.zeros(y.shape[1])
-        end
+        w = single_target?(y) ? res[:x] : res[:x].reshape(n_outputs, n_features)
+        @weight_vec, @bias_term = split_weight(w)
 
         self
       end
```
```diff
@@ -107,7 +84,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-        x = ::Rumale::Validation.check_convert_sample_array(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
 
         x.dot(@weight_vec.transpose) + @bias_term
       end
```
```diff
@@ -124,15 +101,6 @@ module Rumale
         [loss, gradient.flatten.dup]
       end
 
-      def expand_feature(x)
-        n_samples = x.shape[0]
-        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
-      end
-
-      def fit_bias?
-        @params[:fit_bias] == true
-      end
-
       def single_target?(y)
         y.ndim == 1
       end
```
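
NNLS here is ridge-regularized least squares solved with L-BFGS-B under a lower bound of zero on every weight, as the `bounds` construction above shows. A self-contained sketch with the lbfgsb gem; the objective below is a plain squared-error stand-in for the gem's private `nnls_fnc`, not a copy of it:

```ruby
require 'numo/narray'
require 'lbfgsb'

x = Numo::DFloat.new(50, 2).rand
y = x.dot(Numo::DFloat[1.5, 0.0]) # the second true weight is zero

fnc = proc do |w, data, target|
  z = data.dot(w) - target
  loss = (z**2).sum                # squared error, no regularization in this sketch
  grad = 2 * data.transpose.dot(z)
  [loss, grad]
end

# bounds[i, 0] = lower, bounds[i, 1] = upper; zero lower bounds enforce w >= 0.
bounds = Numo::DFloat.zeros(2, 2)
bounds[true, 1] = Float::INFINITY

res = Lbfgsb.minimize(fnc: fnc, jcb: true, x_init: Numo::DFloat.zeros(2),
                      args: [x, y], bounds: bounds, maxiter: 1000)
weights = res[:x] # every entry is >= 0
```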