rumale-linear_model 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/lib/rumale/linear_model/base_estimator.rb +46 -0
- data/lib/rumale/linear_model/elastic_net.rb +112 -61
- data/lib/rumale/linear_model/lasso.rb +107 -61
- data/lib/rumale/linear_model/linear_regression.rb +39 -104
- data/lib/rumale/linear_model/logistic_regression.rb +36 -119
- data/lib/rumale/linear_model/nnls.rb +14 -46
- data/lib/rumale/linear_model/ridge.rb +39 -108
- data/lib/rumale/linear_model/sgd_classifier.rb +262 -0
- data/lib/rumale/linear_model/{base_sgd.rb → sgd_estimator.rb} +36 -21
- data/lib/rumale/linear_model/sgd_regressor.rb +138 -0
- data/lib/rumale/linear_model/svc.rb +60 -68
- data/lib/rumale/linear_model/svr.rb +54 -52
- data/lib/rumale/linear_model/version.rb +1 -1
- data/lib/rumale/linear_model.rb +2 -1
- metadata +8 -5
@@ -0,0 +1,138 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/regressor'
|
4
|
+
require 'rumale/validation'
|
5
|
+
|
6
|
+
require_relative 'sgd_estimator'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module LinearModel
|
10
|
+
# SGDRegressor is a class that implements linear regressor with stochastic gradient descent optimization.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'rumale/linear_model/sgd_regressor'
|
14
|
+
#
|
15
|
+
# estimator =
|
16
|
+
# Rumale::LinearModel::SGDRegressor.new(loss: 'squared_error', reg_param: 1.0, max_iter: 1000, batch_size: 50, random_seed: 1)
|
17
|
+
# estimator.fit(training_samples, training_target_values)
|
18
|
+
# results = estimator.predict(testing_samples)
|
19
|
+
#
|
20
|
+
# *Reference*
|
21
|
+
# - Shalev-Shwartz, S., and Singer, Y., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
|
22
|
+
# - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
|
23
|
+
# - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
|
24
|
+
class SGDRegressor < Rumale::LinearModel::SGDEstimator
  include Rumale::Base::Regressor

  # The random number generator used for random sampling.
  # @return [Random]
  attr_reader :rng

  # Build a linear regressor that is trained with stochastic gradient descent.
  #
  # @param loss [String] Name of the loss function ('squared_error' or 'epsilon_insensitive').
  # @param learning_rate [Float] Initial learning rate.
  #   It shrinks over the iterations following: learning_rate / (1 + decay * t).
  # @param decay [Float] Smoothing parameter that controls how fast the learning rate shrinks.
  #   When nil, it is set to 'reg_param * learning_rate'.
  # @param momentum [Float] Momentum factor.
  # @param penalty [String] Type of regularization ('l1', 'l2', or 'elasticnet').
  # @param reg_param [Float] Regularization parameter.
  # @param l1_ratio [Float] Mixing parameter of the elastic-net regularization.
  #   Ignored when penalty is 'l2' or 'l1'.
  #   l1_ratio = 1 behaves like Lasso, l1_ratio = 0 behaves like Ridge,
  #   and 0 < l1_ratio < 1 combines L1 and L2.
  # @param fit_bias [Boolean] Whether to fit the bias term.
  # @param bias_scale [Float] Scale of the bias term.
  # @param epsilon [Float] Margin of tolerance. Ignored when loss is 'squared_error'.
  # @param max_iter [Integer] Maximum number of epochs, i.e. how many times
  #   the whole data is given to the training process.
  # @param batch_size [Integer] Size of the mini batches.
  # @param tol [Float] Tolerance of loss for terminating optimization.
  # @param n_jobs [Integer] Number of jobs used to run the fit method in parallel.
  #   nil disables parallel execution; zero or less means the number of processors.
  #   Ignored if the Parallel gem is not loaded.
  # @param verbose [Boolean] Whether to output loss during iteration.
  # @param random_seed [Integer] Seed value used to initialize the random generator.
  # @raise [ArgumentError] If the given loss name is not supported.
  def initialize(loss: 'squared_error', learning_rate: 0.01, decay: nil, momentum: 0.9,
                 penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
                 fit_bias: true, bias_scale: 1.0,
                 epsilon: 0.1,
                 max_iter: 1000, batch_size: 50, tol: 1e-4,
                 n_jobs: nil, verbose: false, random_seed: nil)
    super()
    @params.merge!(
      loss: loss, learning_rate: learning_rate, decay: decay, momentum: momentum,
      penalty: penalty, reg_param: reg_param, l1_ratio: l1_ratio,
      fit_bias: fit_bias, bias_scale: bias_scale,
      epsilon: epsilon,
      max_iter: max_iter, batch_size: batch_size, tol: tol,
      n_jobs: n_jobs, verbose: verbose, random_seed: random_seed
    )
    # Fill in the defaults that depend on other hyper-parameters.
    @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
    @params[:random_seed] ||= srand
    @rng = Random.new(@params[:random_seed])
    @penalty_type = @params[:penalty]

    # Resolve the loss object from its registered name.
    loss_name = @params[:loss]
    @loss_func =
      if Rumale::LinearModel::Loss::MeanSquaredError::NAME == loss_name
        Rumale::LinearModel::Loss::MeanSquaredError.new
      elsif Rumale::LinearModel::Loss::EpsilonInsensitive::NAME == loss_name
        Rumale::LinearModel::Loss::EpsilonInsensitive.new(epsilon: @params[:epsilon])
      else
        raise ArgumentError, "given loss '#{loss}' is not supported."
      end
  end

  # Train the regressor on the given data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training samples.
  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
  # @return [SGDRegressor] The learned regressor itself.
  def fit(x, y)
    x = Rumale::Validation.check_convert_sample_array(x)
    y = Rumale::Validation.check_convert_target_value_array(y)
    Rumale::Validation.check_sample_size(x, y)

    n_features = x.shape[1]
    n_outputs = y.shape[1] || 1

    # Single output: one model is fitted on the target array as given.
    unless n_outputs > 1
      @weight_vec, @bias_term = partial_fit(x, y)
      return self
    end

    # Multiple outputs: one independent model per target column.
    @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
    @bias_term = Numo::DFloat.zeros(n_outputs)
    if enable_parallel?
      fitted = parallel_map(n_outputs) { |idx| partial_fit(x, y[true, idx]) }
      n_outputs.times do |idx|
        @weight_vec[idx, true], @bias_term[idx] = fitted[idx]
      end
    else
      n_outputs.times do |idx|
        @weight_vec[idx, true], @bias_term[idx] = partial_fit(x, y[true, idx])
      end
    end

    self
  end

  # Predict target values for the given samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
  # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
  def predict(x)
    x = Rumale::Validation.check_convert_sample_array(x)

    projected = x.dot(@weight_vec.transpose)
    projected + @bias_term
  end
end
|
137
|
+
end
|
138
|
+
end
|
@@ -1,15 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'lbfgsb'
|
4
|
+
|
3
5
|
require 'rumale/base/classifier'
|
4
|
-
require 'rumale/linear_model/base_sgd'
|
5
6
|
require 'rumale/probabilistic_output'
|
6
7
|
require 'rumale/validation'
|
7
8
|
|
9
|
+
require_relative 'base_estimator'
|
10
|
+
|
8
11
|
module Rumale
|
9
|
-
# This module consists of the classes that implement generalized linear models.
|
10
12
|
module LinearModel
|
11
|
-
# SVC is a class that implements Support Vector Classifier
|
12
|
-
# with stochastic gradient descent optimization.
|
13
|
+
# SVC is a class that implements Support Vector Classifier with the squared hinge loss.
|
13
14
|
# For multiclass classification problem, it uses one-vs-the-rest strategy.
|
14
15
|
#
|
15
16
|
# @note
|
@@ -21,52 +22,23 @@ module Rumale
|
|
21
22
|
# require 'rumale/linear_model/svc'
|
22
23
|
#
|
23
24
|
# estimator =
|
24
|
-
# Rumale::LinearModel::SVC.new(reg_param: 1.0
|
25
|
+
# Rumale::LinearModel::SVC.new(reg_param: 1.0)
|
25
26
|
# estimator.fit(training_samples, training_labels)
|
26
27
|
# results = estimator.predict(testing_samples)
|
27
|
-
|
28
|
-
|
29
|
-
# - Shalev-Shwartz, S., and Singer, Y., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
|
30
|
-
# - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
|
31
|
-
# - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
|
32
|
-
class SVC < BaseSGD
|
33
|
-
include ::Rumale::Base::Classifier
|
34
|
-
|
35
|
-
# Return the weight vector for SVC.
|
36
|
-
# @return [Numo::DFloat] (shape: [n_classes, n_features])
|
37
|
-
attr_reader :weight_vec
|
38
|
-
|
39
|
-
# Return the bias term (a.k.a. intercept) for SVC.
|
40
|
-
# @return [Numo::DFloat] (shape: [n_classes])
|
41
|
-
attr_reader :bias_term
|
28
|
+
class SVC < Rumale::LinearModel::BaseEstimator
|
29
|
+
include Rumale::Base::Classifier
|
42
30
|
|
43
31
|
# Return the class labels.
|
44
32
|
# @return [Numo::Int32] (shape: [n_classes])
|
45
33
|
attr_reader :classes
|
46
34
|
|
47
|
-
#
|
48
|
-
# @return [Random]
|
49
|
-
attr_reader :rng
|
50
|
-
|
51
|
-
# Create a new classifier with Support Vector Machine by the SGD optimization.
|
35
|
+
# Create a new linear classifier with Support Vector Machine with the squared hinge loss.
|
52
36
|
#
|
53
|
-
# @param learning_rate [Float] The initial value of learning rate.
|
54
|
-
# The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
|
55
|
-
# @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
|
56
|
-
# If nil is given, the decay sets to 'reg_param * learning_rate'.
|
57
|
-
# @param momentum [Float] The momentum factor.
|
58
|
-
# @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
|
59
|
-
# @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
|
60
|
-
# If penalty set to 'l2' or 'l1', this parameter is ignored.
|
61
|
-
# If l1_ratio = 1, the regularization is similar to Lasso.
|
62
|
-
# If l1_ratio = 0, the regularization is similar to Ridge.
|
63
|
-
# If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
|
64
37
|
# @param reg_param [Float] The regularization parameter.
|
65
38
|
# @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
|
66
39
|
# @param bias_scale [Float] The scale of the bias term.
|
67
40
|
# @param max_iter [Integer] The maximum number of epochs that indicates
|
68
41
|
# how many times the whole data is given to the training process.
|
69
|
-
# @param batch_size [Integer] The size of the mini batches.
|
70
42
|
# @param tol [Float] The tolerance of loss for terminating optimization.
|
71
43
|
# @param probability [Boolean] The flag indicating whether to perform probability estimation.
|
72
44
|
# @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
|
@@ -74,20 +46,20 @@ module Rumale
|
|
74
46
|
# If zero or less is given, it becomes equal to the number of processors.
|
75
47
|
# This parameter is ignored if the Parallel gem is not loaded.
|
76
48
|
# @param verbose [Boolean] The flag indicating whether to output loss during iteration.
|
77
|
-
#
|
78
|
-
def initialize(
|
79
|
-
|
80
|
-
fit_bias: true, bias_scale: 1.0,
|
81
|
-
max_iter: 1000, batch_size: 50, tol: 1e-4,
|
82
|
-
probability: false,
|
83
|
-
n_jobs: nil, verbose: false, random_seed: nil)
|
49
|
+
# 'iterate.dat' file is generated by lbfgsb.rb.
|
50
|
+
def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0, max_iter: 1000, tol: 1e-4, probability: false,
|
51
|
+
n_jobs: nil, verbose: false)
|
84
52
|
super()
|
85
|
-
@params
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
53
|
+
@params = {
|
54
|
+
reg_param: reg_param,
|
55
|
+
fit_bias: fit_bias,
|
56
|
+
bias_scale: bias_scale,
|
57
|
+
max_iter: max_iter,
|
58
|
+
tol: tol,
|
59
|
+
probability: probability,
|
60
|
+
n_jobs: n_jobs,
|
61
|
+
verbose: verbose
|
62
|
+
}
|
91
63
|
end
|
92
64
|
|
93
65
|
# Fit the model with given training data.
|
@@ -96,20 +68,20 @@ module Rumale
|
|
96
68
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
97
69
|
# @return [SVC] The learned classifier itself.
|
98
70
|
def fit(x, y)
|
99
|
-
x =
|
100
|
-
y =
|
101
|
-
|
71
|
+
x = Rumale::Validation.check_convert_sample_array(x)
|
72
|
+
y = Rumale::Validation.check_convert_label_array(y)
|
73
|
+
Rumale::Validation.check_sample_size(x, y)
|
102
74
|
|
103
75
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
76
|
+
x = expand_feature(x) if fit_bias?
|
104
77
|
|
105
78
|
if multiclass_problem?
|
106
79
|
n_classes = @classes.size
|
107
80
|
n_features = x.shape[1]
|
108
|
-
|
81
|
+
n_features -= 1 if fit_bias?
|
109
82
|
@weight_vec = Numo::DFloat.zeros(n_classes, n_features)
|
110
83
|
@bias_term = Numo::DFloat.zeros(n_classes)
|
111
84
|
@prob_param = Numo::DFloat.zeros(n_classes, 2)
|
112
|
-
# fit model.
|
113
85
|
models = if enable_parallel?
|
114
86
|
parallel_map(n_classes) do |n|
|
115
87
|
bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
|
@@ -121,7 +93,6 @@ module Rumale
|
|
121
93
|
partial_fit(x, bin_y)
|
122
94
|
end
|
123
95
|
end
|
124
|
-
# store model.
|
125
96
|
models.each_with_index { |model, n| @weight_vec[n, true], @bias_term[n], @prob_param[n, true] = model }
|
126
97
|
else
|
127
98
|
negative_label = @classes[0]
|
@@ -137,7 +108,7 @@ module Rumale
|
|
137
108
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
138
109
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
|
139
110
|
def decision_function(x)
|
140
|
-
x =
|
111
|
+
x = Rumale::Validation.check_convert_sample_array(x)
|
141
112
|
|
142
113
|
x.dot(@weight_vec.transpose) + @bias_term
|
143
114
|
end
|
@@ -147,7 +118,7 @@ module Rumale
|
|
147
118
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
148
119
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
149
120
|
def predict(x)
|
150
|
-
x =
|
121
|
+
x = Rumale::Validation.check_convert_sample_array(x)
|
151
122
|
|
152
123
|
n_samples = x.shape[0]
|
153
124
|
predicted = if multiclass_problem?
|
@@ -169,13 +140,13 @@ module Rumale
|
|
169
140
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
170
141
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
171
142
|
def predict_proba(x)
|
172
|
-
x =
|
143
|
+
x = Rumale::Validation.check_convert_sample_array(x)
|
173
144
|
|
174
145
|
if multiclass_problem?
|
175
146
|
probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
|
176
147
|
(probs.transpose / probs.sum(axis: 1)).transpose.dup
|
177
148
|
else
|
178
|
-
n_samples
|
149
|
+
n_samples = x.shape[0]
|
179
150
|
probs = Numo::DFloat.zeros(n_samples, 2)
|
180
151
|
probs[true, 1] = 1.0 / (Numo::NMath.exp(@prob_param[0] * decision_function(x) + @prob_param[1]) + 1.0)
|
181
152
|
probs[true, 0] = 1.0 - probs[true, 1]
|
@@ -185,14 +156,35 @@ module Rumale
|
|
185
156
|
|
186
157
|
private
|
187
158
|
|
188
|
-
def partial_fit(
|
189
|
-
w,
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
159
|
+
# Fit one binary classifier by minimizing the L2-regularized squared hinge loss
# with the L-BFGS-B optimizer.
#
# @param base_x [Numo::DFloat] The training samples. The caller (fit) has already
#   applied expand_feature when the bias term is fitted.
# @param bin_y [Numo::Int32] The binary labels; fit encodes them as -1 / +1.
# @return [Array] The weight vector, the bias term, and the sigmoid parameters
#   used for probability estimation ([1, 0] when probability estimation is disabled).
def partial_fit(base_x, bin_y)
  # Objective handed to the optimizer: returns the loss and its gradient.
  objective = proc do |weights, samples, labels, reg|
    sample_count = samples.shape[0]
    scores = samples.dot(weights)
    margins = 1 - labels * scores
    loss = 0.5 * reg * weights.dot(weights) + (samples.class.maximum(0, margins)**2).sum.fdiv(sample_count)
    # Only samples violating the margin contribute to the data term of the gradient.
    violating = margins.gt(0)
    gradient = reg * weights
    if violating.count.positive?
      vx = samples[violating, true]
      vy = labels[violating]
      gradient += 2.fdiv(sample_count) * vx.transpose.dot((vx.dot(weights) - vy))
    end
    [loss, gradient]
  end

  initial_weights = Numo::DFloat.zeros(base_x.shape[1])

  result = Lbfgsb.minimize(
    fnc: objective, jcb: true, x_init: initial_weights, args: [base_x, bin_y, @params[:reg_param]],
    maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
    verbose: @params[:verbose] ? 1 : -1
  )

  sigmoid_params =
    if @params[:probability]
      Rumale::ProbabilisticOutput.fit_sigmoid(base_x.dot(result[:x]), bin_y)
    else
      Numo::DFloat[1, 0]
    end
  weight, bias = split_weight(result[:x])

  [weight, bias, sigmoid_params]
end
|
197
189
|
|
198
190
|
def multiclass_problem?
|
@@ -1,13 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'lbfgsb'
|
4
|
+
|
3
5
|
require 'rumale/base/regressor'
|
4
6
|
require 'rumale/validation'
|
5
|
-
|
7
|
+
|
8
|
+
require_relative 'base_estimator'
|
6
9
|
|
7
10
|
module Rumale
|
8
11
|
module LinearModel
|
9
|
-
# SVR is a class that implements Support Vector Regressor
|
10
|
-
# with stochastic gradient descent optimization.
|
12
|
+
# SVR is a class that implements Support Vector Regressor with the squared epsilon-insensitive loss.
|
11
13
|
#
|
12
14
|
# @note
|
13
15
|
# Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
|
@@ -17,70 +19,39 @@ module Rumale
|
|
17
19
|
# @example
|
18
20
|
# require 'rumale/linear_model/svr'
|
19
21
|
#
|
20
|
-
# estimator =
|
21
|
-
# Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 50, random_seed: 1)
|
22
|
+
# estimator = Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1)
|
22
23
|
# estimator.fit(training_samples, training_target_values)
|
23
24
|
# results = estimator.predict(testing_samples)
|
24
|
-
|
25
|
-
|
26
|
-
# - Shalev-Shwartz, S., and Singer, Y., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
|
27
|
-
# - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
|
28
|
-
# - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
|
29
|
-
class SVR < BaseSGD
|
30
|
-
include ::Rumale::Base::Regressor
|
31
|
-
|
32
|
-
# Return the weight vector for SVR.
|
33
|
-
# @return [Numo::DFloat] (shape: [n_outputs, n_features])
|
34
|
-
attr_reader :weight_vec
|
35
|
-
|
36
|
-
# Return the bias term (a.k.a. intercept) for SVR.
|
37
|
-
# @return [Numo::DFloat] (shape: [n_outputs])
|
38
|
-
attr_reader :bias_term
|
39
|
-
|
40
|
-
# Return the random generator for performing random sampling.
|
41
|
-
# @return [Random]
|
42
|
-
attr_reader :rng
|
25
|
+
class SVR < Rumale::LinearModel::BaseEstimator
|
26
|
+
include Rumale::Base::Regressor
|
43
27
|
|
44
28
|
# Create a new linear regressor with Support Vector Machine with the squared epsilon-insensitive loss.
|
45
29
|
#
|
46
|
-
# @param learning_rate [Float] The initial value of learning rate.
|
47
|
-
# The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
|
48
|
-
# @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
|
49
|
-
# If nil is given, the decay sets to 'reg_param * learning_rate'.
|
50
|
-
# @param momentum [Float] The momentum factor.
|
51
|
-
# @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
|
52
|
-
# @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
|
53
|
-
# If penalty set to 'l2' or 'l1', this parameter is ignored.
|
54
|
-
# If l1_ratio = 1, the regularization is similar to Lasso.
|
55
|
-
# If l1_ratio = 0, the regularization is similar to Ridge.
|
56
|
-
# If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
|
57
30
|
# @param reg_param [Float] The regularization parameter.
|
58
31
|
# @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
|
59
32
|
# @param bias_scale [Float] The scale of the bias term.
|
60
33
|
# @param epsilon [Float] The margin of tolerance.
|
61
34
|
# @param max_iter [Integer] The maximum number of epochs that indicates
|
62
35
|
# how many times the whole data is given to the training process.
|
63
|
-
# @param batch_size [Integer] The size of the mini batches.
|
64
36
|
# @param tol [Float] The tolerance of loss for terminating optimization.
|
65
37
|
# @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
|
66
38
|
# If nil is given, the method does not execute in parallel.
|
67
39
|
# If zero or less is given, it becomes equal to the number of processors.
|
68
40
|
# This parameter is ignored if the Parallel gem is not loaded.
|
69
41
|
# @param verbose [Boolean] The flag indicating whether to output loss during iteration.
|
70
|
-
|
71
|
-
|
72
|
-
penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
|
73
|
-
fit_bias: true, bias_scale: 1.0,
|
74
|
-
epsilon: 0.1,
|
75
|
-
max_iter: 1000, batch_size: 50, tol: 1e-4,
|
76
|
-
n_jobs: nil, verbose: false, random_seed: nil)
|
42
|
+
def initialize(reg_param: 1.0, fit_bias: true, bias_scale: 1.0, epsilon: 0.1, max_iter: 1000, tol: 1e-4,
|
43
|
+
n_jobs: nil, verbose: false)
|
77
44
|
super()
|
78
|
-
@params
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
45
|
+
@params = {
|
46
|
+
reg_param: reg_param,
|
47
|
+
fit_bias: fit_bias,
|
48
|
+
bias_scale: bias_scale,
|
49
|
+
epsilon: epsilon,
|
50
|
+
max_iter: max_iter,
|
51
|
+
tol: tol,
|
52
|
+
n_jobs: n_jobs,
|
53
|
+
verbose: verbose
|
54
|
+
}
|
84
55
|
end
|
85
56
|
|
86
57
|
# Fit the model with given training data.
|
@@ -89,9 +60,9 @@ module Rumale
|
|
89
60
|
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
90
61
|
# @return [SVR] The learned regressor itself.
|
91
62
|
def fit(x, y)
|
92
|
-
x =
|
93
|
-
y =
|
94
|
-
|
63
|
+
x = Rumale::Validation.check_convert_sample_array(x)
|
64
|
+
y = Rumale::Validation.check_convert_target_value_array(y)
|
65
|
+
Rumale::Validation.check_sample_size(x, y)
|
95
66
|
|
96
67
|
n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
|
97
68
|
n_features = x.shape[1]
|
@@ -121,6 +92,37 @@ module Rumale
|
|
121
92
|
|
122
93
|
x.dot(@weight_vec.transpose) + @bias_term
|
123
94
|
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
98
|
+
# Fit the model for a single output by minimizing the L2-regularized squared
# epsilon-insensitive loss with the L-BFGS-B optimizer:
#
#   0.5 * reg_param * w.dot(w) + sum(max(0, |y - x.dot(w)| - eps)**2) / n_samples
#
# @param base_x [Numo::DFloat] The training samples; a bias column is appended here
#   via expand_feature when the bias term is fitted.
# @param single_y [Numo::DFloat] The target values of one output.
# @return The value of split_weight for the optimized vector
#   (presumably the weight vector and the bias term; split_weight is defined outside this view).
def partial_fit(base_x, single_y)
  # Objective handed to the optimizer: returns the loss and its gradient.
  fnc = proc do |w, x, y, eps, reg_param|
    n_samples = x.shape[0]
    z = x.dot(w)
    d = y - z
    loss = 0.5 * reg_param * w.dot(w) + (x.class.maximum(0, d.abs - eps)**2).sum.fdiv(n_samples)
    # c holds the per-sample derivative factor of the data term; it stays zero
    # for samples inside the epsilon tube (|d| <= eps), whose loss is zero.
    c = x.class.zeros(n_samples)
    above = d.gt(eps)
    c[above] = -d[above] + eps if above.count.positive?
    # The lower side of the tube is d < -eps. Comparing against +eps here would
    # also select in-tube samples and yield a gradient inconsistent with the loss.
    below = d.lt(-eps)
    c[below] = -d[below] - eps if below.count.positive?
    grad = reg_param * w + 2.fdiv(n_samples) * x.transpose.dot(c)
    [loss, grad]
  end

  base_x = expand_feature(base_x) if fit_bias?

  n_features = base_x.shape[1]
  w_init = Numo::DFloat.zeros(n_features)

  res = Lbfgsb.minimize(
    fnc: fnc, jcb: true, x_init: w_init, args: [base_x, single_y, @params[:epsilon], @params[:reg_param]],
    maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON,
    verbose: @params[:verbose] ? 1 : -1
  )

  split_weight(res[:x])
end
|
124
126
|
end
|
125
127
|
end
|
126
128
|
end
|
data/lib/rumale/linear_model.rb
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
require 'numo/narray'
|
4
4
|
|
5
|
-
require_relative 'linear_model/
|
5
|
+
require_relative 'linear_model/sgd_classifier'
|
6
|
+
require_relative 'linear_model/sgd_regressor'
|
6
7
|
require_relative 'linear_model/elastic_net'
|
7
8
|
require_relative 'linear_model/lasso'
|
8
9
|
require_relative 'linear_model/linear_regression'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale-linear_model
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.26.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: lbfgsb
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
47
|
+
version: 0.26.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
54
|
+
version: 0.26.0
|
55
55
|
description: |
|
56
56
|
Rumale::LinearModel provides linear model algorithms,
|
57
57
|
such as Logistic Regression, Support Vector Machine, Lasso, and Ridge Regression
|
@@ -65,13 +65,16 @@ files:
|
|
65
65
|
- LICENSE.txt
|
66
66
|
- README.md
|
67
67
|
- lib/rumale/linear_model.rb
|
68
|
-
- lib/rumale/linear_model/
|
68
|
+
- lib/rumale/linear_model/base_estimator.rb
|
69
69
|
- lib/rumale/linear_model/elastic_net.rb
|
70
70
|
- lib/rumale/linear_model/lasso.rb
|
71
71
|
- lib/rumale/linear_model/linear_regression.rb
|
72
72
|
- lib/rumale/linear_model/logistic_regression.rb
|
73
73
|
- lib/rumale/linear_model/nnls.rb
|
74
74
|
- lib/rumale/linear_model/ridge.rb
|
75
|
+
- lib/rumale/linear_model/sgd_classifier.rb
|
76
|
+
- lib/rumale/linear_model/sgd_estimator.rb
|
77
|
+
- lib/rumale/linear_model/sgd_regressor.rb
|
75
78
|
- lib/rumale/linear_model/svc.rb
|
76
79
|
- lib/rumale/linear_model/svr.rb
|
77
80
|
- lib/rumale/linear_model/version.rb
|