rumale-linear_model 0.24.0
This diff shows the content of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/linear_model/base_sgd.rb +275 -0
- data/lib/rumale/linear_model/elastic_net.rb +115 -0
- data/lib/rumale/linear_model/lasso.rb +111 -0
- data/lib/rumale/linear_model/linear_regression.rb +199 -0
- data/lib/rumale/linear_model/logistic_regression.rb +266 -0
- data/lib/rumale/linear_model/nnls.rb +141 -0
- data/lib/rumale/linear_model/ridge.rb +206 -0
- data/lib/rumale/linear_model/svc.rb +203 -0
- data/lib/rumale/linear_model/svr.rb +126 -0
- data/lib/rumale/linear_model/version.rb +10 -0
- data/lib/rumale/linear_model.rb +14 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 3a7999fbdb27dc6ed43710083da0cecf2336bfb08277fa4240be7b56c2603c9f
+  data.tar.gz: 6bc61b3d80fe71c1d7ed806c810a0eb454244033506ab2078ccd0987e7891455
+SHA512:
+  metadata.gz: 4c73e2b03dfb0f14c94b880769103bbb05f77de7ac08f8b9af2dddc0ab6bcada758c0c7c51353a79e052e051999b4afc7610012caf4dbb7c4edc505152fdf1d6
+  data.tar.gz: 91ce194539b8abc95fb3ec7335bd4843aea126774ebb566eb9953955f775427afd032f9d02894c8507337d20f03baafa39903eb9dd9f9157ccf6a8d22028fe8b
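To check a downloaded copy of the gem against the digests above, the following standalone Ruby sketch (not part of the package) can be used; it assumes the .gem archive has already been fetched and unpacked, for example with `gem fetch rumale-linear_model -v 0.24.0` followed by `tar xf rumale-linear_model-0.24.0.gem`:

```ruby
# Hypothetical verification sketch: compare the SHA256 digests from checksums.yaml
# with freshly computed digests of the unpacked archive members.
require 'digest'

expected_sha256 = {
  'metadata.gz' => '3a7999fbdb27dc6ed43710083da0cecf2336bfb08277fa4240be7b56c2603c9f',
  'data.tar.gz' => '6bc61b3d80fe71c1d7ed806c810a0eb454244033506ab2078ccd0987e7891455'
}

expected_sha256.each do |name, digest|
  actual = Digest::SHA256.file(name).hexdigest
  puts "#{name}: #{actual == digest ? 'OK' : 'MISMATCH'}"
end
```

The SHA512 entries can be checked the same way with `Digest::SHA512`.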
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
+Copyright (c) 2022 Atsushi Tatsuma
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md
ADDED
@@ -0,0 +1,34 @@
+# Rumale::LinearModel
+
+[](https://badge.fury.io/rb/rumale-linear_model)
+[](https://github.com/yoshoku/rumale/blob/main/rumale-linear_model/LICENSE.txt)
+[](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel.html)
+
+Rumale is a machine learning library in Ruby.
+Rumale::LinearModel provides linear model algorithms,
+such as Logistic Regression, Support Vector Machine, Lasso, and Ridge Regression
+with Rumale interface.
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+```ruby
+gem 'rumale-linear_model'
+```
+
+And then execute:
+
+    $ bundle install
+
+Or install it yourself as:
+
+    $ gem install rumale-linear_model
+
+## Documentation
+
+- [Rumale API Documentation - LinearModel](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel.html)
+
+## License
+
+The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
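For orientation, a minimal quick-start sketch (not taken from the README; the synthetic data and parameter values are only illustrative) using the Lasso regressor shipped in this gem:

```ruby
# Hypothetical quick-start: fit a Lasso regressor on synthetic data.
require 'numo/narray'
require 'rumale/linear_model/lasso'

# Targets follow a known linear rule: y = 3*x1 - 2*x2.
x = Numo::DFloat.new(200, 2).rand
y = 3.0 * x[true, 0] - 2.0 * x[true, 1]

estimator = Rumale::LinearModel::Lasso.new(reg_param: 0.01, random_seed: 1)
estimator.fit(x, y)
puts estimator.weight_vec.to_a.inspect           # expected to land near [3.0, -2.0]
puts estimator.predict(x[0...5, true]).to_a.inspect
```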
data/lib/rumale/linear_model/base_sgd.rb
ADDED
@@ -0,0 +1,275 @@
+# frozen_string_literal: true
+
+require 'rumale/base/estimator'
+
+module Rumale
+  module LinearModel
+    # @!visibility private
+    # This module consists of the classes that implement penalty (regularization) term.
+    module Penalty
+      # @!visibility private
+      # L2Penalty is a class that applies L2 penalty to weight vector of linear model.
+      # This class is used internally.
+      class L2Penalty
+        # @!visibility private
+        def initialize(reg_param:)
+          @reg_param = reg_param
+        end
+
+        # @!visibility private
+        def call(weight, lr)
+          weight - @reg_param * lr * weight
+        end
+      end
+
+      # @!visibility private
+      # L1Penalty is a class that applies L1 penalty to weight vector of linear model.
+      # This class is used internally.
+      class L1Penalty
+        # @!visibility private
+        def initialize(reg_param:)
+          @u = 0.0
+          @reg_param = reg_param
+        end
+
+        # @!visibility private
+        def call(weight, lr)
+          @q_vec ||= Numo::DFloat.zeros(weight.shape[0])
+          @u += @reg_param * lr
+          z = weight.dup
+          gt = weight.gt(0)
+          lt = weight.lt(0)
+          weight[gt] = Numo::DFloat.maximum(0.0, weight[gt] - (@u + @q_vec[gt])) if gt.count.positive?
+          weight[lt] = Numo::DFloat.minimum(0.0, weight[lt] + (@u - @q_vec[lt])) if lt.count.positive?
+          @q_vec += weight - z
+          weight
+        end
+      end
+    end
+
+    # @!visibility private
+    # This module consists of the class that implements stochastic gradient descent (SGD) optimizer.
+    module Optimizer
+      # @!visibility private
+      # SGD is a class that implements SGD optimizer.
+      # This class is used internally.
+      class SGD
+        # @!visibility private
+        # Create a new SGD optimizer.
+        # @param learning_rate [Float] The initial value of learning rate.
+        # @param momentum [Float] The initial value of momentum.
+        # @param decay [Float] The smooting parameter.
+        def initialize(learning_rate: 0.01, momentum: 0.0, decay: 0.0)
+          @learning_rate = learning_rate
+          @momentum = momentum
+          @decay = decay
+          @iter = 0
+        end
+
+        # @!visibility private
+        def current_learning_rate
+          @learning_rate / (1.0 + @decay * @iter)
+        end
+
+        # @!visibility private
+        def call(weight, gradient)
+          @update ||= Numo::DFloat.zeros(weight.shape[0])
+          @update = @momentum * @update - current_learning_rate * gradient
+          @iter += 1
+          weight + @update
+        end
+      end
+    end
+
+    # @!visibility private
+    # This module consists of the classes that implement loss function for linear model.
+    module Loss
+      # @!visibility private
+      # MeanSquaredError is a class that calculates mean squared error for linear regression model.
+      class MeanSquaredError
+        # @!visibility private
+        def loss(out, y)
+          ((out - y)**2).sum.fdiv(y.shape[0])
+        end
+
+        # @!visibility private
+        def dloss(out, y)
+          2.fdiv(y.shape[0]) * (out - y)
+        end
+      end
+
+      # @!visibility private
+      # LogLoss is a class that calculates logistic loss for logistic regression.
+      class LogLoss
+        # @!visibility private
+        def loss(out, y)
+          Numo::NMath.log(1 + Numo::NMath.exp(-y * out)).sum.fdiv(y.shape[0])
+        end
+
+        # @!visibility private
+        def dloss(out, y)
+          y / (1 + Numo::NMath.exp(-y * out)) - y
+        end
+      end
+
+      # @!visibility private
+      # HingeLoss is a class that calculates hinge loss for support vector classifier.
+      class HingeLoss
+        # @!visibility private
+        def loss(out, y)
+          out.class.maximum(0.0, 1 - y * out).sum.fdiv(y.shape[0])
+        end
+
+        # @!visibility private
+        def dloss(out, y)
+          tids = (y * out).lt(1)
+          d = Numo::DFloat.zeros(y.shape[0])
+          d[tids] = -y[tids] if tids.count.positive?
+          d
+        end
+      end
+
+      # @!visibility private
+      # EpsilonInsensitive is a class that calculates epsilon insensitive for support vector regressor.
+      class EpsilonInsensitive
+        # @!visibility private
+        def initialize(epsilon: 0.1)
+          @epsilon = epsilon
+        end
+
+        # @!visibility private
+        def loss(out, y)
+          out.class.maximum(0.0, (y - out).abs - @epsilon).sum.fdiv(y.shape[0])
+        end
+
+        # @!visibility private
+        def dloss(out, y)
+          d = Numo::DFloat.zeros(y.shape[0])
+          tids = (out - y).gt(@epsilon)
+          d[tids] = 1 if tids.count.positive?
+          tids = (y - out).gt(@epsilon)
+          d[tids] = -1 if tids.count.positive?
+          d
+        end
+      end
+    end
+
+    # BaseSGD is an abstract class for implementation of linear model with mini-batch stochastic gradient descent (SGD) optimization.
+    # This class is used internally.
+    class BaseSGD < ::Rumale::Base::Estimator
+      # Create an initial linear model.
+      def initialize
+        super()
+        @params = {
+          learning_rate: 0.01,
+          decay: nil,
+          momentum: 0.0,
+          bias_scale: 1.0,
+          fit_bias: true,
+          reg_param: 0.0,
+          l1_ratio: 0.0,
+          max_iter: 1000,
+          batch_size: 50,
+          tol: 0.0001,
+          verbose: false
+        }
+      end
+
+      private
+
+      L2_PENALTY = 'l2'
+      L1_PENALTY = 'l1'
+      ELASTICNET_PENALTY = 'elasticnet'
+
+      private_constant :L2_PENALTY, :L1_PENALTY, :ELASTICNET_PENALTY
+
+      def partial_fit(x, y)
+        class_name = self.class.to_s.split('::').last if @params[:verbose]
+        narr = x.class
+        # Expand feature vectors for bias term.
+        x = expand_feature(x) if fit_bias?
+        # Initialize some variables.
+        sub_rng = @rng.dup
+        n_samples, n_features = x.shape
+        weight = Numo::DFloat.zeros(n_features)
+        optimizer = ::Rumale::LinearModel::Optimizer::SGD.new(
+          learning_rate: @params[:learning_rate], momentum: @params[:momentum], decay: @params[:decay]
+        )
+        l2_penalty = ::Rumale::LinearModel::Penalty::L2Penalty.new(reg_param: l2_reg_param) if apply_l2_penalty?
+        l1_penalty = ::Rumale::LinearModel::Penalty::L1Penalty.new(reg_param: l1_reg_param) if apply_l1_penalty?
+        # Optimization.
+        @params[:max_iter].times do |t|
+          sample_ids = Array(0...n_samples)
+          sample_ids.shuffle!(random: sub_rng)
+          until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
+            # sampling
+            sub_x = x[subset_ids, true]
+            sub_y = y[subset_ids]
+            # calculate gradient
+            dloss = @loss_func.dloss(sub_x.dot(weight), sub_y)
+            dloss = narr.minimum(1e12, narr.maximum(-1e12, dloss))
+            gradient = dloss.dot(sub_x)
+            # update weight
+            lr = optimizer.current_learning_rate
+            weight = optimizer.call(weight, gradient)
+            # l2 regularization
+            weight = l2_penalty.call(weight, lr) if apply_l2_penalty?
+            # l1 regularization
+            weight = l1_penalty.call(weight, lr) if apply_l1_penalty?
+          end
+          loss = @loss_func.loss(x.dot(weight), y)
+          puts "[#{class_name}] Loss after #{t + 1} epochs: #{loss}" if @params[:verbose]
+          break if loss < @params[:tol]
+        end
+        split_weight(weight)
+      end
+
+      def expand_feature(x)
+        n_samples = x.shape[0]
+        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+      end
+
+      def split_weight(weight)
+        if fit_bias?
+          [weight[0...-1].dup, weight[-1]]
+        else
+          [weight, 0.0]
+        end
+      end
+
+      def fit_bias?
+        @params[:fit_bias] == true
+      end
+
+      def apply_l2_penalty?
+        @penalty_type == L2_PENALTY || @penalty_type == ELASTICNET_PENALTY
+      end
+
+      def apply_l1_penalty?
+        @penalty_type == L1_PENALTY || @penalty_type == ELASTICNET_PENALTY
+      end
+
+      def l2_reg_param
+        case @penalty_type
+        when ELASTICNET_PENALTY
+          @params[:reg_param] * (1.0 - @params[:l1_ratio])
+        when L2_PENALTY
+          @params[:reg_param]
+        else
+          0.0
+        end
+      end
+
+      def l1_reg_param
+        case @penalty_type
+        when ELASTICNET_PENALTY
+          @params[:reg_param] * @params[:l1_ratio]
+        when L1_PENALTY
+          @params[:reg_param]
+        else
+          0.0
+        end
+      end
+    end
+  end
+end
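Two small formulas drive the training loop above: the learning-rate schedule `learning_rate / (1 + decay * t)` implemented by the internal SGD optimizer, and the split of `reg_param` into L2 and L1 parts used when the elastic-net penalty is selected. The following standalone snippet (not part of the gem; the numbers are illustrative) simply traces those two calculations in plain Ruby:

```ruby
# Illustration only: how the learning rate decays over iterations, and how an
# elastic-net reg_param is divided between the L2 and L1 penalty terms.
learning_rate = 0.01
decay = 0.001 # the regressors default decay to reg_param * learning_rate (0.1 * 0.01 here)
[0, 10, 100, 1000].each do |t|
  lr = learning_rate / (1.0 + decay * t)
  puts format('iteration %4d: learning rate %.6f', t, lr)
end

reg_param = 0.1
l1_ratio = 0.5
l2_part = reg_param * (1.0 - l1_ratio) # passed to L2Penalty (see l2_reg_param)
l1_part = reg_param * l1_ratio         # passed to L1Penalty (see l1_reg_param)
puts "l2 part: #{l2_part}, l1 part: #{l1_part}"
```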
data/lib/rumale/linear_model/elastic_net.rb
ADDED
@@ -0,0 +1,115 @@
+# frozen_string_literal: true
+
+require 'rumale/base/regressor'
+require 'rumale/validation'
+require 'rumale/linear_model/base_sgd'
+
+module Rumale
+  module LinearModel
+    # ElasticNet is a class that implements Elastic-net Regression
+    # with stochastic gradient descent (SGD) optimization.
+    #
+    # @example
+    #   require 'rumale/linear_model/elastic_net'
+    #
+    #   estimator =
+    #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 1000, batch_size: 50, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Shalev-Shwartz, S., and Singer, Y., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+    # - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+    class ElasticNet < BaseSGD
+      include ::Rumale::Base::Regressor
+
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+      attr_reader :weight_vec
+
+      # Return the bias term (a.k.a. intercept).
+      # @return [Numo::DFloat] (shape: [n_outputs])
+      attr_reader :bias_term
+
+      # Return the random generator for random sampling.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new Elastic-net regressor.
+      #
+      # @param learning_rate [Float] The initial value of learning rate.
+      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
+      # @param momentum [Float] The momentum factor.
+      # @param reg_param [Float] The regularization parameter.
+      # @param l1_ratio [Float] The elastic-net mixing parameter.
+      #   If l1_ratio = 1, the regularization is similar to Lasso.
+      #   If l1_ratio = 0, the regularization is similar to Ridge.
+      #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
+                     reg_param: 1.0, l1_ratio: 0.5, fit_bias: true, bias_scale: 1.0,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
+                     n_jobs: nil, verbose: false, random_seed: nil)
+        super()
+        @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
+        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+        @params[:random_seed] ||= srand
+        @rng = Random.new(@params[:random_seed])
+        @penalty_type = ELASTICNET_PENALTY
+        @loss_func = ::Rumale::LinearModel::Loss::MeanSquaredError.new
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [ElasticNet] The learned regressor itself.
+      def fit(x, y)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        y = ::Rumale::Validation.check_convert_target_value_array(y)
+        ::Rumale::Validation.check_sample_size(x, y)
+
+        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        n_features = x.shape[1]
+
+        if n_outputs > 1
+          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+          @bias_term = Numo::DFloat.zeros(n_outputs)
+          if enable_parallel?
+            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          end
+        else
+          @weight_vec, @bias_term = partial_fit(x, y)
+        end
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+    end
+  end
+end
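As a sanity check of the interface shown above, here is a hypothetical example (data and hyperparameters chosen only for illustration) that fits ElasticNet to targets generated from a known linear rule and inspects the learned coefficients:

```ruby
# Hypothetical usage sketch for Rumale::LinearModel::ElasticNet.
require 'numo/narray'
require 'rumale/linear_model/elastic_net'

x = Numo::DFloat.new(300, 3).rand
true_weights = Numo::DFloat[2.0, 0.0, -1.5]
y = x.dot(true_weights) + 0.5 # known weights plus a constant bias

est = Rumale::LinearModel::ElasticNet.new(reg_param: 0.01, l1_ratio: 0.7, random_seed: 1)
est.fit(x, y)
puts est.weight_vec.to_a.inspect # expected to be near [2.0, 0.0, -1.5]
puts est.bias_term               # expected to be near 0.5
puts est.predict(x[0...3, true]).to_a.inspect
```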
data/lib/rumale/linear_model/lasso.rb
ADDED
@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+
+require 'rumale/base/regressor'
+require 'rumale/validation'
+require 'rumale/linear_model/base_sgd'
+
+module Rumale
+  module LinearModel
+    # Lasso is a class that implements Lasso Regression
+    # with stochastic gradient descent (SGD) optimization.
+    #
+    # @example
+    #   require 'rumale/linear_model/lasso'
+    #
+    #   estimator =
+    #     Rumale::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Shalev-Shwartz, S., and Singer, Y., "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+    # - Tsuruoka, Y., Tsujii, J., and Ananiadou, S., "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+    # - Bottou, L., "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+    class Lasso < BaseSGD
+      include ::Rumale::Base::Regressor
+
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+      attr_reader :weight_vec
+
+      # Return the bias term (a.k.a. intercept).
+      # @return [Numo::DFloat] (shape: [n_outputs])
+      attr_reader :bias_term
+
+      # Return the random generator for random sampling.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new Lasso regressor.
+      #
+      # @param learning_rate [Float] The initial value of learning rate.
+      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
+      # @param momentum [Float] The momentum factor.
+      # @param reg_param [Float] The regularization parameter.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
+                     reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
+                     n_jobs: nil, verbose: false, random_seed: nil)
+        super()
+        @params.merge!(method(:initialize).parameters.to_h { |_t, arg| [arg, binding.local_variable_get(arg)] })
+        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+        @params[:random_seed] ||= srand
+        @rng = Random.new(@params[:random_seed])
+        @penalty_type = L1_PENALTY
+        @loss_func = ::Rumale::LinearModel::Loss::MeanSquaredError.new
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [Lasso] The learned regressor itself.
+      def fit(x, y)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        y = ::Rumale::Validation.check_convert_target_value_array(y)
+        ::Rumale::Validation.check_sample_size(x, y)
+
+        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        n_features = x.shape[1]
+
+        if n_outputs > 1
+          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+          @bias_term = Numo::DFloat.zeros(n_outputs)
+          if enable_parallel?
+            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          end
+        else
+          @weight_vec, @bias_term = partial_fit(x, y)
+        end
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+    end
+  end
+end
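Lasso#fit also handles multi-target data: when `y` has shape `[n_samples, n_outputs]`, the `n_outputs > 1` branch above fits one weight vector per output and `weight_vec` takes shape `[n_outputs, n_features]`. A hypothetical sketch (not part of the gem) showing the resulting shapes:

```ruby
# Hypothetical multi-target example for Rumale::LinearModel::Lasso.
require 'numo/narray'
require 'rumale/linear_model/lasso'

x = Numo::DFloat.new(200, 4).rand
w = Numo::DFloat[[1.0, 0.0, 0.0, -2.0], [0.0, 3.0, 0.0, 0.0]] # two sparse target rules
y = x.dot(w.transpose)                                         # shape: [200, 2]

est = Rumale::LinearModel::Lasso.new(reg_param: 0.01, random_seed: 1)
est.fit(x, y)
p est.weight_vec.shape # => [2, 4]
p est.bias_term.shape  # => [2]
p est.predict(x).shape # => [200, 2]
```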