rumale 0.16.0 → 0.16.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
- SHA1:
-   metadata.gz: e97522eaf22db6c80513f8d69513b6aef7dc1a9f
-   data.tar.gz: a2c2d51a15465f5bac67bc0b60aef1c70067bba4
+ SHA256:
+   metadata.gz: 30476b58c5c5b39567f1cb3c8346a7c354fbf8d30401555fa2e02995021b759d
+   data.tar.gz: 6f664b0c279e0fef2dc47e608cdc2737318274b45017d6d60f0dd516aa2ebb48
  SHA512:
-   metadata.gz: b04158c2f4247c58593dad54e1d8c79030aff2a3826032610b9d7afee73490843caa3c5636c86fc3fc3196ed45affdb2da17c7d926091f77c784f1970464c2a4
-   data.tar.gz: e75814bcbe3aa087f2a45a15790a180016147d9197442d5d329a6484d1baee8b4d2726265d640a3d47ada45189d6dfb3157f2fa5eec7e334824cf96bd8fba493
+   metadata.gz: aa51f865e4995901e5587e3089fae724a57022d96c95d2b852cfde99f85f9aae7035c4edfe6c4a7899c22674778e1bfc0332ef83b6f234a8c9e8aa982e55e833
+   data.tar.gz: 55e209725a0c716b1f450bed025fceefe36dafe278b96648ac60079e3968778840bc4e1e75ff4181abafc4f98eb93cc40d8e2e0e3b5bf078bc94cf8b9a5dc50d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+ # 0.16.1
+ - Add regressor class for [ElasticNet](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/ElasticNet.html).
+ - Add new linear model abstract class.
+   - In version 0.17.0, all LinearModel estimators will be changed to use this new abstract class.
+     The major differences from the existing abstract class are that
+     the optimizer of LinearModel estimators is fixed to mini-batch SGD with a momentum term,
+     the max_iter parameter indicates the number of epochs instead of the maximum number of iterations,
+     the fit_bias parameter is true by default, and elastic-net style regularization can be used.
+
  # 0.16.0
  ## Breaking changes
  - The meaning of the `max_iter` parameter of the multi-layer perceptron estimators
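The changelog entry above describes the new SGD-based interface only in prose. A minimal sketch of how the ElasticNet regressor added in this release exercises those behaviors (the synthetic data and parameter values are illustrative, not taken from the gem):

    require 'rumale'

    # Synthetic regression data: y = 2*x0 - 3*x1 plus small noise.
    x = Numo::DFloat.new(100, 2).rand
    y = 2.0 * x[true, 0] - 3.0 * x[true, 1] + 0.01 * Numo::DFloat.new(100).rand

    # max_iter counts epochs (full passes over the shuffled data),
    # fit_bias defaults to true, and reg_param/l1_ratio select the
    # elastic-net mix described in the changelog entry above.
    estimator = Rumale::LinearModel::ElasticNet.new(
      reg_param: 0.01, l1_ratio: 0.5, max_iter: 100, batch_size: 20, random_seed: 1
    )
    estimator.fit(x, y)
    puts estimator.weight_vec.to_a.inspect # close to [2.0, -3.0] after shrinkage
    puts estimator.score(x, y)             # R^2 score from Rumale::Base::Regressor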
data/lib/rumale.rb CHANGED
@@ -27,12 +27,14 @@ require 'rumale/optimizer/yellow_fin'
  require 'rumale/pipeline/pipeline'
  require 'rumale/kernel_approximation/rbf'
  require 'rumale/linear_model/base_linear_model'
+ require 'rumale/linear_model/base_sgd'
  require 'rumale/linear_model/svc'
  require 'rumale/linear_model/svr'
  require 'rumale/linear_model/logistic_regression'
  require 'rumale/linear_model/linear_regression'
  require 'rumale/linear_model/ridge'
  require 'rumale/linear_model/lasso'
+ require 'rumale/linear_model/elastic_net'
  require 'rumale/kernel_machine/kernel_svc'
  require 'rumale/kernel_machine/kernel_pca'
  require 'rumale/kernel_machine/kernel_ridge'
data/lib/rumale/linear_model/base_linear_model.rb CHANGED
@@ -33,7 +33,7 @@ module Rumale
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
        @params[:optimizer] = optimizer
-       @params[:optimizer] ||= Optimizer::Nadam.new
+       @params[:optimizer] ||= Rumale::Optimizer::Nadam.new
        @params[:n_jobs] = n_jobs
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
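The switch to the fully qualified `Rumale::Optimizer::Nadam` is needed because base_sgd.rb (below) introduces a private `Rumale::LinearModel::Optimizer` module: inside `Rumale::LinearModel`, the bare constant `Optimizer` now resolves to that inner module first and no longer reaches `Rumale::Optimizer`. A standalone sketch of this constant-lookup rule, using hypothetical module names:

    module Outer
      module Helper
        NAME = 'Outer::Helper'
      end

      module Inner
        module Helper
          NAME = 'Inner::Helper'
        end

        # Lexical scope wins: the innermost Helper shadows the outer one.
        puts Helper::NAME        # => "Inner::Helper"
        puts Outer::Helper::NAME # => "Outer::Helper"
      end
    end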
data/lib/rumale/linear_model/base_sgd.rb ADDED
@@ -0,0 +1,230 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/base_estimator'
+
+ module Rumale
+   module LinearModel
+     # @!visibility private
+     # This module consists of the classes that implement penalty (regularization) term.
+     module Penalty
+       # @!visibility private
+       # L2Penalty is a class that applies L2 penalty to weight vector of linear model.
+       # This class is used internally.
+       class L2Penalty
+         # @!visibility private
+         def initialize(reg_param:)
+           @reg_param = reg_param
+         end
+
+         # @!visibility private
+         def call(weight, lr)
+           weight - @reg_param * lr * weight
+         end
+       end
+
+       # @!visibility private
+       # L1Penalty is a class that applies L1 penalty to weight vector of linear model.
+       # This class is used internally.
+       class L1Penalty
+         # @!visibility private
+         def initialize(reg_param:)
+           @q_vec = nil
+           @u = 0
+           @reg_param = reg_param
+         end
+
+         # @!visibility private
+         def call(weight, lr)
+           @q_vec ||= Numo::DFloat.zeros(weight.shape[0])
+           @u += @reg_param * lr
+           z = weight.dup
+           gt = weight.gt(0)
+           lt = weight.lt(0)
+           weight[gt] = Numo::DFloat.maximum(0.0, weight[gt] - (@u + @q_vec[gt])) if gt.count.positive?
+           weight[lt] = Numo::DFloat.minimum(0.0, weight[lt] + (@u - @q_vec[lt])) if lt.count.positive?
+           @q_vec += weight - z
+           weight
+         end
+       end
+     end
+
+     # @!visibility private
+     # This module consists of the class that implements stochastic gradient descent (SGD) optimizer.
+     module Optimizer
+       # @!visibility private
+       # SGD is a class that implements SGD optimizer.
+       # This class is used internally.
+       class SGD
+         # @!visibility private
+         # Create a new SGD optimizer.
+         # @param learning_rate [Float] The initial value of learning rate.
+         # @param momentum [Float] The initial value of momentum.
+         # @param decay [Float] The smoothing parameter.
+         def initialize(learning_rate: 0.01, momentum: 0.0, decay: 0.0)
+           @learning_rate = learning_rate
+           @momentum = momentum
+           @decay = decay
+           @update = nil
+           @iter = 0
+         end
+
+         # @!visibility private
+         def current_learning_rate
+           @learning_rate / (1.0 + @decay * @iter)
+         end
+
+         # @!visibility private
+         def call(weight, gradient)
+           @update ||= Numo::DFloat.zeros(weight.shape[0])
+           @update = @momentum * @update - current_learning_rate * gradient
+           @iter += 1
+           weight + @update
+         end
+       end
+     end
+
+     # @!visibility private
+     # This module consists of the classes that implement loss function for linear model.
+     module Loss
+       # @!visibility private
+       # MeanSquaredError is a class that calculates mean squared error for linear regression model.
+       class MeanSquaredError
+         # @!visibility private
+         def loss(out, y)
+           ((out - y)**2).sum.fdiv(y.shape[0])
+         end
+
+         # @!visibility private
+         def dloss(out, y)
+           2.fdiv(y.shape[0]) * (out - y)
+         end
+       end
+     end
+
+     # BaseSGD is an abstract class for implementation of linear model with mini-batch stochastic gradient descent (SGD) optimization.
+     # This class is used internally.
+     class BaseSGD
+       include Rumale::Base::BaseEstimator
+
+       # Create an initial linear model.
+       def initialize
+         @params = {}
+         @params[:learning_rate] = 0.01
+         @params[:decay] = nil
+         @params[:momentum] = 0.0
+         @params[:bias_scale] = 1.0
+         @params[:fit_bias] = true
+         @params[:reg_param] = 0.0
+         @params[:l1_ratio] = 0.0
+         @params[:max_iter] = 200
+         @params[:batch_size] = 50
+         @params[:tol] = 0.0001
+         @params[:verbose] = false
+         @penalty_type = nil
+         @loss_func = nil
+         @weight_vec = nil
+         @bias_term = nil
+         @n_iter = nil
+         @rng = nil
+       end
+
+       private
+
+       L2_PENALTY = 'l2'
+       L1_PENALTY = 'l1'
+       ELASTICNET_PENALTY = 'elasticnet'
+
+       private_constant :L2_PENALTY, :L1_PENALTY, :ELASTICNET_PENALTY
+
+       def partial_fit(x, y)
+         class_name = self.class.to_s.split('::').last if @params[:verbose]
+         narr = x.class
+         # Expand feature vectors for bias term.
+         x = expand_feature(x) if fit_bias?
+         # Initialize some variables.
+         sub_rng = @rng.dup
+         n_samples, n_features = x.shape
+         weight = Numo::DFloat.zeros(n_features)
+         optimizer = LinearModel::Optimizer::SGD.new(
+           learning_rate: @params[:learning_rate],
+           momentum: @params[:momentum],
+           decay: @params[:decay]
+         )
+         l2_penalty = LinearModel::Penalty::L2Penalty.new(reg_param: l2_reg_param) if apply_l2_penalty?
+         l1_penalty = LinearModel::Penalty::L1Penalty.new(reg_param: l1_reg_param) if apply_l1_penalty?
+         # Optimization.
+         @params[:max_iter].times do |t|
+           sample_ids = [*0...n_samples]
+           sample_ids.shuffle!(random: sub_rng)
+           until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
+             # sampling
+             sub_x = x[subset_ids, true]
+             sub_y = y[subset_ids]
+             # calculate gradient
+             dloss = @loss_func.dloss(sub_x.dot(weight), sub_y)
+             dloss = narr.minimum(1e12, narr.maximum(-1e12, dloss))
+             gradient = dloss.dot(sub_x)
+             # update weight
+             lr = optimizer.current_learning_rate
+             weight = optimizer.call(weight, gradient)
+             # l2 regularization
+             weight = l2_penalty.call(weight, lr) if apply_l2_penalty?
+             # l1 regularization
+             weight = l1_penalty.call(weight, lr) if apply_l1_penalty?
+           end
+           loss = @loss_func.loss(x.dot(weight), y)
+           puts "[#{class_name}] Loss after #{t + 1} epochs: #{loss}" if @params[:verbose]
+           break if loss < @params[:tol]
+         end
+         split_weight(weight)
+       end
+
+       def expand_feature(x)
+         n_samples = x.shape[0]
+         Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+       end
+
+       def split_weight(weight)
+         if fit_bias?
+           [weight[0...-1].dup, weight[-1]]
+         else
+           [weight, 0.0]
+         end
+       end
+
+       def fit_bias?
+         @params[:fit_bias] == true
+       end
+
+       def apply_l2_penalty?
+         @penalty_type == L2_PENALTY || @penalty_type == ELASTICNET_PENALTY
+       end
+
+       def apply_l1_penalty?
+         @penalty_type == L1_PENALTY || @penalty_type == ELASTICNET_PENALTY
+       end
+
+       def l2_reg_param
+         case @penalty_type
+         when ELASTICNET_PENALTY
+           @params[:reg_param] * (1.0 - @params[:l1_ratio])
+         when L2_PENALTY
+           @params[:reg_param]
+         else
+           0.0
+         end
+       end
+
+       def l1_reg_param
+         case @penalty_type
+         when ELASTICNET_PENALTY
+           @params[:reg_param] * @params[:l1_ratio]
+         when L1_PENALTY
+           @params[:reg_param]
+         else
+           0.0
+         end
+       end
+     end
+   end
+ end
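With `@penalty_type = ELASTICNET_PENALTY`, the single `reg_param` above is split across the two penalty objects: `l1_reg_param` returns `reg_param * l1_ratio` and `l2_reg_param` returns `reg_param * (1 - l1_ratio)`, so the two shares always sum to `reg_param`. A small standalone check of that split and of the momentum update performed by `Optimizer::SGD#call` (plain Ruby mirroring, not calling, the private classes above; the numbers are illustrative):

    reg_param = 0.1
    l1_ratio  = 0.3
    l1 = reg_param * l1_ratio         # => 0.03, handed to L1Penalty
    l2 = reg_param * (1.0 - l1_ratio) # => 0.07, handed to L2Penalty

    # One-dimensional replay of SGD#call: the learning rate decays as
    # learning_rate / (1 + decay * iter), and the momentum term reuses
    # the previous update.
    learning_rate, momentum, decay = 0.01, 0.9, 0.001
    weight, update, gradient = 1.0, 0.0, 2.0
    3.times do |iter|
      lr = learning_rate / (1.0 + decay * iter)
      update = momentum * update - lr * gradient
      weight += update
    end
    puts weight # each step moves the weight against the positive gradient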
data/lib/rumale/linear_model/elastic_net.rb ADDED
@@ -0,0 +1,119 @@
+ # frozen_string_literal: true
+
+ require 'rumale/linear_model/base_sgd'
+ require 'rumale/base/regressor'
+
+ module Rumale
+   module LinearModel
+     # ElasticNet is a class that implements Elastic-net Regression
+     # with stochastic gradient descent (SGD) optimization.
+     #
+     # @example
+     #   estimator =
+     #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 200, batch_size: 50, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     # *Reference*
+     # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+     # - Y. Tsuruoka, J. Tsujii, and S. Ananiadou, "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+     # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+     class ElasticNet < BaseSGD
+       include Base::Regressor
+
+       # Return the weight vector.
+       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+       attr_reader :weight_vec
+
+       # Return the bias term (a.k.a. intercept).
+       # @return [Numo::DFloat] (shape: [n_outputs])
+       attr_reader :bias_term
+
+       # Return the random generator for random sampling.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new Elastic-net regressor.
+       #
+       # @param learning_rate [Float] The initial value of learning rate.
+       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+       # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+       #   If nil is given, the decay is set to 'reg_param * learning_rate'.
+       # @param momentum [Float] The momentum factor.
+       # @param reg_param [Float] The regularization parameter.
+       # @param l1_ratio [Float] The elastic-net mixing parameter.
+       #   If l1_ratio = 1, the regularization is similar to Lasso.
+       #   If l1_ratio = 0, the regularization is similar to Ridge.
+       #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+       # @param bias_scale [Float] The scale of the bias term.
+       # @param max_iter [Integer] The maximum number of epochs that indicates
+       #   how many times the whole data is given to the training process.
+       # @param batch_size [Integer] The size of the mini batches.
+       # @param tol [Float] The tolerance of loss for terminating optimization.
+       # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+       #   If nil is given, the method does not execute in parallel.
+       #   If zero or less is given, it becomes equal to the number of processors.
+       #   This parameter is ignored if the Parallel gem is not loaded.
+       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
+                      reg_param: 1.0, l1_ratio: 0.5, fit_bias: true, bias_scale: 1.0,
+                      max_iter: 100, batch_size: 50, tol: 1e-4,
+                      n_jobs: nil, verbose: false, random_seed: nil)
+         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
+                              reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
+                              max_iter: max_iter, batch_size: batch_size, tol: tol)
+         check_params_boolean(fit_bias: fit_bias, verbose: verbose)
+         check_params_numeric_or_nil(decay: decay, n_jobs: n_jobs, random_seed: random_seed)
+         check_params_positive(learning_rate: learning_rate, reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
+         super()
+         @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
+         @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+         @params[:random_seed] ||= srand
+         @rng = Random.new(@params[:random_seed])
+         @penalty_type = ELASTICNET_PENALTY
+         @loss_func = LinearModel::Loss::MeanSquaredError.new
+         @weight_vec = nil
+         @bias_term = nil
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+       # @return [ElasticNet] The learned regressor itself.
+       def fit(x, y)
+         x = check_convert_sample_array(x)
+         y = check_convert_tvalue_array(y)
+         check_sample_tvalue_size(x, y)
+
+         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+         n_features = x.shape[1]
+
+         if n_outputs > 1
+           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+           @bias_term = Numo::DFloat.zeros(n_outputs)
+           if enable_parallel?
+             models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+           else
+             n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+           end
+         else
+           @weight_vec, @bias_term = partial_fit(x, y)
+         end
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+       def predict(x)
+         x = check_convert_sample_array(x)
+         x.dot(@weight_vec.transpose) + @bias_term
+       end
+     end
+   end
+ end
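`fit` above trains one weight vector per output column when `y` is two-dimensional, optionally in parallel via the Parallel gem. A hedged sketch of that multi-target path (synthetic data; shapes follow the doc comments above):

    require 'rumale'

    x = Numo::DFloat.new(80, 3).rand
    w = Numo::DFloat[[1.0, -2.0, 0.5], [0.0, 1.5, -1.0]]
    y = x.dot(w.transpose) # shape [80, 2]: two regression targets

    estimator = Rumale::LinearModel::ElasticNet.new(reg_param: 0.01, l1_ratio: 0.5, random_seed: 1)
    estimator.fit(x, y)
    p estimator.weight_vec.shape # => [2, 3], one row per output
    p estimator.predict(x).shape # => [80, 2]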
data/lib/rumale/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Rumale is a machine learning library in Ruby.
  module Rumale
    # The version of Rumale you are using.
-   VERSION = '0.16.0'
+   VERSION = '0.16.1'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: rumale
  version: !ruby/object:Gem::Version
-   version: 0.16.0
+   version: 0.16.1
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2020-01-04 00:00:00.000000000 Z
+ date: 2020-01-11 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: numo-narray
@@ -214,6 +214,8 @@ files:
  - lib/rumale/kernel_machine/kernel_ridge.rb
  - lib/rumale/kernel_machine/kernel_svc.rb
  - lib/rumale/linear_model/base_linear_model.rb
+ - lib/rumale/linear_model/base_sgd.rb
+ - lib/rumale/linear_model/elastic_net.rb
  - lib/rumale/linear_model/lasso.rb
  - lib/rumale/linear_model/linear_regression.rb
  - lib/rumale/linear_model/logistic_regression.rb
@@ -294,8 +296,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
    version: '0'
  requirements: []
- rubyforge_project:
- rubygems_version: 2.6.14.4
+ rubygems_version: 3.1.2
  signing_key:
  specification_version: 4
  summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning