rumale 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: e97522eaf22db6c80513f8d69513b6aef7dc1a9f
-  data.tar.gz: a2c2d51a15465f5bac67bc0b60aef1c70067bba4
+SHA256:
+  metadata.gz: 30476b58c5c5b39567f1cb3c8346a7c354fbf8d30401555fa2e02995021b759d
+  data.tar.gz: 6f664b0c279e0fef2dc47e608cdc2737318274b45017d6d60f0dd516aa2ebb48
 SHA512:
-  metadata.gz: b04158c2f4247c58593dad54e1d8c79030aff2a3826032610b9d7afee73490843caa3c5636c86fc3fc3196ed45affdb2da17c7d926091f77c784f1970464c2a4
-  data.tar.gz: e75814bcbe3aa087f2a45a15790a180016147d9197442d5d329a6484d1baee8b4d2726265d640a3d47ada45189d6dfb3157f2fa5eec7e334824cf96bd8fba493
+  metadata.gz: aa51f865e4995901e5587e3089fae724a57022d96c95d2b852cfde99f85f9aae7035c4edfe6c4a7899c22674778e1bfc0332ef83b6f234a8c9e8aa982e55e833
+  data.tar.gz: 55e209725a0c716b1f450bed025fceefe36dafe278b96648ac60079e3968778840bc4e1e75ff4181abafc4f98eb93cc40d8e2e0e3b5bf078bc94cf8b9a5dc50d
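The 0.16.1 release records SHA256 digests where 0.16.0 recorded SHA1, alongside the unchanged SHA512 entries. A minimal Ruby sketch for checking these digests locally, assuming the package has been fetched with `gem fetch rumale -v 0.16.1` and that a `tar` command is available (a `.gem` file is a plain tar archive containing metadata.gz and data.tar.gz):

require 'digest'

# Unpack the two hashed members from the .gem tar archive.
system('tar -xf rumale-0.16.1.gem metadata.gz data.tar.gz') || abort('unpack failed')

%w[metadata.gz data.tar.gz].each do |member|
  # Compare against the SHA256 lines recorded in checksums.yaml above.
  puts "#{member}: #{Digest::SHA256.file(member).hexdigest}"
end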
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+# 0.16.1
+- Add regressor class for [ElasticNet](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/ElasticNet.html).
+- Add new linear model abstract class.
+  - In version 0.17.0, all LinearModel estimators will be changed to use this new abstract class.
+    The major differences from the existing abstract class are that
+    the optimizer of LinearModel estimators is fixed to mini-batch SGD with a momentum term,
+    the max_iter parameter indicates the number of epochs instead of the maximum number of iterations,
+    the fit_bias parameter is true by default, and elastic-net style regularization can be used.
+
 # 0.16.0
 ## Breaking changes
 - The meaning of the `max_iter` parameter of the multi-layer perceptron estimators
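The behavioral shift described in the 0.16.1 changelog entry can be seen in the new ElasticNet estimator, the first class built on the new abstract class. A minimal sketch, assuming `x` and `y` are a Numo::DFloat sample matrix of shape [n_samples, n_features] and a target vector of shape [n_samples]:

require 'rumale'

# Under the new abstract class, max_iter counts epochs (full passes over
# the data), not mini-batch iterations, and fit_bias defaults to true.
estimator = Rumale::LinearModel::ElasticNet.new(
  reg_param: 0.1,  # overall regularization strength
  l1_ratio: 0.5,   # elastic-net mix: 0.0 is Ridge-like, 1.0 is Lasso-like
  max_iter: 200, batch_size: 50, random_seed: 1
)
estimator.fit(x, y)
results = estimator.predict(x)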
data/lib/rumale.rb CHANGED
@@ -27,12 +27,14 @@ require 'rumale/optimizer/yellow_fin'
 require 'rumale/pipeline/pipeline'
 require 'rumale/kernel_approximation/rbf'
 require 'rumale/linear_model/base_linear_model'
+require 'rumale/linear_model/base_sgd'
 require 'rumale/linear_model/svc'
 require 'rumale/linear_model/svr'
 require 'rumale/linear_model/logistic_regression'
 require 'rumale/linear_model/linear_regression'
 require 'rumale/linear_model/ridge'
 require 'rumale/linear_model/lasso'
+require 'rumale/linear_model/elastic_net'
 require 'rumale/kernel_machine/kernel_svc'
 require 'rumale/kernel_machine/kernel_pca'
 require 'rumale/kernel_machine/kernel_ridge'
data/lib/rumale/linear_model/base_linear_model.rb CHANGED
@@ -33,7 +33,7 @@ module Rumale
   @params[:max_iter] = max_iter
   @params[:batch_size] = batch_size
   @params[:optimizer] = optimizer
-  @params[:optimizer] ||= Optimizer::Nadam.new
+  @params[:optimizer] ||= Rumale::Optimizer::Nadam.new
   @params[:n_jobs] = n_jobs
   @params[:random_seed] = random_seed
   @params[:random_seed] ||= srand
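The change from `Optimizer::Nadam` to `Rumale::Optimizer::Nadam` reads as a namespace fix: base_sgd.rb (added below) defines a nested `Rumale::LinearModel::Optimizer` module, so inside `Rumale::LinearModel` the bare constant `Optimizer` no longer resolves to the top-level `Rumale::Optimizer`. An illustrative sketch of the constant lookup, not rumale code:

module Rumale
  module Optimizer; class Nadam; end; end

  module LinearModel
    module Optimizer; class SGD; end; end  # the nesting added in base_sgd.rb

    Optimizer::SGD            # => Rumale::LinearModel::Optimizer::SGD (lexical lookup)
    # Optimizer::Nadam        # => NameError: the nested module shadows Rumale::Optimizer
    Rumale::Optimizer::Nadam  # fully qualified, hence the change above
  end
end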
data/lib/rumale/linear_model/base_sgd.rb ADDED
@@ -0,0 +1,230 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+
+module Rumale
+  module LinearModel
+    # @!visibility private
+    # This module consists of the classes that implement penalty (regularization) term.
+    module Penalty
+      # @!visibility private
+      # L2Penalty is a class that applies L2 penalty to weight vector of linear model.
+      # This class is used internally.
+      class L2Penalty
+        # @!visibility private
+        def initialize(reg_param:)
+          @reg_param = reg_param
+        end
+
+        # @!visibility private
+        def call(weight, lr)
+          weight - @reg_param * lr * weight
+        end
+      end
+
+      # @!visibility private
+      # L1Penalty is a class that applies L1 penalty to weight vector of linear model.
+      # This class is used internally.
+      class L1Penalty
+        # @!visibility private
+        def initialize(reg_param:)
+          @q_vec = nil
+          @u = 0
+          @reg_param = reg_param
+        end
+
+        # @!visibility private
+        def call(weight, lr)
+          @q_vec ||= Numo::DFloat.zeros(weight.shape[0])
+          @u += @reg_param * lr
+          z = weight.dup
+          gt = weight.gt(0)
+          lt = weight.lt(0)
+          weight[gt] = Numo::DFloat.maximum(0.0, weight[gt] - (@u + @q_vec[gt])) if gt.count.positive?
+          weight[lt] = Numo::DFloat.minimum(0.0, weight[lt] + (@u - @q_vec[lt])) if lt.count.positive?
+          @q_vec += weight - z
+          weight
+        end
+      end
+    end
+
+    # @!visibility private
+    # This module consists of the class that implements stochastic gradient descent (SGD) optimizer.
+    module Optimizer
+      # @!visibility private
+      # SGD is a class that implements SGD optimizer.
+      # This class is used internally.
+      class SGD
+        # @!visibility private
+        # Create a new SGD optimizer.
+        # @param learning_rate [Float] The initial value of learning rate.
+        # @param momentum [Float] The initial value of momentum.
+        # @param decay [Float] The smoothing parameter.
+        def initialize(learning_rate: 0.01, momentum: 0.0, decay: 0.0)
+          @learning_rate = learning_rate
+          @momentum = momentum
+          @decay = decay
+          @update = nil
+          @iter = 0
+        end
+
+        # @!visibility private
+        def current_learning_rate
+          @learning_rate / (1.0 + @decay * @iter)
+        end
+
+        # @!visibility private
+        def call(weight, gradient)
+          @update ||= Numo::DFloat.zeros(weight.shape[0])
+          @update = @momentum * @update - current_learning_rate * gradient
+          @iter += 1
+          weight + @update
+        end
+      end
+    end
+
+    # @!visibility private
+    # This module consists of the classes that implement loss function for linear model.
+    module Loss
+      # @!visibility private
+      # MeanSquaredError is a class that calculates mean squared error for linear regression model.
+      class MeanSquaredError
+        # @!visibility private
+        def loss(out, y)
+          ((out - y)**2).sum.fdiv(y.shape[0])
+        end
+
+        # @!visibility private
+        def dloss(out, y)
+          2.fdiv(y.shape[0]) * (out - y)
+        end
+      end
+    end
+
+    # BaseSGD is an abstract class for implementation of linear model with mini-batch stochastic gradient descent (SGD) optimization.
+    # This class is used internally.
+    class BaseSGD
+      include Rumale::Base::BaseEstimator
+
+      # Create an initial linear model.
+      def initialize
+        @params = {}
+        @params[:learning_rate] = 0.01
+        @params[:decay] = nil
+        @params[:momentum] = 0.0
+        @params[:bias_scale] = 1.0
+        @params[:fit_bias] = true
+        @params[:reg_param] = 0.0
+        @params[:l1_ratio] = 0.0
+        @params[:max_iter] = 200
+        @params[:batch_size] = 50
+        @params[:tol] = 0.0001
+        @params[:verbose] = false
+        @penalty_type = nil
+        @loss_func = nil
+        @weight_vec = nil
+        @bias_term = nil
+        @n_iter = nil
+        @rng = nil
+      end
+
+      private
+
+      L2_PENALTY = 'l2'
+      L1_PENALTY = 'l1'
+      ELASTICNET_PENALTY = 'elasticnet'
+
+      private_constant :L2_PENALTY, :L1_PENALTY, :ELASTICNET_PENALTY
+
+      def partial_fit(x, y)
+        class_name = self.class.to_s.split('::').last if @params[:verbose]
+        narr = x.class
+        # Expand feature vectors for bias term.
+        x = expand_feature(x) if fit_bias?
+        # Initialize some variables.
+        sub_rng = @rng.dup
+        n_samples, n_features = x.shape
+        weight = Numo::DFloat.zeros(n_features)
+        optimizer = LinearModel::Optimizer::SGD.new(
+          learning_rate: @params[:learning_rate],
+          momentum: @params[:momentum],
+          decay: @params[:decay]
+        )
+        l2_penalty = LinearModel::Penalty::L2Penalty.new(reg_param: l2_reg_param) if apply_l2_penalty?
+        l1_penalty = LinearModel::Penalty::L1Penalty.new(reg_param: l1_reg_param) if apply_l1_penalty?
+        # Optimization.
+        @params[:max_iter].times do |t|
+          sample_ids = [*0...n_samples]
+          sample_ids.shuffle!(random: sub_rng)
+          until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
+            # sampling
+            sub_x = x[subset_ids, true]
+            sub_y = y[subset_ids]
+            # calculate gradient
+            dloss = @loss_func.dloss(sub_x.dot(weight), sub_y)
+            dloss = narr.minimum(1e12, narr.maximum(-1e12, dloss))
+            gradient = dloss.dot(sub_x)
+            # update weight
+            lr = optimizer.current_learning_rate
+            weight = optimizer.call(weight, gradient)
+            # l2 regularization
+            weight = l2_penalty.call(weight, lr) if apply_l2_penalty?
+            # l1 regularization
+            weight = l1_penalty.call(weight, lr) if apply_l1_penalty?
+          end
+          loss = @loss_func.loss(x.dot(weight), y)
+          puts "[#{class_name}] Loss after #{t + 1} epochs: #{loss}" if @params[:verbose]
+          break if loss < @params[:tol]
+        end
+        split_weight(weight)
+      end
+
+      def expand_feature(x)
+        n_samples = x.shape[0]
+        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+      end
+
+      def split_weight(weight)
+        if fit_bias?
+          [weight[0...-1].dup, weight[-1]]
+        else
+          [weight, 0.0]
+        end
+      end
+
+      def fit_bias?
+        @params[:fit_bias] == true
+      end
+
+      def apply_l2_penalty?
+        @penalty_type == L2_PENALTY || @penalty_type == ELASTICNET_PENALTY
+      end
+
+      def apply_l1_penalty?
+        @penalty_type == L1_PENALTY || @penalty_type == ELASTICNET_PENALTY
+      end
+
+      def l2_reg_param
+        case @penalty_type
+        when ELASTICNET_PENALTY
+          @params[:reg_param] * (1.0 - @params[:l1_ratio])
+        when L2_PENALTY
+          @params[:reg_param]
+        else
+          0.0
+        end
+      end
+
+      def l1_reg_param
+        case @penalty_type
+        when ELASTICNET_PENALTY
+          @params[:reg_param] * @params[:l1_ratio]
+        when L1_PENALTY
+          @params[:reg_param]
+        else
+          0.0
+        end
+      end
+    end
+  end
+end
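Two pieces of the class above are easy to trace in isolation: the split of reg_param between the two penalties (l1_reg_param and l2_reg_param) and the momentum update in the internal SGD optimizer. A standalone sketch with illustrative values, not rumale code:

require 'numo/narray'

# Elastic-net split, as in l1_reg_param / l2_reg_param:
reg_param = 0.1
l1_ratio  = 0.5
l1_reg = reg_param * l1_ratio          # 0.05 feeds the cumulative L1 penalty
l2_reg = reg_param * (1.0 - l1_ratio)  # 0.05 feeds the L2 weight decay

# One momentum step, as in LinearModel::Optimizer::SGD#call:
lr = 0.01
momentum = 0.9
update   = Numo::DFloat.zeros(3)
weight   = Numo::DFloat[1.0, -2.0, 0.5]
gradient = Numo::DFloat[0.2, -0.4, 0.1]
update = momentum * update - lr * gradient
weight += update  # earlier updates keep contributing, scaled by momentum

# L2 shrinkage, as in Penalty::L2Penalty#call:
weight -= l2_reg * lr * weight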
data/lib/rumale/linear_model/elastic_net.rb ADDED
@@ -0,0 +1,119 @@
+# frozen_string_literal: true
+
+require 'rumale/linear_model/base_sgd'
+require 'rumale/base/regressor'
+
+module Rumale
+  module LinearModel
+    # ElasticNet is a class that implements Elastic-net Regression
+    # with stochastic gradient descent (SGD) optimization.
+    #
+    # @example
+    #   estimator =
+    #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 200, batch_size: 50, random_seed: 1)
+    #   estimator.fit(training_samples, training_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+    # - Y. Tsuruoka, J. Tsujii, and S. Ananiadou, "Stochastic Gradient Descent Training for L1-regularized Log-linear Models with Cumulative Penalty," Proc. ACL'09, pp. 477--485, 2009.
+    # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
+    class ElasticNet < BaseSGD
+      include Base::Regressor
+
+      # Return the weight vector.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+      attr_reader :weight_vec
+
+      # Return the bias term (a.k.a. intercept).
+      # @return [Numo::DFloat] (shape: [n_outputs])
+      attr_reader :bias_term
+
+      # Return the random generator for random sampling.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new Elastic-net regressor.
+      #
+      # @param learning_rate [Float] The initial value of learning rate.
+      #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
+      #   If nil is given, the decay sets to 'reg_param * learning_rate'.
+      # @param momentum [Float] The momentum factor.
+      # @param reg_param [Float] The regularization parameter.
+      # @param l1_ratio [Float] The elastic-net mixing parameter.
+      #   If l1_ratio = 1, the regularization is similar to Lasso.
+      #   If l1_ratio = 0, the regularization is similar to Ridge.
+      #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of epochs that indicates
+      #   how many times the whole data is given to the training process.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param tol [Float] The tolerance of loss for terminating optimization.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
+                     reg_param: 1.0, l1_ratio: 0.5, fit_bias: true, bias_scale: 1.0,
+                     max_iter: 100, batch_size: 50, tol: 1e-4,
+                     n_jobs: nil, verbose: false, random_seed: nil)
+        check_params_numeric(learning_rate: learning_rate, momentum: momentum,
+                             reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
+                             max_iter: max_iter, batch_size: batch_size, tol: tol)
+        check_params_boolean(fit_bias: fit_bias, verbose: verbose)
+        check_params_numeric_or_nil(decay: decay, n_jobs: n_jobs, random_seed: random_seed)
+        check_params_positive(learning_rate: learning_rate, reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
+        super()
+        @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
+        @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
+        @params[:random_seed] ||= srand
+        @rng = Random.new(@params[:random_seed])
+        @penalty_type = ELASTICNET_PENALTY
+        @loss_func = LinearModel::Loss::MeanSquaredError.new
+        @weight_vec = nil
+        @bias_term = nil
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [ElasticNet] The learned regressor itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+
+        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        n_features = x.shape[1]
+
+        if n_outputs > 1
+          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+          @bias_term = Numo::DFloat.zeros(n_outputs)
+          if enable_parallel?
+            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          end
+        else
+          @weight_vec, @bias_term = partial_fit(x, y)
+        end
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        x = check_convert_sample_array(x)
+        x.dot(@weight_vec.transpose) + @bias_term
+      end
+    end
+  end
+end
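A runnable end-to-end sketch of the new regressor with synthetic data (variable names are illustrative, not part of the gem):

require 'rumale'
require 'numo/narray'

x = Numo::DFloat.new(100, 5).rand
true_weight = Numo::DFloat[1.0, 0.0, -2.0, 0.0, 0.5]
y = x.dot(true_weight) + 0.01 * Numo::DFloat.new(100).rand_norm

model = Rumale::LinearModel::ElasticNet.new(reg_param: 0.01, l1_ratio: 0.7, random_seed: 1)
model.fit(x, y)
p model.weight_vec  # the L1 part pushes uninformative coefficients toward zero
p model.score(x, y) # R^2 score provided by Rumale::Base::Regressor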
data/lib/rumale/version.rb CHANGED
@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.16.0'
+  VERSION = '0.16.1'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.16.0
+  version: 0.16.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-01-04 00:00:00.000000000 Z
+date: 2020-01-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -214,6 +214,8 @@ files:
 - lib/rumale/kernel_machine/kernel_ridge.rb
 - lib/rumale/kernel_machine/kernel_svc.rb
 - lib/rumale/linear_model/base_linear_model.rb
+- lib/rumale/linear_model/base_sgd.rb
+- lib/rumale/linear_model/elastic_net.rb
 - lib/rumale/linear_model/lasso.rb
 - lib/rumale/linear_model/linear_regression.rb
 - lib/rumale/linear_model/logistic_regression.rb
@@ -294,8 +296,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.6.14.4
+rubygems_version: 3.1.2
 signing_key:
 specification_version: 4
 summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning