svmkit 0.3.3 → 0.4.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c0a64cc46c00a252946033b072b4d9498fb4d5cf7131830333483a336c29315
4
- data.tar.gz: 1eb9415f08167772764f1eba4e67f6a3479768db75efcc32a8de85276440d41c
3
+ metadata.gz: cef050a2ac6b55583414cb3ce9c3678dd6d2d1c8b2be04a249222683e10465e1
4
+ data.tar.gz: 7c67ab0e90246f1d9b7e5d0bfb19ed76061d0edf17a05014f521b8ef41e41aed
5
5
  SHA512:
6
- metadata.gz: 2f994dad593e5b752c2a062507f849483a9e4dbdd90190313b672c2f8cd9c9ed102b2fc088823665812f44e6b549bc67cab7d16fb545031bae0b57e713c3c3c3
7
- data.tar.gz: d6da2f56721b8d264898fea922e2bee016987d898b4169f12bc6963044b69a4952f25d1b75380a93ce1fccd9854bb86e085c18fdf9535e273bd2eb1a328d3b98
6
+ metadata.gz: 15341450f3bf3ca49901ae55b507d647468261682c7fdb0b058c21a470c2eec261718b6721ca0e2ad7738cfdabd184128a588d68ad6d079e53c9b1e916efa2b1
7
+ data.tar.gz: fd562db538be12896c005840e065f867e342691e899b33f0524a4db26da33439bfc174141e022d4de3d805657d09e854a4593b9b05b2d9eb99f6cd41da064a1d
data/HISTORY.md CHANGED
@@ -1,3 +1,25 @@
1
+ # 0.4.0
2
+ ## Breaking changes
3
+
4
+ SVMKit introduces an optimizer algorithm that calculates learning rates adaptively
5
+ on each iteration of stochastic gradient descent (SGD).
6
+ While Pegasos SGD runs fast, it sometimes fails to optimize complicated models
7
+ like Factorization Machines.
8
+ To solve this problem, SVMKit 0.3.3 introduced RMSProp optimization on
9
+ FactorizationMachineRegressor, Ridge and Lasso.
10
+ This attempt achieved stable optimization of those estimators.
11
+ Following the success of this attempt, the author decided to use modern optimizer algorithms
12
+ for all SGD-based optimization in SVMKit.
13
+ Through some preliminary experiments, the author adopted Nadam as the default optimizer.
14
+ SVMKit plans to add other optimizer algorithms in future releases so that users can select them.
15
+
16
+ - Switch to Nadam for optimization on SVC, SVR, LogisticRegression, Ridge, Lasso, and Factorization Machine estimators.
17
+ - Combine the reg_param_weight and reg_param_bias parameters on Factorization Machine estimators into a single parameter named reg_param_linear.
18
+ - Remove the init_std parameter on Factorization Machine estimators.
19
+ - Remove learning_rate, decay, and momentum parameters on Ridge, Lasso, and FactorizationMachineRegressor.
20
+ - Remove normalize parameter on SVC, SVR, and LogisticRegression.
21
+
22
+
1
23
  # 0.3.3
2
24
  - Add class for Ridge regressor.
3
25
  - Add class for Lasso regressor.
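
To make the breaking changes above concrete, here is a small, illustrative usage sketch (the toy data is a placeholder of my own; the keyword arguments follow the new signatures shown in the diffs below, and, as the updated documentation notes, the optimizer argument is accepted but Nadam is instantiated internally in this version):

```ruby
require 'svmkit'

# Toy data (placeholder): two features, binary labels in {-1, +1}.
samples = Numo::DFloat[[1.0, 2.0], [2.0, 1.0], [-1.0, -2.0], [-2.0, -1.0]]
labels  = Numo::Int32[1, 1, -1, -1]

# 0.4.0: the learning_rate/decay/momentum and normalize parameters are gone;
# Nadam is used internally for the SGD updates.
svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 2, random_seed: 1)
svc.fit(samples, labels)

# Factorization Machines: reg_param_weight and reg_param_bias are unified into
# reg_param_linear, and init_std has been removed.
fm = SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
  n_factors: 2, loss: 'hinge', reg_param_linear: 0.001, reg_param_factor: 0.001,
  max_iter: 1000, batch_size: 2, random_seed: 1)
fm.fit(samples, labels)
```
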
@@ -13,6 +13,7 @@ require 'svmkit/base/regressor'
13
13
  require 'svmkit/base/transformer'
14
14
  require 'svmkit/base/splitter'
15
15
  require 'svmkit/base/evaluator'
16
+ require 'svmkit/optimizer/nadam'
16
17
  require 'svmkit/kernel_approximation/rbf'
17
18
  require 'svmkit/linear_model/svc'
18
19
  require 'svmkit/linear_model/svr'
@@ -3,6 +3,7 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/regressor'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  module LinearModel
@@ -11,15 +12,13 @@ module SVMKit
11
12
  #
12
13
  # @example
13
14
  # estimator =
14
- # SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 5000, batch_size: 50, random_seed: 1)
15
+ # SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
15
16
  # estimator.fit(training_samples, traininig_values)
16
17
  # results = estimator.predict(testing_samples)
17
18
  #
18
19
  # *Reference*
19
20
  # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
20
21
  # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
21
- # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
22
- # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
23
22
  class Lasso
24
23
  include Base::BaseEstimator
25
24
  include Base::Regressor
@@ -41,30 +40,23 @@ module SVMKit
41
40
  #
42
41
  # @param reg_param [Float] The regularization parameter.
43
42
  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
44
- # @param learning_rate [Float] The learning rate for optimization.
45
- # @param decay [Float] The discounting factor for RMS prop optimization.
46
- # @param momentum [Float] The momentum for optimization.
47
43
  # @param max_iter [Integer] The maximum number of iterations.
48
44
  # @param batch_size [Integer] The size of the mini batches.
45
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
46
+ # Nadam is selected automatically in the current version.
49
47
  # @param random_seed [Integer] The seed value using to initialize the random generator.
50
- def initialize(reg_param: 1.0, fit_bias: false, learning_rate: 0.01, decay: 0.9, momentum: 0.9,
51
- max_iter: 1000, batch_size: 10, random_seed: nil)
52
- check_params_float(reg_param: reg_param,
53
- learning_rate: learning_rate, decay: decay, momentum: momentum)
48
+ def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
49
+ check_params_float(reg_param: reg_param)
54
50
  check_params_integer(max_iter: max_iter, batch_size: batch_size)
55
51
  check_params_boolean(fit_bias: fit_bias)
56
52
  check_params_type_or_nil(Integer, random_seed: random_seed)
57
- check_params_positive(reg_param: reg_param,
58
- learning_rate: learning_rate, decay: decay, momentum: momentum,
59
- max_iter: max_iter, batch_size: batch_size)
53
+ check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
60
54
  @params = {}
61
55
  @params[:reg_param] = reg_param
62
56
  @params[:fit_bias] = fit_bias
63
- @params[:learning_rate] = learning_rate
64
- @params[:decay] = decay
65
- @params[:momentum] = momentum
66
57
  @params[:max_iter] = max_iter
67
58
  @params[:batch_size] = batch_size
59
+ @params[:optimizer] = optimizer
68
60
  @params[:random_seed] = random_seed
69
61
  @params[:random_seed] ||= srand
70
62
  @weight_vec = nil
@@ -138,11 +130,9 @@ module SVMKit
138
130
  rand_ids = [*0...n_samples].shuffle(random: @rng)
139
131
  weight_vec = Numo::DFloat.zeros(n_features)
140
132
  left_weight_vec = Numo::DFloat.zeros(n_features)
141
- left_weight_sqrsum = Numo::DFloat.zeros(n_features)
142
- left_weight_update = Numo::DFloat.zeros(n_features)
143
133
  right_weight_vec = Numo::DFloat.zeros(n_features)
144
- right_weight_sqrsum = Numo::DFloat.zeros(n_features)
145
- right_weight_update = Numo::DFloat.zeros(n_features)
134
+ left_optimizer = Optimizer::Nadam.new
135
+ right_optimizer = Optimizer::Nadam.new
146
136
  # Start optimization.
147
137
  @params[:max_iter].times do |_t|
148
138
  # Random sampling.
@@ -154,12 +144,8 @@ module SVMKit
154
144
  loss_grad = loss_gradient(data, values, weight_vec)
155
145
  next if loss_grad.ne(0.0).count.zero?
156
146
  # Update weight.
157
- left_weight_vec, left_weight_sqrsum, left_weight_update =
158
- update_weight(left_weight_vec, left_weight_sqrsum, left_weight_update,
159
- left_weight_gradient(loss_grad, data))
160
- right_weight_vec, right_weight_sqrsum, right_weight_update =
161
- update_weight(right_weight_vec, right_weight_sqrsum, right_weight_update,
162
- right_weight_gradient(loss_grad, data))
147
+ left_weight_vec = round_weight(left_optimizer.call(left_weight_vec, left_weight_gradient(loss_grad, data)))
148
+ right_weight_vec = round_weight(right_optimizer.call(right_weight_vec, right_weight_gradient(loss_grad, data)))
163
149
  weight_vec = left_weight_vec - right_weight_vec
164
150
  end
165
151
  split_weight_vec_bias(weight_vec)
@@ -177,12 +163,8 @@ module SVMKit
177
163
  ((@params[:reg_param] - loss_grad).expand_dims(1) * data).mean(0)
178
164
  end
179
165
 
180
- def update_weight(weight, sqrsum, update, gr)
181
- new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
182
- new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
183
- new_weight = weight - (new_update + @params[:momentum] * update)
184
- new_weight = 0.5 * (new_weight + new_weight.abs)
185
- [new_weight, new_sqrsum, new_update]
166
+ def round_weight(weight)
167
+ 0.5 * (weight + weight.abs)
186
168
  end
187
169
 
188
170
  def expand_feature(x)
@@ -3,25 +3,26 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/classifier'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
- # This module consists of the classes that implement generalized linear models.
9
9
  module LinearModel
10
10
  # LogisticRegression is a class that implements Logistic Regression
11
- # with stochastic gradient descent (SGD) optimization.
11
+ # with mini-batch stochastic gradient descent optimization.
12
12
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
13
13
  #
14
14
  # @example
15
15
  # estimator =
16
- # SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
16
+ # SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
17
17
  # estimator.fit(training_samples, traininig_labels)
18
18
  # results = estimator.predict(testing_samples)
19
19
  #
20
20
  # *Reference*
21
- # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
21
+ # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
22
22
  class LogisticRegression
23
23
  include Base::BaseEstimator
24
24
  include Base::Classifier
25
+ include Validation
25
26
 
26
27
  # Return the weight vector for Logistic Regression.
27
28
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
@@ -47,23 +48,23 @@ module SVMKit
47
48
  # If fit_bias is true, the feature vector v becoms [v; bias_scale].
48
49
  # @param max_iter [Integer] The maximum number of iterations.
49
50
  # @param batch_size [Integer] The size of the mini batches.
50
- # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
51
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
52
+ # Nadam is selected automatically in the current version.
51
53
  # @param random_seed [Integer] The seed value using to initialize the random generator.
52
54
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
53
- max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
54
- SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
55
- SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
56
- SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
57
- SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
58
- SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter,
59
- batch_size: batch_size)
55
+ max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
56
+ check_params_float(reg_param: reg_param, bias_scale: bias_scale)
57
+ check_params_integer(max_iter: max_iter, batch_size: batch_size)
58
+ check_params_boolean(fit_bias: fit_bias)
59
+ check_params_type_or_nil(Integer, random_seed: random_seed)
60
+ check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
60
61
  @params = {}
61
62
  @params[:reg_param] = reg_param
62
63
  @params[:fit_bias] = fit_bias
63
64
  @params[:bias_scale] = bias_scale
64
65
  @params[:max_iter] = max_iter
65
66
  @params[:batch_size] = batch_size
66
- @params[:normalize] = normalize
67
+ @params[:optimizer] = optimizer
67
68
  @params[:random_seed] = random_seed
68
69
  @params[:random_seed] ||= srand
69
70
  @weight_vec = nil
@@ -78,9 +79,9 @@ module SVMKit
78
79
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
79
80
  # @return [LogisticRegression] The learned classifier itself.
80
81
  def fit(x, y)
81
- SVMKit::Validation.check_sample_array(x)
82
- SVMKit::Validation.check_label_array(y)
83
- SVMKit::Validation.check_sample_label_size(x, y)
82
+ check_sample_array(x)
83
+ check_label_array(y)
84
+ check_sample_label_size(x, y)
84
85
 
85
86
  @classes = Numo::Int32[*y.to_a.uniq.sort]
86
87
  n_classes = @classes.size
@@ -109,8 +110,7 @@ module SVMKit
109
110
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
110
111
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
111
112
  def decision_function(x)
112
- SVMKit::Validation.check_sample_array(x)
113
-
113
+ check_sample_array(x)
114
114
  x.dot(@weight_vec.transpose) + @bias_term
115
115
  end
116
116
 
@@ -119,7 +119,7 @@ module SVMKit
119
119
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
120
120
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
121
121
  def predict(x)
122
- SVMKit::Validation.check_sample_array(x)
122
+ check_sample_array(x)
123
123
 
124
124
  return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
125
125
 
@@ -133,7 +133,7 @@ module SVMKit
133
133
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
134
134
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
135
135
  def predict_proba(x)
136
- SVMKit::Validation.check_sample_array(x)
136
+ check_sample_array(x)
137
137
 
138
138
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
139
139
  return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
@@ -168,40 +168,41 @@ module SVMKit
168
168
 
169
169
  private
170
170
 
171
- def binary_fit(x, bin_y)
171
+ def binary_fit(x, y)
172
172
  # Expand feature vectors for bias term.
173
173
  samples = @params[:fit_bias] ? expand_feature(x) : x
174
174
  # Initialize some variables.
175
175
  n_samples, n_features = samples.shape
176
176
  rand_ids = [*0...n_samples].shuffle(random: @rng)
177
177
  weight_vec = Numo::DFloat.zeros(n_features)
178
+ optimizer = Optimizer::Nadam.new
178
179
  # Start optimization.
179
- @params[:max_iter].times do |t|
180
+ @params[:max_iter].times do |_t|
180
181
  # random sampling
181
182
  subset_ids = rand_ids.shift(@params[:batch_size])
182
183
  rand_ids.concat(subset_ids)
183
- # update the weight vector.
184
- df = samples[subset_ids, true].dot(weight_vec.transpose)
185
- coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0) - bin_y[subset_ids]
186
- mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
187
- weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
188
- # scale the weight vector.
189
- normalize_weight_vec(weight_vec) if @params[:normalize]
184
+ data = samples[subset_ids, true]
185
+ labels = y[subset_ids]
186
+ # calculate gradient for loss function.
187
+ loss_grad = loss_gradient(data, labels, weight_vec)
188
+ # update weight.
189
+ weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
190
190
  end
191
191
  split_weight_vec_bias(weight_vec)
192
192
  end
193
193
 
194
- def expand_feature(x)
195
- Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
194
+ def loss_gradient(x, y, weight)
195
+ z = x.dot(weight)
196
+ grad = y / (Numo::NMath.exp(-y * z) + 1.0) - y
197
+ grad
196
198
  end
197
199
 
198
- def learning_rate(iter)
199
- 1.0 / (@params[:reg_param] * (iter + 1))
200
+ def weight_gradient(loss_grad, x, weight)
201
+ x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
200
202
  end
201
203
 
202
- def normalize_weight_vec(weight_vec)
203
- norm = Math.sqrt(weight_vec.dot(weight_vec))
204
- weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
204
+ def expand_feature(x)
205
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
205
206
  end
206
207
 
207
208
  def split_weight_vec_bias(weight_vec)
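
For reference, here is a sketch of the mathematics that the refactored loss_gradient and weight_gradient above implement (the notation is introduced here, not taken from the gem). For labels $y_i \in \{-1, +1\}$ and scores $z_i = \mathbf{w}^\top \mathbf{x}_i$, the per-sample gradient of the logistic loss and the regularized mini-batch weight gradient are

\[
\frac{\partial}{\partial z_i}\log\bigl(1 + e^{-y_i z_i}\bigr) = \frac{y_i}{1 + e^{-y_i z_i}} - y_i,
\qquad
\nabla_{\mathbf{w}} J \approx \frac{1}{|B|} \sum_{i \in B} \Bigl(\frac{y_i}{1 + e^{-y_i z_i}} - y_i\Bigr)\mathbf{x}_i + \lambda \mathbf{w},
\]

where $B$ is the sampled mini-batch and $\lambda$ is reg_param. The resulting gradient is handed to the Nadam optimizer instead of being scaled by the old Pegasos step size $1/(\lambda (t+1))$.
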
@@ -3,22 +3,19 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/regressor'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  module LinearModel
9
10
  # Ridge is a class that implements Ridge Regression
10
- # with stochastic gradient descent (SGD) optimization.
11
+ # with mini-batch stochastic gradient descent optimization.
11
12
  #
12
13
  # @example
13
14
  # estimator =
14
- # SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 5000, batch_size: 50, random_seed: 1)
15
+ # SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
15
16
  # estimator.fit(training_samples, traininig_values)
16
17
  # results = estimator.predict(testing_samples)
17
18
  #
18
- # *Reference*
19
- # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
20
- # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
21
- # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
22
19
  class Ridge
23
20
  include Base::BaseEstimator
24
21
  include Base::Regressor
@@ -40,30 +37,21 @@ module SVMKit
40
37
  #
41
38
  # @param reg_param [Float] The regularization parameter.
42
39
  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
43
- # @param learning_rate [Float] The learning rate for optimization.
44
- # @param decay [Float] The discounting factor for RMS prop optimization.
45
- # @param momentum [Float] The Nesterov momentum for optimization.
46
40
  # @param max_iter [Integer] The maximum number of iterations.
47
41
  # @param batch_size [Integer] The size of the mini batches.
48
42
  # @param random_seed [Integer] The seed value using to initialize the random generator.
49
- def initialize(reg_param: 1.0, fit_bias: false, learning_rate: 0.01, decay: 0.9, momentum: 0.9,
50
- max_iter: 1000, batch_size: 10, random_seed: nil)
51
- check_params_float(reg_param: reg_param,
52
- learning_rate: learning_rate, decay: decay, momentum: momentum)
43
+ def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
44
+ check_params_float(reg_param: reg_param)
53
45
  check_params_integer(max_iter: max_iter, batch_size: batch_size)
54
46
  check_params_boolean(fit_bias: fit_bias)
55
47
  check_params_type_or_nil(Integer, random_seed: random_seed)
56
- check_params_positive(reg_param: reg_param,
57
- learning_rate: learning_rate, decay: decay, momentum: momentum,
58
- max_iter: max_iter, batch_size: batch_size)
48
+ check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
59
49
  @params = {}
60
50
  @params[:reg_param] = reg_param
61
51
  @params[:fit_bias] = fit_bias
62
- @params[:learning_rate] = learning_rate
63
- @params[:decay] = decay
64
- @params[:momentum] = momentum
65
52
  @params[:max_iter] = max_iter
66
53
  @params[:batch_size] = batch_size
54
+ @params[:optimizer] = optimizer
67
55
  @params[:random_seed] = random_seed
68
56
  @params[:random_seed] ||= srand
69
57
  @weight_vec = nil
@@ -136,8 +124,7 @@ module SVMKit
136
124
  n_samples, n_features = samples.shape
137
125
  rand_ids = [*0...n_samples].shuffle(random: @rng)
138
126
  weight_vec = Numo::DFloat.zeros(n_features)
139
- weight_sqrsum = Numo::DFloat.zeros(n_features)
140
- weight_update = Numo::DFloat.zeros(n_features)
127
+ optimizer = Optimizer::Nadam.new
141
128
  # Start optimization.
142
129
  @params[:max_iter].times do |_t|
143
130
  # Random sampling.
@@ -146,12 +133,10 @@ module SVMKit
146
133
  data = samples[subset_ids, true]
147
134
  values = y[subset_ids]
148
135
  # Calculate gradients for loss function.
149
- loss_grad = loss_gradient(data, values, weight_vec - @params[:momentum] * weight_update)
136
+ loss_grad = loss_gradient(data, values, weight_vec)
150
137
  next if loss_grad.ne(0.0).count.zero?
151
138
  # Update weight.
152
- weight_vec, weight_sqrsum, weight_update =
153
- update_weight(weight_vec, weight_sqrsum, weight_update,
154
- weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
139
+ weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
155
140
  end
156
141
  split_weight_vec_bias(weight_vec)
157
142
  end
@@ -164,13 +149,6 @@ module SVMKit
164
149
  (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param] * weight
165
150
  end
166
151
 
167
- def update_weight(weight, sqrsum, update, gr)
168
- new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
169
- new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
170
- new_weight = weight - (new_update + @params[:momentum] * update)
171
- [new_weight, new_sqrsum, new_update]
172
- end
173
-
174
152
  def expand_feature(x)
175
153
  Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
176
154
  end
@@ -3,26 +3,28 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/classifier'
6
+ require 'svmkit/optimizer/nadam'
6
7
  require 'svmkit/probabilistic_output'
7
8
 
8
9
  module SVMKit
9
10
  # This module consists of the classes that implement generalized linear models.
10
11
  module LinearModel
11
12
  # SVC is a class that implements Support Vector Classifier
12
- # with stochastic gradient descent (SGD) optimization.
13
+ # with mini-batch stochastic gradient descent optimization.
13
14
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
14
15
  #
15
16
  # @example
16
17
  # estimator =
17
- # SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
18
+ # SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
18
19
  # estimator.fit(training_samples, traininig_labels)
19
20
  # results = estimator.predict(testing_samples)
20
21
  #
21
22
  # *Reference*
22
- # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
23
+ # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
23
24
  class SVC
24
25
  include Base::BaseEstimator
25
26
  include Base::Classifier
27
+ include Validation
26
28
 
27
29
  # Return the weight vector for SVC.
28
30
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
@@ -48,16 +50,16 @@ module SVMKit
48
50
  # @param max_iter [Integer] The maximum number of iterations.
49
51
  # @param batch_size [Integer] The size of the mini batches.
50
52
  # @param probability [Boolean] The flag indicating whether to perform probability estimation.
51
- # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
53
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
54
+ # Nadam is selected automatically in the current version.
52
55
  # @param random_seed [Integer] The seed value using to initialize the random generator.
53
56
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
54
- max_iter: 100, batch_size: 50, probability: false, normalize: true, random_seed: nil)
55
- SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
56
- SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
57
- SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, probability: probability, normalize: normalize)
58
- SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
59
- SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter,
60
- batch_size: batch_size)
57
+ max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, random_seed: nil)
58
+ check_params_float(reg_param: reg_param, bias_scale: bias_scale)
59
+ check_params_integer(max_iter: max_iter, batch_size: batch_size)
60
+ check_params_boolean(fit_bias: fit_bias, probability: probability)
61
+ check_params_type_or_nil(Integer, random_seed: random_seed)
62
+ check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
61
63
  @params = {}
62
64
  @params[:reg_param] = reg_param
63
65
  @params[:fit_bias] = fit_bias
@@ -65,7 +67,7 @@ module SVMKit
65
67
  @params[:max_iter] = max_iter
66
68
  @params[:batch_size] = batch_size
67
69
  @params[:probability] = probability
68
- @params[:normalize] = normalize
70
+ @params[:optimizer] = optimizer
69
71
  @params[:random_seed] = random_seed
70
72
  @params[:random_seed] ||= srand
71
73
  @weight_vec = nil
@@ -81,9 +83,9 @@ module SVMKit
81
83
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
82
84
  # @return [SVC] The learned classifier itself.
83
85
  def fit(x, y)
84
- SVMKit::Validation.check_sample_array(x)
85
- SVMKit::Validation.check_label_array(y)
86
- SVMKit::Validation.check_sample_label_size(x, y)
86
+ check_sample_array(x)
87
+ check_label_array(y)
88
+ check_sample_label_size(x, y)
87
89
 
88
90
  @classes = Numo::Int32[*y.to_a.uniq.sort]
89
91
  n_classes = @classes.size
@@ -123,8 +125,7 @@ module SVMKit
123
125
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
124
126
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
125
127
  def decision_function(x)
126
- SVMKit::Validation.check_sample_array(x)
127
-
128
+ check_sample_array(x)
128
129
  x.dot(@weight_vec.transpose) + @bias_term
129
130
  end
130
131
 
@@ -133,7 +134,7 @@ module SVMKit
133
134
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
134
135
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
135
136
  def predict(x)
136
- SVMKit::Validation.check_sample_array(x)
137
+ check_sample_array(x)
137
138
 
138
139
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
139
140
 
@@ -147,7 +148,7 @@ module SVMKit
147
148
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
148
149
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
149
150
  def predict_proba(x)
150
- SVMKit::Validation.check_sample_array(x)
151
+ check_sample_array(x)
151
152
 
152
153
  if @classes.size > 2
153
154
  probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
@@ -186,43 +187,43 @@ module SVMKit
186
187
 
187
188
  private
188
189
 
189
- def binary_fit(x, bin_y)
190
+ def binary_fit(x, y)
190
191
  # Expand feature vectors for bias term.
191
192
  samples = @params[:fit_bias] ? expand_feature(x) : x
192
193
  # Initialize some variables.
193
194
  n_samples, n_features = samples.shape
194
195
  rand_ids = [*0...n_samples].shuffle(random: @rng)
195
196
  weight_vec = Numo::DFloat.zeros(n_features)
197
+ optimizer = Optimizer::Nadam.new
196
198
  # Start optimization.
197
- @params[:max_iter].times do |t|
198
- # random sampling
199
+ @params[:max_iter].times do |_t|
200
+ # random sampling.
199
201
  subset_ids = rand_ids.shift(@params[:batch_size])
200
202
  rand_ids.concat(subset_ids)
201
- sub_samples = samples[subset_ids, true]
202
- sub_bin_y = bin_y[subset_ids]
203
- target_ids = (sub_samples.dot(weight_vec.transpose) * sub_bin_y).lt(1.0).where
204
- n_targets = target_ids.size
205
- next if n_targets.zero?
206
- # update the weight vector.
207
- mean_vec = sub_samples[target_ids, true].transpose.dot(sub_bin_y[target_ids]) / n_targets
208
- weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
209
- # scale the weight vector.
210
- normalize_weight_vec(weight_vec) if @params[:normalize]
203
+ data = samples[subset_ids, true]
204
+ labels = y[subset_ids]
205
+ # calculate gradient for loss function.
206
+ loss_grad = loss_gradient(data, labels, weight_vec)
207
+ next if loss_grad.ne(0.0).count.zero?
208
+ # update weight.
209
+ weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
211
210
  end
212
211
  split_weight_vec_bias(weight_vec)
213
212
  end
214
213
 
215
- def expand_feature(x)
216
- Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
214
+ def loss_gradient(x, y, weight)
215
+ target_ids = (x.dot(weight) * y).lt(1.0).where
216
+ grad = Numo::DFloat.zeros(@params[:batch_size])
217
+ grad[target_ids] = -y[target_ids]
218
+ grad
217
219
  end
218
220
 
219
- def learning_rate(iter)
220
- 1.0 / (@params[:reg_param] * (iter + 1))
221
+ def weight_gradient(loss_grad, x, weight)
222
+ x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
221
223
  end
222
224
 
223
- def normalize_weight_vec(weight_vec)
224
- norm = Math.sqrt(weight_vec.dot(weight_vec))
225
- weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
225
+ def expand_feature(x)
226
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
226
227
  end
227
228
 
228
229
  def split_weight_vec_bias(weight_vec)
@@ -3,15 +3,16 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/regressor'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  module LinearModel
9
10
  # SVR is a class that implements Support Vector Regressor
10
- # with stochastic gradient descent (SGD) optimization.
11
+ # with mini-batch stochastic gradient descent optimization.
11
12
  #
12
13
  # @example
13
14
  # estimator =
14
- # SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
15
+ # SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
15
16
  # estimator.fit(training_samples, traininig_target_values)
16
17
  # results = estimator.predict(testing_samples)
17
18
  #
@@ -20,6 +21,7 @@ module SVMKit
20
21
  class SVR
21
22
  include Base::BaseEstimator
22
23
  include Base::Regressor
24
+ include Validation
23
25
 
24
26
  # Return the weight vector for SVR.
25
27
  # @return [Numo::DFloat] (shape: [n_outputs, n_features])
@@ -41,16 +43,17 @@ module SVMKit
41
43
  # @param epsilon [Float] The margin of tolerance.
42
44
  # @param max_iter [Integer] The maximum number of iterations.
43
45
  # @param batch_size [Integer] The size of the mini batches.
44
- # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
46
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
47
+ # Nadam is selected automatically in the current version.
45
48
  # @param random_seed [Integer] The seed value using to initialize the random generator.
46
49
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
47
- max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
48
- SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
49
- SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
50
- SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
51
- SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
52
- SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
53
- max_iter: max_iter, batch_size: batch_size)
50
+ max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
51
+ check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
52
+ check_params_integer(max_iter: max_iter, batch_size: batch_size)
53
+ check_params_boolean(fit_bias: fit_bias)
54
+ check_params_type_or_nil(Integer, random_seed: random_seed)
55
+ check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
56
+ max_iter: max_iter, batch_size: batch_size)
54
57
  @params = {}
55
58
  @params[:reg_param] = reg_param
56
59
  @params[:fit_bias] = fit_bias
@@ -58,7 +61,7 @@ module SVMKit
58
61
  @params[:epsilon] = epsilon
59
62
  @params[:max_iter] = max_iter
60
63
  @params[:batch_size] = batch_size
61
- @params[:normalize] = normalize
64
+ @params[:optimizer] = optimizer
62
65
  @params[:random_seed] = random_seed
63
66
  @params[:random_seed] ||= srand
64
67
  @weight_vec = nil
@@ -72,9 +75,9 @@ module SVMKit
72
75
  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
73
76
  # @return [SVR] The learned regressor itself.
74
77
  def fit(x, y)
75
- SVMKit::Validation.check_sample_array(x)
76
- SVMKit::Validation.check_tvalue_array(y)
77
- SVMKit::Validation.check_sample_tvalue_size(x, y)
78
+ check_sample_array(x)
79
+ check_tvalue_array(y)
80
+ check_sample_tvalue_size(x, y)
78
81
 
79
82
  n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
80
83
  _n_samples, n_features = x.shape
@@ -99,7 +102,7 @@ module SVMKit
99
102
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
100
103
  # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
101
104
  def predict(x)
102
- SVMKit::Validation.check_sample_array(x)
105
+ check_sample_array(x)
103
106
  x.dot(@weight_vec.transpose) + @bias_term
104
107
  end
105
108
 
@@ -131,35 +134,35 @@ module SVMKit
131
134
  n_samples, n_features = samples.shape
132
135
  rand_ids = [*0...n_samples].shuffle(random: @rng)
133
136
  weight_vec = Numo::DFloat.zeros(n_features)
137
+ optimizer = Optimizer::Nadam.new
134
138
  # Start optimization.
135
- @params[:max_iter].times do |t|
139
+ @params[:max_iter].times do |_t|
136
140
  # random sampling
137
141
  subset_ids = rand_ids.shift(@params[:batch_size])
138
142
  rand_ids.concat(subset_ids)
143
+ data = samples[subset_ids, true]
144
+ values = y[subset_ids]
139
145
  # update the weight vector.
140
- z = samples[subset_ids, true].dot(weight_vec.transpose)
141
- coef = Numo::DFloat.zeros(@params[:batch_size])
142
- coef[(z - y[subset_ids]).gt(@params[:epsilon]).where] = 1
143
- coef[(y[subset_ids] - z).gt(@params[:epsilon]).where] = -1
144
- mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
145
- weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
146
- # scale the weight vector.
147
- normalize_weight_vec(weight_vec) if @params[:normalize]
146
+ loss_grad = loss_gradient(data, values, weight_vec)
147
+ weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
148
148
  end
149
149
  split_weight_vec_bias(weight_vec)
150
150
  end
151
151
 
152
- def expand_feature(x)
153
- Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
152
+ def loss_gradient(x, y, weight)
153
+ z = x.dot(weight)
154
+ grad = Numo::DFloat.zeros(@params[:batch_size])
155
+ grad[(z - y).gt(@params[:epsilon]).where] = 1
156
+ grad[(y - z).gt(@params[:epsilon]).where] = -1
157
+ grad
154
158
  end
155
159
 
156
- def learning_rate(iter)
157
- 1.0 / (@params[:reg_param] * (iter + 1))
160
+ def weight_gradient(loss_grad, x, weight)
161
+ x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
158
162
  end
159
163
 
160
- def normalize_weight_vec(weight_vec)
161
- norm = Math.sqrt(weight_vec.dot(weight_vec))
162
- weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
164
+ def expand_feature(x)
165
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
163
166
  end
164
167
 
165
168
  def split_weight_vec_bias(weight_vec)
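
Similarly, a sketch of what the new loss_gradient methods in the SVC and SVR hunks above compute (my notation): with scores $z_i = \mathbf{w}^\top \mathbf{x}_i$, the hinge-loss subgradient and the epsilon-insensitive-loss subgradient are

\[
g_i^{\mathrm{SVC}} =
\begin{cases}
-y_i & \text{if } y_i z_i < 1,\\
0 & \text{otherwise,}
\end{cases}
\qquad
g_i^{\mathrm{SVR}} =
\begin{cases}
1 & \text{if } z_i - y_i > \epsilon,\\
-1 & \text{if } y_i - z_i > \epsilon,\\
0 & \text{otherwise.}
\end{cases}
\]

In both cases weight_gradient then forms $\frac{1}{|B|}\sum_{i \in B} g_i \mathbf{x}_i + \lambda \mathbf{w}$ and passes it to Optimizer::Nadam#call, replacing the old Pegasos-style update and weight normalization.
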
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+
5
+ module SVMKit
6
+ # This module consists of the classes that implement optimizers adaptively tuning hyperparameters.
7
+ module Optimizer
8
+ # Nadam is a class that implements Nadam optimizer.
9
+ # This class is used for internal processes.
10
+ #
11
+ # *Reference*
12
+ # - T. Dozat, "Incorporating Nesterov Momentum into Adam," Tech. Rep., Stanford University, 2015.
13
+ class Nadam
14
+ include Validation
15
+
16
+ # Create a new optimizer with Nadam
17
+ #
18
+ # @param learning_rate [Float] The initial value of learning rate.
19
+ # @param momentum [Float] The initial value of momentum.
20
+ # @param decay1 [Float] The smoothing parameter for the first moment.
21
+ # @param decay2 [Float] The smoothing parameter for the second moment.
22
+ # @param schedule_decay [Float] The smoothing parameter.
23
+ def initialize(learning_rate: 0.01, momentum: 0.9, decay1: 0.9, decay2: 0.999)
24
+ check_params_float(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
25
+ check_params_positive(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
26
+ @params = {}
27
+ @params[:learning_rate] = learning_rate
28
+ @params[:momentum] = momentum
29
+ @params[:decay1] = decay1
30
+ @params[:decay2] = decay2
31
+ @fst_moment = nil
32
+ @sec_moment = nil
33
+ @decay1_prod = 1.0
34
+ @iter = 0
35
+ end
36
+
37
+ # Calculate the updated weight with Nadam adaptive learning rate.
38
+ #
39
+ # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
40
+ # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
41
+ # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
42
+ def call(weight, gradient)
43
+ @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
44
+ @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
45
+
46
+ @iter += 1
47
+
48
+ decay1_curr = @params[:decay1] * (1.0 - 0.5 * 0.96**(@iter * 0.004))
49
+ decay1_next = @params[:decay1] * (1.0 - 0.5 * 0.96**((@iter + 1) * 0.004))
50
+ decay1_prod_curr = @decay1_prod * decay1_curr
51
+ decay1_prod_next = @decay1_prod * decay1_curr * decay1_next
52
+ @decay1_prod = decay1_prod_curr
53
+
54
+ @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
55
+ @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
56
+ nm_gradient = gradient / (1.0 - decay1_prod_curr)
57
+ nm_fst_moment = @fst_moment / (1.0 - decay1_prod_next)
58
+ nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
59
+
60
+ weight - (@params[:learning_rate] / (nm_sec_moment**0.5 + 1e-8)) * ((1 - decay1_curr) * nm_gradient + decay1_next * nm_fst_moment)
61
+ end
62
+ end
63
+ end
64
+ end
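
For readers comparing the implementation against Dozat's paper, here is a sketch of the update that Optimizer::Nadam#call performs, reconstructed from the code above (symbols are introduced here). With gradient $g_t$, learning rate $\eta$, decay rates $\beta_1$, $\beta_2$, $\epsilon = 10^{-8}$, and the momentum schedule $\mu_t = \beta_1 \bigl(1 - 0.5 \cdot 0.96^{0.004\,t}\bigr)$:

\[
m_t = \beta_1 m_{t-1} + (1 - \beta_1)\,g_t, \qquad
v_t = \beta_2 v_{t-1} + (1 - \beta_2)\,g_t^2,
\]
\[
\hat{g}_t = \frac{g_t}{1 - \prod_{i=1}^{t}\mu_i}, \qquad
\hat{m}_t = \frac{m_t}{1 - \prod_{i=1}^{t+1}\mu_i}, \qquad
\hat{v}_t = \frac{v_t}{1 - \beta_2^{t}},
\]
\[
w_t = w_{t-1} - \frac{\eta}{\sqrt{\hat{v}_t} + \epsilon}\Bigl((1 - \mu_t)\,\hat{g}_t + \mu_{t+1}\,\hat{m}_t\Bigr).
\]

Note that the momentum keyword argument is validated and stored but does not appear in this update; only decay1 and decay2 act as $\beta_1$ and $\beta_2$.
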
@@ -3,6 +3,7 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/classifier'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  # This module consists of the classes that implement polynomial models.
@@ -14,7 +15,7 @@ module SVMKit
14
15
  # @example
15
16
  # estimator =
16
17
  # SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
17
- # n_factors: 10, loss: 'hinge', reg_param_bias: 0.001, reg_param_weight: 0.001, reg_param_factor: 0.001,
18
+ # n_factors: 10, loss: 'hinge', reg_param_linear: 0.001, reg_param_factor: 0.001,
18
19
  # max_iter: 5000, batch_size: 50, random_seed: 1)
19
20
  # estimator.fit(training_samples, traininig_labels)
20
21
  # results = estimator.predict(testing_samples)
@@ -25,6 +26,7 @@ module SVMKit
25
26
  class FactorizationMachineClassifier
26
27
  include Base::BaseEstimator
27
28
  include Base::Classifier
29
+ include Validation
28
30
 
29
31
  # Return the factor matrix for Factorization Machine.
30
32
  # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
@@ -50,32 +52,30 @@ module SVMKit
50
52
  #
51
53
  # @param n_factors [Integer] The maximum number of iterations.
52
54
  # @param loss [String] The loss function ('hinge' or 'logistic').
53
- # @param reg_param_bias [Float] The regularization parameter for bias term.
54
- # @param reg_param_weight [Float] The regularization parameter for weight vector.
55
+ # @param reg_param_linear [Float] The regularization parameter for linear model.
55
56
  # @param reg_param_factor [Float] The regularization parameter for factor matrix.
56
- # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
57
57
  # @param max_iter [Integer] The maximum number of iterations.
58
58
  # @param batch_size [Integer] The size of the mini batches.
59
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
60
+ # Nadam is selected automatically in the current version.
59
61
  # @param random_seed [Integer] The seed value using to initialize the random generator.
60
- def initialize(n_factors: 2, loss: 'hinge', reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
61
- init_std: 0.1, max_iter: 1000, batch_size: 10, random_seed: nil)
62
- SVMKit::Validation.check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
63
- reg_param_factor: reg_param_factor, init_std: init_std)
64
- SVMKit::Validation.check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
65
- SVMKit::Validation.check_params_string(loss: loss)
66
- SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
67
- SVMKit::Validation.check_params_positive(n_factors: n_factors, reg_param_bias: reg_param_bias,
68
- reg_param_weight: reg_param_weight, reg_param_factor: reg_param_factor,
69
- max_iter: max_iter, batch_size: batch_size)
62
+ def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
63
+ max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
64
+ check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
65
+ check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
66
+ check_params_string(loss: loss)
67
+ check_params_type_or_nil(Integer, random_seed: random_seed)
68
+ check_params_positive(n_factors: n_factors,
69
+ reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
70
+ max_iter: max_iter, batch_size: batch_size)
70
71
  @params = {}
71
72
  @params[:n_factors] = n_factors
72
73
  @params[:loss] = loss
73
- @params[:reg_param_bias] = reg_param_bias
74
- @params[:reg_param_weight] = reg_param_weight
74
+ @params[:reg_param_linear] = reg_param_linear
75
75
  @params[:reg_param_factor] = reg_param_factor
76
- @params[:init_std] = init_std
77
76
  @params[:max_iter] = max_iter
78
77
  @params[:batch_size] = batch_size
78
+ @params[:optimizer] = optimizer
79
79
  @params[:random_seed] = random_seed
80
80
  @params[:random_seed] ||= srand
81
81
  @factor_mat = nil
@@ -91,9 +91,9 @@ module SVMKit
91
91
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
92
92
  # @return [FactorizationMachineClassifier] The learned classifier itself.
93
93
  def fit(x, y)
94
- SVMKit::Validation.check_sample_array(x)
95
- SVMKit::Validation.check_label_array(y)
96
- SVMKit::Validation.check_sample_label_size(x, y)
94
+ check_sample_array(x)
95
+ check_label_array(y)
96
+ check_sample_label_size(x, y)
97
97
 
98
98
  @classes = Numo::Int32[*y.to_a.uniq.sort]
99
99
  n_classes = @classes.size
@@ -124,7 +124,7 @@ module SVMKit
124
124
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
125
125
  # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
126
126
  def decision_function(x)
127
- SVMKit::Validation.check_sample_array(x)
127
+ check_sample_array(x)
128
128
  linear_term = @bias_term + x.dot(@weight_vec.transpose)
129
129
  factor_term = if @classes.size <= 2
130
130
  0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
@@ -139,7 +139,7 @@ module SVMKit
139
139
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
140
140
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
141
141
  def predict(x)
142
- SVMKit::Validation.check_sample_array(x)
142
+ check_sample_array(x)
143
143
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
144
144
 
145
145
  n_samples, = x.shape
@@ -152,7 +152,7 @@ module SVMKit
152
152
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
153
153
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
154
154
  def predict_proba(x)
155
- SVMKit::Validation.check_sample_array(x)
155
+ check_sample_array(x)
156
156
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
157
157
  return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
158
158
 
@@ -188,84 +188,76 @@ module SVMKit
188
188
 
189
189
  private
190
190
 
191
- def binary_fit(x, bin_y)
191
+ def binary_fit(x, y)
192
192
  # Initialize some variables.
193
193
  n_samples, n_features = x.shape
194
194
  rand_ids = [*0...n_samples].shuffle(random: @rng)
195
- factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
196
- weight_vec = Numo::DFloat.zeros(n_features)
197
- bias_term = 0.0
195
+ weight_vec = Numo::DFloat.zeros(n_features + 1)
196
+ factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
197
+ weight_optimizer = Optimizer::Nadam.new
198
+ factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
198
199
  # Start optimization.
199
- @params[:max_iter].times do |t|
200
+ @params[:max_iter].times do |_t|
200
201
  # Random sampling.
201
202
  subset_ids = rand_ids.shift(@params[:batch_size])
202
203
  rand_ids.concat(subset_ids)
203
204
  data = x[subset_ids, true]
204
- label = bin_y[subset_ids]
205
+ ex_data = expand_feature(data)
206
+ label = y[subset_ids]
205
207
  # Calculate gradients for loss function.
206
- loss_grad = loss_gradient(data, label, factor_mat, weight_vec, bias_term)
208
+ loss_grad = loss_gradient(data, ex_data, label, factor_mat, weight_vec)
207
209
  next if loss_grad.ne(0.0).count.zero?
208
210
  # Update each parameter.
209
- bias_term -= learning_rate(@params[:reg_param_bias], t) * bias_gradient(loss_grad, bias_term)
210
- weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data, weight_vec)
211
+ weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
211
212
  @params[:n_factors].times do |n|
212
- factor_mat[n, true] -= learning_rate(@params[:reg_param_factor], t) *
213
- factor_gradient(loss_grad, data, factor_mat[n, true])
213
+ factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
214
+ factor_gradient(loss_grad, data, factor_mat[n, true]))
214
215
  end
215
216
  end
216
- [factor_mat, weight_vec, bias_term]
217
+ [factor_mat, *split_weight_vec_bias(weight_vec)]
217
218
  end
218
219
 
219
- def bin_decision_function(x, factor, weight, bias)
220
- bias + x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
220
+ def bin_decision_function(x, ex_x, factor, weight)
221
+ ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
221
222
  end
222
223
 
223
- def hinge_loss_gradient(x, y, factor, weight, bias)
224
- evaluated = y * bin_decision_function(x, factor, weight, bias)
224
+ def hinge_loss_gradient(x, ex_x, y, factor, weight)
225
+ evaluated = y * bin_decision_function(x, ex_x, factor, weight)
225
226
  gradient = Numo::DFloat.zeros(evaluated.size)
226
227
  gradient[evaluated < 1.0] = -y[evaluated < 1.0]
227
228
  gradient
228
229
  end
229
230
 
230
- def logistic_loss_gradient(x, y, factor, weight, bias)
231
- evaluated = y * bin_decision_function(x, factor, weight, bias)
231
+ def logistic_loss_gradient(x, ex_x, y, factor, weight)
232
+ evaluated = y * bin_decision_function(x, ex_x, factor, weight)
232
233
  sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
233
234
  (sigmoid_func - 1.0) * y
234
235
  end
235
236
 
236
- def loss_gradient(x, y, factor, weight, bias)
237
+ def loss_gradient(x, ex_x, y, factor, weight)
237
238
  if @params[:loss] == 'hinge'
238
- hinge_loss_gradient(x, y, factor, weight, bias)
239
+ hinge_loss_gradient(x, ex_x, y, factor, weight)
239
240
  else
240
- logistic_loss_gradient(x, y, factor, weight, bias)
241
+ logistic_loss_gradient(x, ex_x, y, factor, weight)
241
242
  end
242
243
  end
243
244
 
244
- def learning_rate(reg_param, iter)
245
- 1.0 / (reg_param * (iter + 1))
246
- end
247
-
248
- def bias_gradient(loss_grad, bias)
249
- loss_grad.mean + @params[:reg_param_bias] * bias
250
- end
251
-
252
245
  def weight_gradient(loss_grad, data, weight)
253
- (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * weight
246
+ (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
254
247
  end
255
248
 
256
249
  def factor_gradient(loss_grad, data, factor)
257
- reg_term = @params[:reg_param_factor] * factor
258
- (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + reg_term
250
+ (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
259
251
  end
260
252
 
261
- def rand_uniform(shape)
262
- Numo::DFloat[*Array.new(shape.inject(&:*)) { @rng.rand }].reshape(*shape)
253
+ def expand_feature(x)
254
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
263
255
  end
264
256
 
265
- def rand_normal(shape, mu, sigma)
266
- a = rand_uniform(shape)
267
- b = rand_uniform(shape)
268
- mu + sigma * (Numo::NMath.sqrt(-2.0 * Numo::NMath.log(a)) * Numo::NMath.sin(2.0 * Math::PI * b))
257
+ def split_weight_vec_bias(weight_vec)
258
+ weights = weight_vec[0...-1]
259
+ bias = weight_vec[-1]
260
+ [weights, bias]
269
261
  end
270
262
  end
271
263
  end
@@ -3,6 +3,7 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/regressor'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  module PolynomialModel
@@ -12,7 +13,7 @@ module SVMKit
12
13
  # @example
13
14
  # estimator =
14
15
  # SVMKit::PolynomialModel::FactorizationMachineRegressor.new(
15
- # n_factors: 10, reg_param_bias: 0.1, reg_param_weight: 0.1, reg_param_factor: 0.1,
16
+ # n_factors: 10, reg_param_linear: 0.1, reg_param_factor: 0.1,
16
17
  # max_iter: 5000, batch_size: 50, random_seed: 1)
17
18
  # estimator.fit(training_samples, traininig_values)
18
19
  # results = estimator.predict(testing_samples)
@@ -20,8 +21,6 @@ module SVMKit
20
21
  # *Reference*
21
22
  # - S. Rendle, "Factorization Machines with libFM," ACM Transactions on Intelligent Systems and Technology, vol. 3 (3), pp. 57:1--57:22, 2012.
22
23
  # - S. Rendle, "Factorization Machines," Proc. the 10th IEEE International Conference on Data Mining (ICDM'10), pp. 995--1000, 2010.
23
- # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. the 30th International Conference on Machine Learning (ICML' 13), pp. 1139--1147, 2013.
24
- # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
25
24
  class FactorizationMachineRegressor
26
25
  include Base::BaseEstimator
27
26
  include Base::Regressor
@@ -46,40 +45,27 @@ module SVMKit
46
45
  # Create a new regressor with Factorization Machine.
47
46
  #
48
47
  # @param n_factors [Integer] The maximum number of iterations.
49
- # @param reg_param_bias [Float] The regularization parameter for bias term.
50
- # @param reg_param_weight [Float] The regularization parameter for weight vector.
48
+ # @param reg_param_linear [Float] The regularization parameter for linear model.
51
49
  # @param reg_param_factor [Float] The regularization parameter for factor matrix.
52
- # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
53
- # @param learning_rate [Float] The learning rate for optimization.
54
- # @param decay [Float] The discounting factor for RMS prop optimization.
55
- # @param momentum [Float] The Nesterov momentum for optimization.
56
50
  # @param max_iter [Integer] The maximum number of iterations.
57
51
  # @param batch_size [Integer] The size of the mini batches.
52
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
53
+ # Nadam is selected automatically in the current version.
58
54
  # @param random_seed [Integer] The seed value using to initialize the random generator.
59
- def initialize(n_factors: 2,
60
- reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0, init_std: 0.01,
61
- learning_rate: 0.01, decay: 0.9, momentum: 0.9,
62
- max_iter: 1000, batch_size: 10, random_seed: nil)
63
- check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
64
- reg_param_factor: reg_param_factor, init_std: init_std,
65
- learning_rate: learning_rate, decay: decay, momentum: momentum)
55
+ def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
56
+ max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
57
+ check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
66
58
  check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
67
59
  check_params_type_or_nil(Integer, random_seed: random_seed)
68
- check_params_positive(n_factors: n_factors, reg_param_bias: reg_param_bias,
69
- reg_param_weight: reg_param_weight, reg_param_factor: reg_param_factor,
70
- learning_rate: learning_rate, decay: decay, momentum: momentum,
60
+ check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
71
61
  max_iter: max_iter, batch_size: batch_size)
72
62
  @params = {}
73
63
  @params[:n_factors] = n_factors
74
- @params[:reg_param_bias] = reg_param_bias
75
- @params[:reg_param_weight] = reg_param_weight
64
+ @params[:reg_param_linear] = reg_param_linear
76
65
  @params[:reg_param_factor] = reg_param_factor
77
- @params[:init_std] = init_std
78
- @params[:learning_rate] = learning_rate
79
- @params[:decay] = decay
80
- @params[:momentum] = momentum
81
66
  @params[:max_iter] = max_iter
82
67
  @params[:batch_size] = batch_size
68
+ @params[:optimizer] = optimizer
83
69
  @params[:random_seed] = random_seed
84
70
  @params[:random_seed] ||= srand
85
71
  @factor_mat = nil
@@ -160,74 +146,52 @@ module SVMKit
160
146
  # Initialize some variables.
161
147
  n_samples, n_features = x.shape
162
148
  rand_ids = [*0...n_samples].shuffle(random: @rng)
163
- factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
164
- factor_sqrsum = Numo::DFloat.zeros(factor_mat.shape)
165
- factor_update = Numo::DFloat.zeros(factor_mat.shape)
166
- weight_vec = Numo::DFloat.zeros(n_features)
167
- weight_sqrsum = Numo::DFloat.zeros(n_features)
168
- weight_update = Numo::DFloat.zeros(n_features)
169
- bias_term = 0.0
170
- bias_sqrsum = 0.0
171
- bias_update = 0.0
149
+ weight_vec = Numo::DFloat.zeros(n_features + 1)
150
+ factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
151
+ weight_optimizer = Optimizer::Nadam.new
152
+ factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
172
153
  # Start optimization.
173
154
  @params[:max_iter].times do |_t|
174
155
  # Random sampling.
175
156
  subset_ids = rand_ids.shift(@params[:batch_size])
176
157
  rand_ids.concat(subset_ids)
177
158
  data = x[subset_ids, true]
159
+ ex_data = expand_feature(data)
178
160
  values = y[subset_ids]
179
161
  # Calculate gradients for loss function.
180
- loss_grad = loss_gradient(data, values,
181
- factor_mat - @params[:momentum] * factor_update,
182
- weight_vec - @params[:momentum] * weight_update,
183
- bias_term - @params[:momentum] * bias_update)
162
+ loss_grad = loss_gradient(data, ex_data, values, factor_mat, weight_vec)
184
163
  next if loss_grad.ne(0.0).count.zero?
185
164
  # Update each parameter.
186
- bias_term, bias_sqrsum, bias_update =
187
- update_param(bias_term, bias_sqrsum, bias_update,
188
- bias_gradient(loss_grad, bias_term - @params[:momentum] * bias_update))
189
- weight_vec, weight_sqrsum, weight_update =
190
- update_param(weight_vec, weight_sqrsum, weight_update,
191
- weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
165
+ weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
192
166
  @params[:n_factors].times do |n|
193
- factor_update[n, true], factor_sqrsum[n, true], factor_update[n, true] =
194
- update_param(factor_update[n, true], factor_sqrsum[n, true], factor_update[n, true],
195
- factor_gradient(loss_grad, data, factor_mat[n, true] - @params[:momentum] * factor_update[n, true]))
167
+ factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
168
+ factor_gradient(loss_grad, data, factor_mat[n, true]))
196
169
  end
197
170
  end
198
- [factor_mat, weight_vec, bias_term]
171
+ [factor_mat, *split_weight_vec_bias(weight_vec)]
199
172
  end
200
173
 
201
- def loss_gradient(x, y, factor, weight, bias)
202
- z = bias + x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
174
+ def loss_gradient(x, ex_x, y, factor, weight)
175
+ z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
203
176
  2.0 * (z - y)
204
177
  end
205
178
 
206
- def bias_gradient(loss_grad, bias)
207
- loss_grad.mean + @params[:reg_param_bias] * bias
208
- end
209
-
210
179
  def weight_gradient(loss_grad, data, weight)
211
- (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * weight
180
+ (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
212
181
  end
213
182
 
214
183
  def factor_gradient(loss_grad, data, factor)
215
184
  (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
216
185
  end
217
186
 
218
- def update_param(param, sqrsum, update, gr)
219
- new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
220
- new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
221
- new_param = param - (new_update + @params[:momentum] * update)
222
- [new_param, new_sqrsum, new_update]
223
- end
224
-
225
- def rand_uniform(shape)
226
- Numo::DFloat[*Array.new(shape.inject(&:*)) { @rng.rand }].reshape(*shape)
187
+ def expand_feature(x)
188
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
227
189
  end
228
190
 
229
- def rand_normal(shape, mu, sigma)
230
- mu + sigma * (Numo::NMath.sqrt(-2.0 * Numo::NMath.log(rand_uniform(shape))) * Numo::NMath.sin(2.0 * Math::PI * rand_uniform(shape)))
191
+ def split_weight_vec_bias(weight_vec)
192
+ weights = weight_vec[0...-1]
193
+ bias = weight_vec[-1]
194
+ [weights, bias]
231
195
  end
232
196
  end
233
197
  end
@@ -3,5 +3,5 @@
3
3
  # SVMKit is a machine learning library in Ruby.
4
4
  module SVMKit
5
5
  # @!visibility private
6
- VERSION = '0.3.3'.freeze
6
+ VERSION = '0.4.0'.freeze
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-05-25 00:00:00.000000000 Z
11
+ date: 2018-06-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -139,6 +139,7 @@ files:
139
139
  - lib/svmkit/naive_bayes/naive_bayes.rb
140
140
  - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
141
141
  - lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
142
+ - lib/svmkit/optimizer/nadam.rb
142
143
  - lib/svmkit/pairwise_metric.rb
143
144
  - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
144
145
  - lib/svmkit/polynomial_model/factorization_machine_regressor.rb