svmkit 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c0a64cc46c00a252946033b072b4d9498fb4d5cf7131830333483a336c29315
4
- data.tar.gz: 1eb9415f08167772764f1eba4e67f6a3479768db75efcc32a8de85276440d41c
3
+ metadata.gz: cef050a2ac6b55583414cb3ce9c3678dd6d2d1c8b2be04a249222683e10465e1
4
+ data.tar.gz: 7c67ab0e90246f1d9b7e5d0bfb19ed76061d0edf17a05014f521b8ef41e41aed
5
5
  SHA512:
6
- metadata.gz: 2f994dad593e5b752c2a062507f849483a9e4dbdd90190313b672c2f8cd9c9ed102b2fc088823665812f44e6b549bc67cab7d16fb545031bae0b57e713c3c3c3
7
- data.tar.gz: d6da2f56721b8d264898fea922e2bee016987d898b4169f12bc6963044b69a4952f25d1b75380a93ce1fccd9854bb86e085c18fdf9535e273bd2eb1a328d3b98
6
+ metadata.gz: 15341450f3bf3ca49901ae55b507d647468261682c7fdb0b058c21a470c2eec261718b6721ca0e2ad7738cfdabd184128a588d68ad6d079e53c9b1e916efa2b1
7
+ data.tar.gz: fd562db538be12896c005840e065f867e342691e899b33f0524a4db26da33439bfc174141e022d4de3d805657d09e854a4593b9b05b2d9eb99f6cd41da064a1d
data/HISTORY.md CHANGED
@@ -1,3 +1,25 @@
1
+ # 0.4.0
2
+ ## Breaking changes
3
+
4
+ SVMKit introduces an optimizer algorithm that calculates learning rates adaptively
5
+ on each iteration of stochastic gradient descent (SGD).
6
+ While Pegasos SGD runs fast, it sometimes fails to optimize complicated models
7
+ such as Factorization Machines.
8
+ To solve this problem, in version 0.3.3, SVMKit introduced optimization with RMSProp on
9
+ FactorizationMachineRegressor, Ridge and Lasso.
10
+ This change stabilized the optimization of those estimators.
11
+ Following the success of that attempt, the author decided to use modern optimizer algorithms
12
+ for all SGD-based optimization in SVMKit.
13
+ Based on some preliminary experiments, the author adopted Nadam as the default optimizer.
14
+ SVMKit plans to add other optimizer algorithms over time so that users can choose among them.
15
+
16
+ - Switch SVC, SVR, LogisticRegression, Ridge, Lasso, and the Factorization Machine estimators to Nadam for optimization.
17
+ - Combine the reg_param_weight and reg_param_bias parameters of the Factorization Machine estimators into a single parameter named reg_param_linear.
18
+ - Remove the init_std parameter from the Factorization Machine estimators.
19
+ - Remove the learning_rate, decay, and momentum parameters from Ridge, Lasso, and FactorizationMachineRegressor.
20
+ - Remove the normalize parameter from SVC, SVR, and LogisticRegression.
21
+
22
+
1
23
  # 0.3.3
2
24
  - Add class for Ridge regressor.
3
25
  - Add class for Lasso regressor.
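To make the breaking changes above concrete, here is a minimal sketch of the new 0.4.0 constructor calls. The keyword arguments and values are taken from the inline documentation examples elsewhere in this diff; leaving `optimizer: nil` (the default) means the estimator falls back to Nadam internally in this version.

```ruby
require 'svmkit'

# 0.4.0 sketch: learning_rate/decay/momentum, normalize, and init_std are gone.
# Each SGD-based estimator now takes an optional optimizer; nil selects Nadam.
svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)

fm = SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
  n_factors: 10, loss: 'hinge', reg_param_linear: 0.001, reg_param_factor: 0.001,
  max_iter: 5000, batch_size: 50, random_seed: 1
)
```

Code written against 0.3.3 that still passes the removed keywords will raise an ArgumentError (unknown keyword), so callers should drop those arguments when upgrading.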
data/lib/svmkit.rb CHANGED
@@ -13,6 +13,7 @@ require 'svmkit/base/regressor'
13
13
  require 'svmkit/base/transformer'
14
14
  require 'svmkit/base/splitter'
15
15
  require 'svmkit/base/evaluator'
16
+ require 'svmkit/optimizer/nadam'
16
17
  require 'svmkit/kernel_approximation/rbf'
17
18
  require 'svmkit/linear_model/svc'
18
19
  require 'svmkit/linear_model/svr'
data/lib/svmkit/linear_model/lasso.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/regressor'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  module LinearModel
@@ -11,15 +12,13 @@ module SVMKit
11
12
  #
12
13
  # @example
13
14
  # estimator =
14
- # SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 5000, batch_size: 50, random_seed: 1)
15
+ # SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
15
16
  # estimator.fit(training_samples, training_values)
16
17
  # results = estimator.predict(testing_samples)
17
18
  #
18
19
  # *Reference*
19
20
  # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
20
21
  # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
21
- # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
22
- # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
23
22
  class Lasso
24
23
  include Base::BaseEstimator
25
24
  include Base::Regressor
@@ -41,30 +40,23 @@ module SVMKit
41
40
  #
42
41
  # @param reg_param [Float] The regularization parameter.
43
42
  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
44
- # @param learning_rate [Float] The learning rate for optimization.
45
- # @param decay [Float] The discounting factor for RMS prop optimization.
46
- # @param momentum [Float] The momentum for optimization.
47
43
  # @param max_iter [Integer] The maximum number of iterations.
48
44
  # @param batch_size [Integer] The size of the mini batches.
45
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
46
+ # Nadam is selected automatically in this version.
49
47
  # @param random_seed [Integer] The seed value used to initialize the random generator.
50
- def initialize(reg_param: 1.0, fit_bias: false, learning_rate: 0.01, decay: 0.9, momentum: 0.9,
51
- max_iter: 1000, batch_size: 10, random_seed: nil)
52
- check_params_float(reg_param: reg_param,
53
- learning_rate: learning_rate, decay: decay, momentum: momentum)
48
+ def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
49
+ check_params_float(reg_param: reg_param)
54
50
  check_params_integer(max_iter: max_iter, batch_size: batch_size)
55
51
  check_params_boolean(fit_bias: fit_bias)
56
52
  check_params_type_or_nil(Integer, random_seed: random_seed)
57
- check_params_positive(reg_param: reg_param,
58
- learning_rate: learning_rate, decay: decay, momentum: momentum,
59
- max_iter: max_iter, batch_size: batch_size)
53
+ check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
60
54
  @params = {}
61
55
  @params[:reg_param] = reg_param
62
56
  @params[:fit_bias] = fit_bias
63
- @params[:learning_rate] = learning_rate
64
- @params[:decay] = decay
65
- @params[:momentum] = momentum
66
57
  @params[:max_iter] = max_iter
67
58
  @params[:batch_size] = batch_size
59
+ @params[:optimizer] = optimizer
68
60
  @params[:random_seed] = random_seed
69
61
  @params[:random_seed] ||= srand
70
62
  @weight_vec = nil
@@ -138,11 +130,9 @@ module SVMKit
138
130
  rand_ids = [*0...n_samples].shuffle(random: @rng)
139
131
  weight_vec = Numo::DFloat.zeros(n_features)
140
132
  left_weight_vec = Numo::DFloat.zeros(n_features)
141
- left_weight_sqrsum = Numo::DFloat.zeros(n_features)
142
- left_weight_update = Numo::DFloat.zeros(n_features)
143
133
  right_weight_vec = Numo::DFloat.zeros(n_features)
144
- right_weight_sqrsum = Numo::DFloat.zeros(n_features)
145
- right_weight_update = Numo::DFloat.zeros(n_features)
134
+ left_optimizer = Optimizer::Nadam.new
135
+ right_optimizer = Optimizer::Nadam.new
146
136
  # Start optimization.
147
137
  @params[:max_iter].times do |_t|
148
138
  # Random sampling.
@@ -154,12 +144,8 @@ module SVMKit
154
144
  loss_grad = loss_gradient(data, values, weight_vec)
155
145
  next if loss_grad.ne(0.0).count.zero?
156
146
  # Update weight.
157
- left_weight_vec, left_weight_sqrsum, left_weight_update =
158
- update_weight(left_weight_vec, left_weight_sqrsum, left_weight_update,
159
- left_weight_gradient(loss_grad, data))
160
- right_weight_vec, right_weight_sqrsum, right_weight_update =
161
- update_weight(right_weight_vec, right_weight_sqrsum, right_weight_update,
162
- right_weight_gradient(loss_grad, data))
147
+ left_weight_vec = round_weight(left_optimizer.call(left_weight_vec, left_weight_gradient(loss_grad, data)))
148
+ right_weight_vec = round_weight(right_optimizer.call(right_weight_vec, right_weight_gradient(loss_grad, data)))
163
149
  weight_vec = left_weight_vec - right_weight_vec
164
150
  end
165
151
  split_weight_vec_bias(weight_vec)
@@ -177,12 +163,8 @@ module SVMKit
177
163
  ((@params[:reg_param] - loss_grad).expand_dims(1) * data).mean(0)
178
164
  end
179
165
 
180
- def update_weight(weight, sqrsum, update, gr)
181
- new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
182
- new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
183
- new_weight = weight - (new_update + @params[:momentum] * update)
184
- new_weight = 0.5 * (new_weight + new_weight.abs)
185
- [new_weight, new_sqrsum, new_update]
166
+ def round_weight(weight)
167
+ 0.5 * (weight + weight.abs)
186
168
  end
187
169
 
188
170
  def expand_feature(x)
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED
@@ -3,25 +3,26 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/classifier'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
- # This module consists of the classes that implement generalized linear models.
9
9
  module LinearModel
10
10
  # LogisticRegression is a class that implements Logistic Regression
11
- # with stochastic gradient descent (SGD) optimization.
11
+ # with mini-batch stochastic gradient descent optimization.
12
12
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
13
13
  #
14
14
  # @example
15
15
  # estimator =
16
- # SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
16
+ # SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
17
17
  # estimator.fit(training_samples, training_labels)
18
18
  # results = estimator.predict(testing_samples)
19
19
  #
20
20
  # *Reference*
21
- # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
21
+ # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
22
22
  class LogisticRegression
23
23
  include Base::BaseEstimator
24
24
  include Base::Classifier
25
+ include Validation
25
26
 
26
27
  # Return the weight vector for Logistic Regression.
27
28
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
@@ -47,23 +48,23 @@ module SVMKit
47
48
  # If fit_bias is true, the feature vector v becomes [v; bias_scale].
48
49
  # @param max_iter [Integer] The maximum number of iterations.
49
50
  # @param batch_size [Integer] The size of the mini batches.
50
- # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
51
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
52
+ # Nadam is selected automatically in this version.
51
53
  # @param random_seed [Integer] The seed value used to initialize the random generator.
52
54
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
53
- max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
54
- SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
55
- SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
56
- SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
57
- SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
58
- SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter,
59
- batch_size: batch_size)
55
+ max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
56
+ check_params_float(reg_param: reg_param, bias_scale: bias_scale)
57
+ check_params_integer(max_iter: max_iter, batch_size: batch_size)
58
+ check_params_boolean(fit_bias: fit_bias)
59
+ check_params_type_or_nil(Integer, random_seed: random_seed)
60
+ check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
60
61
  @params = {}
61
62
  @params[:reg_param] = reg_param
62
63
  @params[:fit_bias] = fit_bias
63
64
  @params[:bias_scale] = bias_scale
64
65
  @params[:max_iter] = max_iter
65
66
  @params[:batch_size] = batch_size
66
- @params[:normalize] = normalize
67
+ @params[:optimizer] = optimizer
67
68
  @params[:random_seed] = random_seed
68
69
  @params[:random_seed] ||= srand
69
70
  @weight_vec = nil
@@ -78,9 +79,9 @@ module SVMKit
78
79
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
79
80
  # @return [LogisticRegression] The learned classifier itself.
80
81
  def fit(x, y)
81
- SVMKit::Validation.check_sample_array(x)
82
- SVMKit::Validation.check_label_array(y)
83
- SVMKit::Validation.check_sample_label_size(x, y)
82
+ check_sample_array(x)
83
+ check_label_array(y)
84
+ check_sample_label_size(x, y)
84
85
 
85
86
  @classes = Numo::Int32[*y.to_a.uniq.sort]
86
87
  n_classes = @classes.size
@@ -109,8 +110,7 @@ module SVMKit
109
110
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
110
111
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
111
112
  def decision_function(x)
112
- SVMKit::Validation.check_sample_array(x)
113
-
113
+ check_sample_array(x)
114
114
  x.dot(@weight_vec.transpose) + @bias_term
115
115
  end
116
116
 
@@ -119,7 +119,7 @@ module SVMKit
119
119
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
120
120
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
121
121
  def predict(x)
122
- SVMKit::Validation.check_sample_array(x)
122
+ check_sample_array(x)
123
123
 
124
124
  return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
125
125
 
@@ -133,7 +133,7 @@ module SVMKit
133
133
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
134
134
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
135
135
  def predict_proba(x)
136
- SVMKit::Validation.check_sample_array(x)
136
+ check_sample_array(x)
137
137
 
138
138
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
139
139
  return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
@@ -168,40 +168,41 @@ module SVMKit
168
168
 
169
169
  private
170
170
 
171
- def binary_fit(x, bin_y)
171
+ def binary_fit(x, y)
172
172
  # Expand feature vectors for bias term.
173
173
  samples = @params[:fit_bias] ? expand_feature(x) : x
174
174
  # Initialize some variables.
175
175
  n_samples, n_features = samples.shape
176
176
  rand_ids = [*0...n_samples].shuffle(random: @rng)
177
177
  weight_vec = Numo::DFloat.zeros(n_features)
178
+ optimizer = Optimizer::Nadam.new
178
179
  # Start optimization.
179
- @params[:max_iter].times do |t|
180
+ @params[:max_iter].times do |_t|
180
181
  # random sampling
181
182
  subset_ids = rand_ids.shift(@params[:batch_size])
182
183
  rand_ids.concat(subset_ids)
183
- # update the weight vector.
184
- df = samples[subset_ids, true].dot(weight_vec.transpose)
185
- coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0) - bin_y[subset_ids]
186
- mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
187
- weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
188
- # scale the weight vector.
189
- normalize_weight_vec(weight_vec) if @params[:normalize]
184
+ data = samples[subset_ids, true]
185
+ labels = y[subset_ids]
186
+ # calculate gradient for loss function.
187
+ loss_grad = loss_gradient(data, labels, weight_vec)
188
+ # update weight.
189
+ weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
190
190
  end
191
191
  split_weight_vec_bias(weight_vec)
192
192
  end
193
193
 
194
- def expand_feature(x)
195
- Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
194
+ def loss_gradient(x, y, weight)
195
+ z = x.dot(weight)
196
+ grad = y / (Numo::NMath.exp(-y * z) + 1.0) - y
197
+ grad
196
198
  end
197
199
 
198
- def learning_rate(iter)
199
- 1.0 / (@params[:reg_param] * (iter + 1))
200
+ def weight_gradient(loss_grad, x, weight)
201
+ x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
200
202
  end
201
203
 
202
- def normalize_weight_vec(weight_vec)
203
- norm = Math.sqrt(weight_vec.dot(weight_vec))
204
- weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
204
+ def expand_feature(x)
205
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
205
206
  end
206
207
 
207
208
  def split_weight_vec_bias(weight_vec)
data/lib/svmkit/linear_model/ridge.rb CHANGED
@@ -3,22 +3,19 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/regressor'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  module LinearModel
9
10
  # Ridge is a class that implements Ridge Regression
10
- # with stochastic gradient descent (SGD) optimization.
11
+ # with mini-batch stochastic gradient descent optimization.
11
12
  #
12
13
  # @example
13
14
  # estimator =
14
- # SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 5000, batch_size: 50, random_seed: 1)
15
+ # SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
15
16
  # estimator.fit(training_samples, training_values)
16
17
  # results = estimator.predict(testing_samples)
17
18
  #
18
- # *Reference*
19
- # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
20
- # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
21
- # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
22
19
  class Ridge
23
20
  include Base::BaseEstimator
24
21
  include Base::Regressor
@@ -40,30 +37,21 @@ module SVMKit
40
37
  #
41
38
  # @param reg_param [Float] The regularization parameter.
42
39
  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
43
- # @param learning_rate [Float] The learning rate for optimization.
44
- # @param decay [Float] The discounting factor for RMS prop optimization.
45
- # @param momentum [Float] The Nesterov momentum for optimization.
46
40
  # @param max_iter [Integer] The maximum number of iterations.
47
41
  # @param batch_size [Integer] The size of the mini batches.
48
42
  # @param random_seed [Integer] The seed value used to initialize the random generator.
49
- def initialize(reg_param: 1.0, fit_bias: false, learning_rate: 0.01, decay: 0.9, momentum: 0.9,
50
- max_iter: 1000, batch_size: 10, random_seed: nil)
51
- check_params_float(reg_param: reg_param,
52
- learning_rate: learning_rate, decay: decay, momentum: momentum)
43
+ def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
44
+ check_params_float(reg_param: reg_param)
53
45
  check_params_integer(max_iter: max_iter, batch_size: batch_size)
54
46
  check_params_boolean(fit_bias: fit_bias)
55
47
  check_params_type_or_nil(Integer, random_seed: random_seed)
56
- check_params_positive(reg_param: reg_param,
57
- learning_rate: learning_rate, decay: decay, momentum: momentum,
58
- max_iter: max_iter, batch_size: batch_size)
48
+ check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
59
49
  @params = {}
60
50
  @params[:reg_param] = reg_param
61
51
  @params[:fit_bias] = fit_bias
62
- @params[:learning_rate] = learning_rate
63
- @params[:decay] = decay
64
- @params[:momentum] = momentum
65
52
  @params[:max_iter] = max_iter
66
53
  @params[:batch_size] = batch_size
54
+ @params[:optimizer] = optimizer
67
55
  @params[:random_seed] = random_seed
68
56
  @params[:random_seed] ||= srand
69
57
  @weight_vec = nil
@@ -136,8 +124,7 @@ module SVMKit
136
124
  n_samples, n_features = samples.shape
137
125
  rand_ids = [*0...n_samples].shuffle(random: @rng)
138
126
  weight_vec = Numo::DFloat.zeros(n_features)
139
- weight_sqrsum = Numo::DFloat.zeros(n_features)
140
- weight_update = Numo::DFloat.zeros(n_features)
127
+ optimizer = Optimizer::Nadam.new
141
128
  # Start optimization.
142
129
  @params[:max_iter].times do |_t|
143
130
  # Random sampling.
@@ -146,12 +133,10 @@ module SVMKit
146
133
  data = samples[subset_ids, true]
147
134
  values = y[subset_ids]
148
135
  # Calculate gradients for loss function.
149
- loss_grad = loss_gradient(data, values, weight_vec - @params[:momentum] * weight_update)
136
+ loss_grad = loss_gradient(data, values, weight_vec)
150
137
  next if loss_grad.ne(0.0).count.zero?
151
138
  # Update weight.
152
- weight_vec, weight_sqrsum, weight_update =
153
- update_weight(weight_vec, weight_sqrsum, weight_update,
154
- weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
139
+ weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
155
140
  end
156
141
  split_weight_vec_bias(weight_vec)
157
142
  end
@@ -164,13 +149,6 @@ module SVMKit
164
149
  (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param] * weight
165
150
  end
166
151
 
167
- def update_weight(weight, sqrsum, update, gr)
168
- new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
169
- new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
170
- new_weight = weight - (new_update + @params[:momentum] * update)
171
- [new_weight, new_sqrsum, new_update]
172
- end
173
-
174
152
  def expand_feature(x)
175
153
  Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
176
154
  end
data/lib/svmkit/linear_model/svc.rb CHANGED
@@ -3,26 +3,28 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/classifier'
6
+ require 'svmkit/optimizer/nadam'
6
7
  require 'svmkit/probabilistic_output'
7
8
 
8
9
  module SVMKit
9
10
  # This module consists of the classes that implement generalized linear models.
10
11
  module LinearModel
11
12
  # SVC is a class that implements Support Vector Classifier
12
- # with stochastic gradient descent (SGD) optimization.
13
+ # with mini-batch stochastic gradient descent optimization.
13
14
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
14
15
  #
15
16
  # @example
16
17
  # estimator =
17
- # SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
18
+ # SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
18
19
  # estimator.fit(training_samples, traininig_labels)
19
20
  # results = estimator.predict(testing_samples)
20
21
  #
21
22
  # *Reference*
22
- # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
23
+ # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
23
24
  class SVC
24
25
  include Base::BaseEstimator
25
26
  include Base::Classifier
27
+ include Validation
26
28
 
27
29
  # Return the weight vector for SVC.
28
30
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
@@ -48,16 +50,16 @@ module SVMKit
48
50
  # @param max_iter [Integer] The maximum number of iterations.
49
51
  # @param batch_size [Integer] The size of the mini batches.
50
52
  # @param probability [Boolean] The flag indicating whether to perform probability estimation.
51
- # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
53
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
54
+ # Nadam is selected automatically in this version.
52
55
  # @param random_seed [Integer] The seed value using to initialize the random generator.
53
56
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
54
- max_iter: 100, batch_size: 50, probability: false, normalize: true, random_seed: nil)
55
- SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
56
- SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
57
- SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, probability: probability, normalize: normalize)
58
- SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
59
- SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter,
60
- batch_size: batch_size)
57
+ max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, random_seed: nil)
58
+ check_params_float(reg_param: reg_param, bias_scale: bias_scale)
59
+ check_params_integer(max_iter: max_iter, batch_size: batch_size)
60
+ check_params_boolean(fit_bias: fit_bias, probability: probability)
61
+ check_params_type_or_nil(Integer, random_seed: random_seed)
62
+ check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
61
63
  @params = {}
62
64
  @params[:reg_param] = reg_param
63
65
  @params[:fit_bias] = fit_bias
@@ -65,7 +67,7 @@ module SVMKit
65
67
  @params[:max_iter] = max_iter
66
68
  @params[:batch_size] = batch_size
67
69
  @params[:probability] = probability
68
- @params[:normalize] = normalize
70
+ @params[:optimizer] = optimizer
69
71
  @params[:random_seed] = random_seed
70
72
  @params[:random_seed] ||= srand
71
73
  @weight_vec = nil
@@ -81,9 +83,9 @@ module SVMKit
81
83
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
82
84
  # @return [SVC] The learned classifier itself.
83
85
  def fit(x, y)
84
- SVMKit::Validation.check_sample_array(x)
85
- SVMKit::Validation.check_label_array(y)
86
- SVMKit::Validation.check_sample_label_size(x, y)
86
+ check_sample_array(x)
87
+ check_label_array(y)
88
+ check_sample_label_size(x, y)
87
89
 
88
90
  @classes = Numo::Int32[*y.to_a.uniq.sort]
89
91
  n_classes = @classes.size
@@ -123,8 +125,7 @@ module SVMKit
123
125
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
124
126
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
125
127
  def decision_function(x)
126
- SVMKit::Validation.check_sample_array(x)
127
-
128
+ check_sample_array(x)
128
129
  x.dot(@weight_vec.transpose) + @bias_term
129
130
  end
130
131
 
@@ -133,7 +134,7 @@ module SVMKit
133
134
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
134
135
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
135
136
  def predict(x)
136
- SVMKit::Validation.check_sample_array(x)
137
+ check_sample_array(x)
137
138
 
138
139
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
139
140
 
@@ -147,7 +148,7 @@ module SVMKit
147
148
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
148
149
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
149
150
  def predict_proba(x)
150
- SVMKit::Validation.check_sample_array(x)
151
+ check_sample_array(x)
151
152
 
152
153
  if @classes.size > 2
153
154
  probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
@@ -186,43 +187,43 @@ module SVMKit
186
187
 
187
188
  private
188
189
 
189
- def binary_fit(x, bin_y)
190
+ def binary_fit(x, y)
190
191
  # Expand feature vectors for bias term.
191
192
  samples = @params[:fit_bias] ? expand_feature(x) : x
192
193
  # Initialize some variables.
193
194
  n_samples, n_features = samples.shape
194
195
  rand_ids = [*0...n_samples].shuffle(random: @rng)
195
196
  weight_vec = Numo::DFloat.zeros(n_features)
197
+ optimizer = Optimizer::Nadam.new
196
198
  # Start optimization.
197
- @params[:max_iter].times do |t|
198
- # random sampling
199
+ @params[:max_iter].times do |_t|
200
+ # random sampling.
199
201
  subset_ids = rand_ids.shift(@params[:batch_size])
200
202
  rand_ids.concat(subset_ids)
201
- sub_samples = samples[subset_ids, true]
202
- sub_bin_y = bin_y[subset_ids]
203
- target_ids = (sub_samples.dot(weight_vec.transpose) * sub_bin_y).lt(1.0).where
204
- n_targets = target_ids.size
205
- next if n_targets.zero?
206
- # update the weight vector.
207
- mean_vec = sub_samples[target_ids, true].transpose.dot(sub_bin_y[target_ids]) / n_targets
208
- weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
209
- # scale the weight vector.
210
- normalize_weight_vec(weight_vec) if @params[:normalize]
203
+ data = samples[subset_ids, true]
204
+ labels = y[subset_ids]
205
+ # calculate gradient for loss function.
206
+ loss_grad = loss_gradient(data, labels, weight_vec)
207
+ next if loss_grad.ne(0.0).count.zero?
208
+ # update weight.
209
+ weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
211
210
  end
212
211
  split_weight_vec_bias(weight_vec)
213
212
  end
214
213
 
215
- def expand_feature(x)
216
- Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
214
+ def loss_gradient(x, y, weight)
215
+ target_ids = (x.dot(weight) * y).lt(1.0).where
216
+ grad = Numo::DFloat.zeros(@params[:batch_size])
217
+ grad[target_ids] = -y[target_ids]
218
+ grad
217
219
  end
218
220
 
219
- def learning_rate(iter)
220
- 1.0 / (@params[:reg_param] * (iter + 1))
221
+ def weight_gradient(loss_grad, x, weight)
222
+ x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
221
223
  end
222
224
 
223
- def normalize_weight_vec(weight_vec)
224
- norm = Math.sqrt(weight_vec.dot(weight_vec))
225
- weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
225
+ def expand_feature(x)
226
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
226
227
  end
227
228
 
228
229
  def split_weight_vec_bias(weight_vec)
data/lib/svmkit/linear_model/svr.rb CHANGED
@@ -3,15 +3,16 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/regressor'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  module LinearModel
9
10
  # SVR is a class that implements Support Vector Regressor
10
- # with stochastic gradient descent (SGD) optimization.
11
+ # with mini-batch stochastic gradient descent optimization.
11
12
  #
12
13
  # @example
13
14
  # estimator =
14
- # SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
15
+ # SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
15
16
  # estimator.fit(training_samples, training_target_values)
16
17
  # results = estimator.predict(testing_samples)
17
18
  #
@@ -20,6 +21,7 @@ module SVMKit
20
21
  class SVR
21
22
  include Base::BaseEstimator
22
23
  include Base::Regressor
24
+ include Validation
23
25
 
24
26
  # Return the weight vector for SVR.
25
27
  # @return [Numo::DFloat] (shape: [n_outputs, n_features])
@@ -41,16 +43,17 @@ module SVMKit
41
43
  # @param epsilon [Float] The margin of tolerance.
42
44
  # @param max_iter [Integer] The maximum number of iterations.
43
45
  # @param batch_size [Integer] The size of the mini batches.
44
- # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
46
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
47
+ # Nadam is selected automatically in this version.
45
48
  # @param random_seed [Integer] The seed value used to initialize the random generator.
46
49
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
47
- max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
48
- SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
49
- SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
50
- SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
51
- SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
52
- SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
53
- max_iter: max_iter, batch_size: batch_size)
50
+ max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
51
+ check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
52
+ check_params_integer(max_iter: max_iter, batch_size: batch_size)
53
+ check_params_boolean(fit_bias: fit_bias)
54
+ check_params_type_or_nil(Integer, random_seed: random_seed)
55
+ check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
56
+ max_iter: max_iter, batch_size: batch_size)
54
57
  @params = {}
55
58
  @params[:reg_param] = reg_param
56
59
  @params[:fit_bias] = fit_bias
@@ -58,7 +61,7 @@ module SVMKit
58
61
  @params[:epsilon] = epsilon
59
62
  @params[:max_iter] = max_iter
60
63
  @params[:batch_size] = batch_size
61
- @params[:normalize] = normalize
64
+ @params[:optimizer] = optimizer
62
65
  @params[:random_seed] = random_seed
63
66
  @params[:random_seed] ||= srand
64
67
  @weight_vec = nil
@@ -72,9 +75,9 @@ module SVMKit
72
75
  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
73
76
  # @return [SVR] The learned regressor itself.
74
77
  def fit(x, y)
75
- SVMKit::Validation.check_sample_array(x)
76
- SVMKit::Validation.check_tvalue_array(y)
77
- SVMKit::Validation.check_sample_tvalue_size(x, y)
78
+ check_sample_array(x)
79
+ check_tvalue_array(y)
80
+ check_sample_tvalue_size(x, y)
78
81
 
79
82
  n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
80
83
  _n_samples, n_features = x.shape
@@ -99,7 +102,7 @@ module SVMKit
99
102
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
100
103
  # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
101
104
  def predict(x)
102
- SVMKit::Validation.check_sample_array(x)
105
+ check_sample_array(x)
103
106
  x.dot(@weight_vec.transpose) + @bias_term
104
107
  end
105
108
 
@@ -131,35 +134,35 @@ module SVMKit
131
134
  n_samples, n_features = samples.shape
132
135
  rand_ids = [*0...n_samples].shuffle(random: @rng)
133
136
  weight_vec = Numo::DFloat.zeros(n_features)
137
+ optimizer = Optimizer::Nadam.new
134
138
  # Start optimization.
135
- @params[:max_iter].times do |t|
139
+ @params[:max_iter].times do |_t|
136
140
  # random sampling
137
141
  subset_ids = rand_ids.shift(@params[:batch_size])
138
142
  rand_ids.concat(subset_ids)
143
+ data = samples[subset_ids, true]
144
+ values = y[subset_ids]
139
145
  # update the weight vector.
140
- z = samples[subset_ids, true].dot(weight_vec.transpose)
141
- coef = Numo::DFloat.zeros(@params[:batch_size])
142
- coef[(z - y[subset_ids]).gt(@params[:epsilon]).where] = 1
143
- coef[(y[subset_ids] - z).gt(@params[:epsilon]).where] = -1
144
- mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
145
- weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
146
- # scale the weight vector.
147
- normalize_weight_vec(weight_vec) if @params[:normalize]
146
+ loss_grad = loss_gradient(data, values, weight_vec)
147
+ weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
148
148
  end
149
149
  split_weight_vec_bias(weight_vec)
150
150
  end
151
151
 
152
- def expand_feature(x)
153
- Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
152
+ def loss_gradient(x, y, weight)
153
+ z = x.dot(weight)
154
+ grad = Numo::DFloat.zeros(@params[:batch_size])
155
+ grad[(z - y).gt(@params[:epsilon]).where] = 1
156
+ grad[(y - z).gt(@params[:epsilon]).where] = -1
157
+ grad
154
158
  end
155
159
 
156
- def learning_rate(iter)
157
- 1.0 / (@params[:reg_param] * (iter + 1))
160
+ def weight_gradient(loss_grad, x, weight)
161
+ x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
158
162
  end
159
163
 
160
- def normalize_weight_vec(weight_vec)
161
- norm = Math.sqrt(weight_vec.dot(weight_vec))
162
- weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
164
+ def expand_feature(x)
165
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
163
166
  end
164
167
 
165
168
  def split_weight_vec_bias(weight_vec)
data/lib/svmkit/optimizer/nadam.rb ADDED
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+
5
+ module SVMKit
6
+ # This module consists of the classes that implement optimizers adaptively tuning hyperparameters.
7
+ module Optimizer
8
+ # Nadam is a class that implements Nadam optimizer.
9
+ # This class is used for internal processes.
10
+ #
11
+ # *Reference*
12
+ # - T. Dozat, "Incorporating Nesterov Momentum into Adam," Tech. Rep., Stanford University, 2015.
13
+ class Nadam
14
+ include Validation
15
+
16
+ # Create a new optimizer with Nadam
17
+ #
18
+ # @param learning_rate [Float] The initial value of learning rate.
19
+ # @param momentum [Float] The initial value of momentum.
20
+ # @param decay1 [Float] The smoothing parameter for the first moment.
21
+ # @param decay2 [Float] The smoothing parameter for the second moment.
22
+ # @param schedule_decay [Float] The smoothing parameter.
23
+ def initialize(learning_rate: 0.01, momentum: 0.9, decay1: 0.9, decay2: 0.999)
24
+ check_params_float(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
25
+ check_params_positive(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
26
+ @params = {}
27
+ @params[:learning_rate] = learning_rate
28
+ @params[:momentum] = momentum
29
+ @params[:decay1] = decay1
30
+ @params[:decay2] = decay2
31
+ @fst_moment = nil
32
+ @sec_moment = nil
33
+ @decay1_prod = 1.0
34
+ @iter = 0
35
+ end
36
+
37
+ # Calculate the updated weight with Nadam adaptive learning rate.
38
+ #
39
+ # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
40
+ # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
41
+ # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
42
+ def call(weight, gradient)
43
+ @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
44
+ @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
45
+
46
+ @iter += 1
47
+
48
+ decay1_curr = @params[:decay1] * (1.0 - 0.5 * 0.96**(@iter * 0.004))
49
+ decay1_next = @params[:decay1] * (1.0 - 0.5 * 0.96**((@iter + 1) * 0.004))
50
+ decay1_prod_curr = @decay1_prod * decay1_curr
51
+ decay1_prod_next = @decay1_prod * decay1_curr * decay1_next
52
+ @decay1_prod = decay1_prod_curr
53
+
54
+ @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
55
+ @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
56
+ nm_gradient = gradient / (1.0 - decay1_prod_curr)
57
+ nm_fst_moment = @fst_moment / (1.0 - decay1_prod_next)
58
+ nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
59
+
60
+ weight - (@params[:learning_rate] / (nm_sec_moment**0.5 + 1e-8)) * ((1 - decay1_curr) * nm_gradient + decay1_next * nm_fst_moment)
61
+ end
62
+ end
63
+ end
64
+ end
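The new Optimizer::Nadam class is documented as internal, but its interface is small: construct it once per parameter vector and repeatedly pass the current weights and gradient to `call`, which returns the updated weights. A minimal usage sketch based on the signatures shown above (the vector values are illustrative, not from the package):

```ruby
require 'numo/narray'
require 'svmkit/optimizer/nadam'

# Constructor defaults as shown in the diff above.
optimizer = SVMKit::Optimizer::Nadam.new(learning_rate: 0.01, momentum: 0.9, decay1: 0.9, decay2: 0.999)

weight   = Numo::DFloat.zeros(3)
gradient = Numo::DFloat[0.5, -0.2, 0.1] # e.g. a mini-batch gradient

# Each call advances the internal first/second moment estimates and
# returns the Nadam-updated weight vector.
weight = optimizer.call(weight, gradient)
```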
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/classifier'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  # This module consists of the classes that implement polynomial models.
@@ -14,7 +15,7 @@ module SVMKit
14
15
  # @example
15
16
  # estimator =
16
17
  # SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
17
- # n_factors: 10, loss: 'hinge', reg_param_bias: 0.001, reg_param_weight: 0.001, reg_param_factor: 0.001,
18
+ # n_factors: 10, loss: 'hinge', reg_param_linear: 0.001, reg_param_factor: 0.001,
18
19
  # max_iter: 5000, batch_size: 50, random_seed: 1)
19
20
  # estimator.fit(training_samples, training_labels)
20
21
  # results = estimator.predict(testing_samples)
@@ -25,6 +26,7 @@ module SVMKit
25
26
  class FactorizationMachineClassifier
26
27
  include Base::BaseEstimator
27
28
  include Base::Classifier
29
+ include Validation
28
30
 
29
31
  # Return the factor matrix for Factorization Machine.
30
32
  # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
@@ -50,32 +52,30 @@ module SVMKit
50
52
  #
51
53
  # @param n_factors [Integer] The maximum number of latent factors.
52
54
  # @param loss [String] The loss function ('hinge' or 'logistic').
53
- # @param reg_param_bias [Float] The regularization parameter for bias term.
54
- # @param reg_param_weight [Float] The regularization parameter for weight vector.
55
+ # @param reg_param_linear [Float] The regularization parameter for linear model.
55
56
  # @param reg_param_factor [Float] The regularization parameter for factor matrix.
56
- # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
57
57
  # @param max_iter [Integer] The maximum number of iterations.
58
58
  # @param batch_size [Integer] The size of the mini batches.
59
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
60
+ # Nadam is selected automatically in this version.
59
61
  # @param random_seed [Integer] The seed value using to initialize the random generator.
60
- def initialize(n_factors: 2, loss: 'hinge', reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
61
- init_std: 0.1, max_iter: 1000, batch_size: 10, random_seed: nil)
62
- SVMKit::Validation.check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
63
- reg_param_factor: reg_param_factor, init_std: init_std)
64
- SVMKit::Validation.check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
65
- SVMKit::Validation.check_params_string(loss: loss)
66
- SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
67
- SVMKit::Validation.check_params_positive(n_factors: n_factors, reg_param_bias: reg_param_bias,
68
- reg_param_weight: reg_param_weight, reg_param_factor: reg_param_factor,
69
- max_iter: max_iter, batch_size: batch_size)
62
+ def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
63
+ max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
64
+ check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
65
+ check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
66
+ check_params_string(loss: loss)
67
+ check_params_type_or_nil(Integer, random_seed: random_seed)
68
+ check_params_positive(n_factors: n_factors,
69
+ reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
70
+ max_iter: max_iter, batch_size: batch_size)
70
71
  @params = {}
71
72
  @params[:n_factors] = n_factors
72
73
  @params[:loss] = loss
73
- @params[:reg_param_bias] = reg_param_bias
74
- @params[:reg_param_weight] = reg_param_weight
74
+ @params[:reg_param_linear] = reg_param_linear
75
75
  @params[:reg_param_factor] = reg_param_factor
76
- @params[:init_std] = init_std
77
76
  @params[:max_iter] = max_iter
78
77
  @params[:batch_size] = batch_size
78
+ @params[:optimizer] = optimizer
79
79
  @params[:random_seed] = random_seed
80
80
  @params[:random_seed] ||= srand
81
81
  @factor_mat = nil
@@ -91,9 +91,9 @@ module SVMKit
91
91
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
92
92
  # @return [FactorizationMachineClassifier] The learned classifier itself.
93
93
  def fit(x, y)
94
- SVMKit::Validation.check_sample_array(x)
95
- SVMKit::Validation.check_label_array(y)
96
- SVMKit::Validation.check_sample_label_size(x, y)
94
+ check_sample_array(x)
95
+ check_label_array(y)
96
+ check_sample_label_size(x, y)
97
97
 
98
98
  @classes = Numo::Int32[*y.to_a.uniq.sort]
99
99
  n_classes = @classes.size
@@ -124,7 +124,7 @@ module SVMKit
124
124
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
125
125
  # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
126
126
  def decision_function(x)
127
- SVMKit::Validation.check_sample_array(x)
127
+ check_sample_array(x)
128
128
  linear_term = @bias_term + x.dot(@weight_vec.transpose)
129
129
  factor_term = if @classes.size <= 2
130
130
  0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
@@ -139,7 +139,7 @@ module SVMKit
139
139
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
140
140
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
141
141
  def predict(x)
142
- SVMKit::Validation.check_sample_array(x)
142
+ check_sample_array(x)
143
143
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
144
144
 
145
145
  n_samples, = x.shape
@@ -152,7 +152,7 @@ module SVMKit
152
152
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
153
153
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
154
154
  def predict_proba(x)
155
- SVMKit::Validation.check_sample_array(x)
155
+ check_sample_array(x)
156
156
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
157
157
  return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
158
158
 
@@ -188,84 +188,76 @@ module SVMKit
188
188
 
189
189
  private
190
190
 
191
- def binary_fit(x, bin_y)
191
+ def binary_fit(x, y)
192
192
  # Initialize some variables.
193
193
  n_samples, n_features = x.shape
194
194
  rand_ids = [*0...n_samples].shuffle(random: @rng)
195
- factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
196
- weight_vec = Numo::DFloat.zeros(n_features)
197
- bias_term = 0.0
195
+ weight_vec = Numo::DFloat.zeros(n_features + 1)
196
+ factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
197
+ weight_optimizer = Optimizer::Nadam.new
198
+ factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
198
199
  # Start optimization.
199
- @params[:max_iter].times do |t|
200
+ @params[:max_iter].times do |_t|
200
201
  # Random sampling.
201
202
  subset_ids = rand_ids.shift(@params[:batch_size])
202
203
  rand_ids.concat(subset_ids)
203
204
  data = x[subset_ids, true]
204
- label = bin_y[subset_ids]
205
+ ex_data = expand_feature(data)
206
+ label = y[subset_ids]
205
207
  # Calculate gradients for loss function.
206
- loss_grad = loss_gradient(data, label, factor_mat, weight_vec, bias_term)
208
+ loss_grad = loss_gradient(data, ex_data, label, factor_mat, weight_vec)
207
209
  next if loss_grad.ne(0.0).count.zero?
208
210
  # Update each parameter.
209
- bias_term -= learning_rate(@params[:reg_param_bias], t) * bias_gradient(loss_grad, bias_term)
210
- weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data, weight_vec)
211
+ weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
211
212
  @params[:n_factors].times do |n|
212
- factor_mat[n, true] -= learning_rate(@params[:reg_param_factor], t) *
213
- factor_gradient(loss_grad, data, factor_mat[n, true])
213
+ factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
214
+ factor_gradient(loss_grad, data, factor_mat[n, true]))
214
215
  end
215
216
  end
216
- [factor_mat, weight_vec, bias_term]
217
+ [factor_mat, *split_weight_vec_bias(weight_vec)]
217
218
  end
218
219
 
219
- def bin_decision_function(x, factor, weight, bias)
220
- bias + x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
220
+ def bin_decision_function(x, ex_x, factor, weight)
221
+ ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
221
222
  end
222
223
 
223
- def hinge_loss_gradient(x, y, factor, weight, bias)
224
- evaluated = y * bin_decision_function(x, factor, weight, bias)
224
+ def hinge_loss_gradient(x, ex_x, y, factor, weight)
225
+ evaluated = y * bin_decision_function(x, ex_x, factor, weight)
225
226
  gradient = Numo::DFloat.zeros(evaluated.size)
226
227
  gradient[evaluated < 1.0] = -y[evaluated < 1.0]
227
228
  gradient
228
229
  end
229
230
 
230
- def logistic_loss_gradient(x, y, factor, weight, bias)
231
- evaluated = y * bin_decision_function(x, factor, weight, bias)
231
+ def logistic_loss_gradient(x, ex_x, y, factor, weight)
232
+ evaluated = y * bin_decision_function(x, ex_x, factor, weight)
232
233
  sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
233
234
  (sigmoid_func - 1.0) * y
234
235
  end
235
236
 
236
- def loss_gradient(x, y, factor, weight, bias)
237
+ def loss_gradient(x, ex_x, y, factor, weight)
237
238
  if @params[:loss] == 'hinge'
238
- hinge_loss_gradient(x, y, factor, weight, bias)
239
+ hinge_loss_gradient(x, ex_x, y, factor, weight)
239
240
  else
240
- logistic_loss_gradient(x, y, factor, weight, bias)
241
+ logistic_loss_gradient(x, ex_x, y, factor, weight)
241
242
  end
242
243
  end
243
244
 
244
- def learning_rate(reg_param, iter)
245
- 1.0 / (reg_param * (iter + 1))
246
- end
247
-
248
- def bias_gradient(loss_grad, bias)
249
- loss_grad.mean + @params[:reg_param_bias] * bias
250
- end
251
-
252
245
  def weight_gradient(loss_grad, data, weight)
253
- (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * weight
246
+ (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
254
247
  end
255
248
 
256
249
  def factor_gradient(loss_grad, data, factor)
257
- reg_term = @params[:reg_param_factor] * factor
258
- (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + reg_term
250
+ (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
259
251
  end
260
252
 
261
- def rand_uniform(shape)
262
- Numo::DFloat[*Array.new(shape.inject(&:*)) { @rng.rand }].reshape(*shape)
253
+ def expand_feature(x)
254
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
263
255
  end
264
256
 
265
- def rand_normal(shape, mu, sigma)
266
- a = rand_uniform(shape)
267
- b = rand_uniform(shape)
268
- mu + sigma * (Numo::NMath.sqrt(-2.0 * Numo::NMath.log(a)) * Numo::NMath.sin(2.0 * Math::PI * b))
257
+ def split_weight_vec_bias(weight_vec)
258
+ weights = weight_vec[0...-1]
259
+ bias = weight_vec[-1]
260
+ [weights, bias]
269
261
  end
270
262
  end
271
263
  end
data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  require 'svmkit/validation'
4
4
  require 'svmkit/base/base_estimator'
5
5
  require 'svmkit/base/regressor'
6
+ require 'svmkit/optimizer/nadam'
6
7
 
7
8
  module SVMKit
8
9
  module PolynomialModel
@@ -12,7 +13,7 @@ module SVMKit
12
13
  # @example
13
14
  # estimator =
14
15
  # SVMKit::PolynomialModel::FactorizationMachineRegressor.new(
15
- # n_factors: 10, reg_param_bias: 0.1, reg_param_weight: 0.1, reg_param_factor: 0.1,
16
+ # n_factors: 10, reg_param_linear: 0.1, reg_param_factor: 0.1,
16
17
  # max_iter: 5000, batch_size: 50, random_seed: 1)
17
18
  # estimator.fit(training_samples, training_values)
18
19
  # results = estimator.predict(testing_samples)
@@ -20,8 +21,6 @@ module SVMKit
20
21
  # *Reference*
21
22
  # - S. Rendle, "Factorization Machines with libFM," ACM Transactions on Intelligent Systems and Technology, vol. 3 (3), pp. 57:1--57:22, 2012.
22
23
  # - S. Rendle, "Factorization Machines," Proc. the 10th IEEE International Conference on Data Mining (ICDM'10), pp. 995--1000, 2010.
23
- # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. the 30th International Conference on Machine Learning (ICML' 13), pp. 1139--1147, 2013.
24
- # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
25
24
  class FactorizationMachineRegressor
26
25
  include Base::BaseEstimator
27
26
  include Base::Regressor
@@ -46,40 +45,27 @@ module SVMKit
46
45
  # Create a new regressor with Factorization Machine.
47
46
  #
48
47
  # @param n_factors [Integer] The maximum number of latent factors.
49
- # @param reg_param_bias [Float] The regularization parameter for bias term.
50
- # @param reg_param_weight [Float] The regularization parameter for weight vector.
48
+ # @param reg_param_linear [Float] The regularization parameter for linear model.
51
49
  # @param reg_param_factor [Float] The regularization parameter for factor matrix.
52
- # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
53
- # @param learning_rate [Float] The learning rate for optimization.
54
- # @param decay [Float] The discounting factor for RMS prop optimization.
55
- # @param momentum [Float] The Nesterov momentum for optimization.
56
50
  # @param max_iter [Integer] The maximum number of iterations.
57
51
  # @param batch_size [Integer] The size of the mini batches.
52
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
53
+ # Nadam is selected automatically in this version.
58
54
  # @param random_seed [Integer] The seed value used to initialize the random generator.
59
- def initialize(n_factors: 2,
60
- reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0, init_std: 0.01,
61
- learning_rate: 0.01, decay: 0.9, momentum: 0.9,
62
- max_iter: 1000, batch_size: 10, random_seed: nil)
63
- check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
64
- reg_param_factor: reg_param_factor, init_std: init_std,
65
- learning_rate: learning_rate, decay: decay, momentum: momentum)
55
+ def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
56
+ max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
57
+ check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
66
58
  check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
67
59
  check_params_type_or_nil(Integer, random_seed: random_seed)
68
- check_params_positive(n_factors: n_factors, reg_param_bias: reg_param_bias,
69
- reg_param_weight: reg_param_weight, reg_param_factor: reg_param_factor,
70
- learning_rate: learning_rate, decay: decay, momentum: momentum,
60
+ check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
71
61
  max_iter: max_iter, batch_size: batch_size)
72
62
  @params = {}
73
63
  @params[:n_factors] = n_factors
74
- @params[:reg_param_bias] = reg_param_bias
75
- @params[:reg_param_weight] = reg_param_weight
64
+ @params[:reg_param_linear] = reg_param_linear
76
65
  @params[:reg_param_factor] = reg_param_factor
77
- @params[:init_std] = init_std
78
- @params[:learning_rate] = learning_rate
79
- @params[:decay] = decay
80
- @params[:momentum] = momentum
81
66
  @params[:max_iter] = max_iter
82
67
  @params[:batch_size] = batch_size
68
+ @params[:optimizer] = optimizer
83
69
  @params[:random_seed] = random_seed
84
70
  @params[:random_seed] ||= srand
85
71
  @factor_mat = nil
@@ -160,74 +146,52 @@ module SVMKit
160
146
  # Initialize some variables.
161
147
  n_samples, n_features = x.shape
162
148
  rand_ids = [*0...n_samples].shuffle(random: @rng)
163
- factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
164
- factor_sqrsum = Numo::DFloat.zeros(factor_mat.shape)
165
- factor_update = Numo::DFloat.zeros(factor_mat.shape)
166
- weight_vec = Numo::DFloat.zeros(n_features)
167
- weight_sqrsum = Numo::DFloat.zeros(n_features)
168
- weight_update = Numo::DFloat.zeros(n_features)
169
- bias_term = 0.0
170
- bias_sqrsum = 0.0
171
- bias_update = 0.0
149
+ weight_vec = Numo::DFloat.zeros(n_features + 1)
150
+ factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
151
+ weight_optimizer = Optimizer::Nadam.new
152
+ factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
172
153
  # Start optimization.
173
154
  @params[:max_iter].times do |_t|
174
155
  # Random sampling.
175
156
  subset_ids = rand_ids.shift(@params[:batch_size])
176
157
  rand_ids.concat(subset_ids)
177
158
  data = x[subset_ids, true]
159
+ ex_data = expand_feature(data)
178
160
  values = y[subset_ids]
179
161
  # Calculate gradients for loss function.
180
- loss_grad = loss_gradient(data, values,
181
- factor_mat - @params[:momentum] * factor_update,
182
- weight_vec - @params[:momentum] * weight_update,
183
- bias_term - @params[:momentum] * bias_update)
162
+ loss_grad = loss_gradient(data, ex_data, values, factor_mat, weight_vec)
184
163
  next if loss_grad.ne(0.0).count.zero?
185
164
  # Update each parameter.
186
- bias_term, bias_sqrsum, bias_update =
187
- update_param(bias_term, bias_sqrsum, bias_update,
188
- bias_gradient(loss_grad, bias_term - @params[:momentum] * bias_update))
189
- weight_vec, weight_sqrsum, weight_update =
190
- update_param(weight_vec, weight_sqrsum, weight_update,
191
- weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
165
+ weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
192
166
  @params[:n_factors].times do |n|
193
- factor_update[n, true], factor_sqrsum[n, true], factor_update[n, true] =
194
- update_param(factor_update[n, true], factor_sqrsum[n, true], factor_update[n, true],
195
- factor_gradient(loss_grad, data, factor_mat[n, true] - @params[:momentum] * factor_update[n, true]))
167
+ factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
168
+ factor_gradient(loss_grad, data, factor_mat[n, true]))
196
169
  end
197
170
  end
198
- [factor_mat, weight_vec, bias_term]
171
+ [factor_mat, *split_weight_vec_bias(weight_vec)]
199
172
  end
200
173
 
201
- def loss_gradient(x, y, factor, weight, bias)
202
- z = bias + x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
174
+ def loss_gradient(x, ex_x, y, factor, weight)
175
+ z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
203
176
  2.0 * (z - y)
204
177
  end
205
178
 
206
- def bias_gradient(loss_grad, bias)
207
- loss_grad.mean + @params[:reg_param_bias] * bias
208
- end
209
-
210
179
  def weight_gradient(loss_grad, data, weight)
211
- (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * weight
180
+ (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
212
181
  end
213
182
 
214
183
  def factor_gradient(loss_grad, data, factor)
215
184
  (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
216
185
  end
217
186
 
218
- def update_param(param, sqrsum, update, gr)
219
- new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
220
- new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
221
- new_param = param - (new_update + @params[:momentum] * update)
222
- [new_param, new_sqrsum, new_update]
223
- end
224
-
225
- def rand_uniform(shape)
226
- Numo::DFloat[*Array.new(shape.inject(&:*)) { @rng.rand }].reshape(*shape)
187
+ def expand_feature(x)
188
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
227
189
  end
228
190
 
229
- def rand_normal(shape, mu, sigma)
230
- mu + sigma * (Numo::NMath.sqrt(-2.0 * Numo::NMath.log(rand_uniform(shape))) * Numo::NMath.sin(2.0 * Math::PI * rand_uniform(shape)))
191
+ def split_weight_vec_bias(weight_vec)
192
+ weights = weight_vec[0...-1]
193
+ bias = weight_vec[-1]
194
+ [weights, bias]
231
195
  end
232
196
  end
233
197
  end
data/lib/svmkit/version.rb CHANGED
@@ -3,5 +3,5 @@
3
3
  # SVMKit is a machine learning library in Ruby.
4
4
  module SVMKit
5
5
  # @!visibility private
6
- VERSION = '0.3.3'.freeze
6
+ VERSION = '0.4.0'.freeze
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-05-25 00:00:00.000000000 Z
11
+ date: 2018-06-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -139,6 +139,7 @@ files:
139
139
  - lib/svmkit/naive_bayes/naive_bayes.rb
140
140
  - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
141
141
  - lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
142
+ - lib/svmkit/optimizer/nadam.rb
142
143
  - lib/svmkit/pairwise_metric.rb
143
144
  - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
144
145
  - lib/svmkit/polynomial_model/factorization_machine_regressor.rb