svmkit 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +22 -0
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/linear_model/lasso.rb +14 -32
- data/lib/svmkit/linear_model/logistic_regression.rb +37 -36
- data/lib/svmkit/linear_model/ridge.rb +10 -32
- data/lib/svmkit/linear_model/svc.rb +40 -39
- data/lib/svmkit/linear_model/svr.rb +34 -31
- data/lib/svmkit/optimizer/nadam.rb +64 -0
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +53 -61
- data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb +30 -66
- data/lib/svmkit/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cef050a2ac6b55583414cb3ce9c3678dd6d2d1c8b2be04a249222683e10465e1
+  data.tar.gz: 7c67ab0e90246f1d9b7e5d0bfb19ed76061d0edf17a05014f521b8ef41e41aed
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 15341450f3bf3ca49901ae55b507d647468261682c7fdb0b058c21a470c2eec261718b6721ca0e2ad7738cfdabd184128a588d68ad6d079e53c9b1e916efa2b1
+  data.tar.gz: fd562db538be12896c005840e065f867e342691e899b33f0524a4db26da33439bfc174141e022d4de3d805657d09e854a4593b9b05b2d9eb99f6cd41da064a1d
data/HISTORY.md
CHANGED
@@ -1,3 +1,25 @@
+# 0.4.0
+## Breaking changes
+
+SVMKit introduces optimizer algorithm that calculates learning rates adaptively
+on each iteration of stochastic gradient descent (SGD).
+While Pegasos SGD runs fast, it sometimes fails to optimize complicated models
+like Factorization Machine.
+To solve this problem, in version 0.3.3, SVMKit introduced optimization with RMSProp on
+FactorizationMachineRegressor, Ridge and Lasso.
+This attempt realized stable optimization of those estimators.
+Following the success of the attempt, author decided to use modern optimizer algorithms
+with all SGD optimizations in SVMKit.
+Through some preliminary experiments, author implemented Nadam as the default optimizer.
+SVMKit plans to add other optimizer algorithms sequentially, so that users can select them.
+
+- Fix to use Nadam for optimization on SVC, SVR, LogisticRegression, Ridge, Lasso, and Factorization Machine estimators.
+- Combine reg_param_weight and reg_param_bias parameters on Factorization Machine estimators into the unified parameter named reg_param_linear.
+- Remove init_std paramter on Factorization Machine estimators.
+- Remove learning_rate, decay, and momentum parameters on Ridge, Lasso, and FactorizationMachineRegressor.
+- Remove normalize parameter on SVC, SVR, and LogisticRegression.
+
+
 # 0.3.3
 - Add class for Ridge regressor.
 - Add class for Lasso regressor.
data/lib/svmkit.rb
CHANGED
@@ -13,6 +13,7 @@ require 'svmkit/base/regressor'
 require 'svmkit/base/transformer'
 require 'svmkit/base/splitter'
 require 'svmkit/base/evaluator'
+require 'svmkit/optimizer/nadam'
 require 'svmkit/kernel_approximation/rbf'
 require 'svmkit/linear_model/svc'
 require 'svmkit/linear_model/svr'
data/lib/svmkit/linear_model/lasso.rb
CHANGED
@@ -3,6 +3,7 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/regressor'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
@@ -11,15 +12,13 @@ module SVMKit
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter:
+    #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
     # *Reference*
     # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
     # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
-    # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
     class Lasso
       include Base::BaseEstimator
       include Base::Regressor
@@ -41,30 +40,23 @@ module SVMKit
       #
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param learning_rate [Float] The learning rate for optimization.
-      # @param decay [Float] The discounting factor for RMS prop optimization.
-      # @param momentum [Float] The momentum for optimization.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false,
-        check_params_float(reg_param: reg_param,
-                           learning_rate: learning_rate, decay: decay, momentum: momentum)
+      def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias)
         check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param,
-                              learning_rate: learning_rate, decay: decay, momentum: momentum,
-                              max_iter: max_iter, batch_size: batch_size)
+        check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
-        @params[:learning_rate] = learning_rate
-        @params[:decay] = decay
-        @params[:momentum] = momentum
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -138,11 +130,9 @@ module SVMKit
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
         left_weight_vec = Numo::DFloat.zeros(n_features)
-        left_weight_sqrsum = Numo::DFloat.zeros(n_features)
-        left_weight_update = Numo::DFloat.zeros(n_features)
         right_weight_vec = Numo::DFloat.zeros(n_features)
+        left_optimizer = Optimizer::Nadam.new
+        right_optimizer = Optimizer::Nadam.new
         # Start optimization.
         @params[:max_iter].times do |_t|
           # Random sampling.
@@ -154,12 +144,8 @@ module SVMKit
           loss_grad = loss_gradient(data, values, weight_vec)
           next if loss_grad.ne(0.0).count.zero?
           # Update weight.
-          left_weight_vec,
-                          left_weight_gradient(loss_grad, data))
-          right_weight_vec, right_weight_sqrsum, right_weight_update =
-            update_weight(right_weight_vec, right_weight_sqrsum, right_weight_update,
-                          right_weight_gradient(loss_grad, data))
+          left_weight_vec = round_weight(left_optimizer.call(left_weight_vec, left_weight_gradient(loss_grad, data)))
+          right_weight_vec = round_weight(right_optimizer.call(right_weight_vec, right_weight_gradient(loss_grad, data)))
           weight_vec = left_weight_vec - right_weight_vec
         end
         split_weight_vec_bias(weight_vec)
@@ -177,12 +163,8 @@ module SVMKit
         ((@params[:reg_param] - loss_grad).expand_dims(1) * data).mean(0)
       end
 
-      def
-        new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
-        new_weight = weight - (new_update + @params[:momentum] * update)
-        new_weight = 0.5 * (new_weight + new_weight.abs)
-        [new_weight, new_sqrsum, new_update]
+      def round_weight(weight)
+        0.5 * (weight + weight.abs)
       end
 
       def expand_feature(x)
data/lib/svmkit/linear_model/logistic_regression.rb
CHANGED
@@ -3,25 +3,26 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
-  # This module consists of the classes that implement generalized linear models.
   module LinearModel
     # LogisticRegression is a class that implements Logistic Regression
-    # with stochastic gradient descent
+    # with mini-batch stochastic gradient descent optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter:
+    #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
     #
     # *Reference*
-    #
+    # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
     class LogisticRegression
       include Base::BaseEstimator
       include Base::Classifier
+      include Validation
 
       # Return the weight vector for Logistic Regression.
       # @return [Numo::DFloat] (shape: [n_classes, n_features])
@@ -47,23 +48,23 @@ module SVMKit
       #   If fit_bias is true, the feature vector v becoms [v; bias_scale].
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
-      # @param
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter:
-                     batch_size: batch_size)
+                     max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
+        check_params_integer(max_iter: max_iter, batch_size: batch_size)
+        check_params_boolean(fit_bias: fit_bias)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
         @params[:bias_scale] = bias_scale
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
-        @params[:
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -78,9 +79,9 @@ module SVMKit
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [LogisticRegression] The learned classifier itself.
       def fit(x, y)
+        check_sample_array(x)
+        check_label_array(y)
+        check_sample_label_size(x, y)
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
         n_classes = @classes.size
@@ -109,8 +110,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
       def decision_function(x)
+        check_sample_array(x)
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
@@ -119,7 +119,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
+        check_sample_array(x)
 
         return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
 
@@ -133,7 +133,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
+        check_sample_array(x)
 
         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
         return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
@@ -168,40 +168,41 @@ module SVMKit
 
       private
 
-      def binary_fit(x,
+      def binary_fit(x, y)
         # Expand feature vectors for bias term.
         samples = @params[:fit_bias] ? expand_feature(x) : x
         # Initialize some variables.
         n_samples, n_features = samples.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
+        optimizer = Optimizer::Nadam.new
         # Start optimization.
-        @params[:max_iter].times do |
+        @params[:max_iter].times do |_t|
           # random sampling
           subset_ids = rand_ids.shift(@params[:batch_size])
           rand_ids.concat(subset_ids)
-          normalize_weight_vec(weight_vec) if @params[:normalize]
+          data = samples[subset_ids, true]
+          labels = y[subset_ids]
+          # calculate gradient for loss function.
+          loss_grad = loss_gradient(data, labels, weight_vec)
+          # update weight.
+          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
         end
         split_weight_vec_bias(weight_vec)
       end
 
-      def
+      def loss_gradient(x, y, weight)
+        z = x.dot(weight)
+        grad = y / (Numo::NMath.exp(-y * z) + 1.0) - y
+        grad
       end
 
-      def
+      def weight_gradient(loss_grad, x, weight)
+        x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
      end
 
-      def
-        weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
      end
 
      def split_weight_vec_bias(weight_vec)
data/lib/svmkit/linear_model/ridge.rb
CHANGED
@@ -3,22 +3,19 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/regressor'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
     # Ridge is a class that implements Ridge Regression
-    # with stochastic gradient descent
+    # with mini-batch stochastic gradient descent optimization.
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter:
+    #     SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
-    # *Reference*
-    # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-    # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
-    # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
     class Ridge
       include Base::BaseEstimator
       include Base::Regressor
@@ -40,30 +37,21 @@ module SVMKit
       #
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param learning_rate [Float] The learning rate for optimization.
-      # @param decay [Float] The discounting factor for RMS prop optimization.
-      # @param momentum [Float] The Nesterov momentum for optimization.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false,
-        check_params_float(reg_param: reg_param,
-                           learning_rate: learning_rate, decay: decay, momentum: momentum)
+      def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias)
         check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param,
-                              learning_rate: learning_rate, decay: decay, momentum: momentum,
-                              max_iter: max_iter, batch_size: batch_size)
+        check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
-        @params[:learning_rate] = learning_rate
-        @params[:decay] = decay
-        @params[:momentum] = momentum
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -136,8 +124,7 @@ module SVMKit
         n_samples, n_features = samples.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
-        weight_update = Numo::DFloat.zeros(n_features)
+        optimizer = Optimizer::Nadam.new
         # Start optimization.
         @params[:max_iter].times do |_t|
           # Random sampling.
@@ -146,12 +133,10 @@ module SVMKit
           data = samples[subset_ids, true]
           values = y[subset_ids]
           # Calculate gradients for loss function.
-          loss_grad = loss_gradient(data, values, weight_vec
+          loss_grad = loss_gradient(data, values, weight_vec)
           next if loss_grad.ne(0.0).count.zero?
           # Update weight.
-          weight_vec,
-            update_weight(weight_vec, weight_sqrsum, weight_update,
-                          weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
+          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
         end
         split_weight_vec_bias(weight_vec)
       end
@@ -164,13 +149,6 @@ module SVMKit
         (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param] * weight
       end
 
-      def update_weight(weight, sqrsum, update, gr)
-        new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
-        new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
-        new_weight = weight - (new_update + @params[:momentum] * update)
-        [new_weight, new_sqrsum, new_update]
-      end
-
       def expand_feature(x)
         Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
       end
data/lib/svmkit/linear_model/svc.rb
CHANGED
@@ -3,26 +3,28 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/optimizer/nadam'
 require 'svmkit/probabilistic_output'
 
 module SVMKit
   # This module consists of the classes that implement generalized linear models.
   module LinearModel
     # SVC is a class that implements Support Vector Classifier
-    # with stochastic gradient descent
+    # with mini-batch stochastic gradient descent optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter:
+    #     SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
     #
     # *Reference*
-    #
+    # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
     class SVC
       include Base::BaseEstimator
       include Base::Classifier
+      include Validation
 
       # Return the weight vector for SVC.
       # @return [Numo::DFloat] (shape: [n_classes, n_features])
@@ -48,16 +50,16 @@ module SVMKit
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
       # @param probability [Boolean] The flag indicating whether to perform probability estimation.
-      # @param
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter:
-                     batch_size: batch_size)
+                     max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
+        check_params_integer(max_iter: max_iter, batch_size: batch_size)
+        check_params_boolean(fit_bias: fit_bias, probability: probability)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
@@ -65,7 +67,7 @@ module SVMKit
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
         @params[:probability] = probability
-        @params[:
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -81,9 +83,9 @@ module SVMKit
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [SVC] The learned classifier itself.
       def fit(x, y)
+        check_sample_array(x)
+        check_label_array(y)
+        check_sample_label_size(x, y)
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
         n_classes = @classes.size
@@ -123,8 +125,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
       def decision_function(x)
+        check_sample_array(x)
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
@@ -133,7 +134,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
+        check_sample_array(x)
 
         return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
 
@@ -147,7 +148,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
+        check_sample_array(x)
 
         if @classes.size > 2
           probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
@@ -186,43 +187,43 @@ module SVMKit
 
       private
 
-      def binary_fit(x,
+      def binary_fit(x, y)
         # Expand feature vectors for bias term.
         samples = @params[:fit_bias] ? expand_feature(x) : x
         # Initialize some variables.
         n_samples, n_features = samples.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
+        optimizer = Optimizer::Nadam.new
         # Start optimization.
-        @params[:max_iter].times do |
-          # random sampling
+        @params[:max_iter].times do |_t|
+          # random sampling.
           subset_ids = rand_ids.shift(@params[:batch_size])
          rand_ids.concat(subset_ids)
-          next if
-          # update
-          weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
-          # scale the weight vector.
-          normalize_weight_vec(weight_vec) if @params[:normalize]
+          data = samples[subset_ids, true]
+          labels = y[subset_ids]
+          # calculate gradient for loss function.
+          loss_grad = loss_gradient(data, labels, weight_vec)
+          next if loss_grad.ne(0.0).count.zero?
+          # update weight.
+          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
        end
        split_weight_vec_bias(weight_vec)
      end
 
-      def
+      def loss_gradient(x, y, weight)
+        target_ids = (x.dot(weight) * y).lt(1.0).where
+        grad = Numo::DFloat.zeros(@params[:batch_size])
+        grad[target_ids] = -y[target_ids]
+        grad
      end
 
-      def
+      def weight_gradient(loss_grad, x, weight)
+        x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
      end
 
-      def
-        weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
      end
 
      def split_weight_vec_bias(weight_vec)
data/lib/svmkit/linear_model/svr.rb
CHANGED
@@ -3,15 +3,16 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/regressor'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
     # SVR is a class that implements Support Vector Regressor
-    # with stochastic gradient descent
+    # with mini-batch stochastic gradient descent optimization.
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter:
+    #     SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_target_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -20,6 +21,7 @@ module SVMKit
     class SVR
       include Base::BaseEstimator
       include Base::Regressor
+      include Validation
 
       # Return the weight vector for SVR.
       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
@@ -41,16 +43,17 @@ module SVMKit
       # @param epsilon [Float] The margin of tolerance.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
-      # @param
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
-                     max_iter:
+                     max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
+        check_params_integer(max_iter: max_iter, batch_size: batch_size)
+        check_params_boolean(fit_bias: fit_bias)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
+                              max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
@@ -58,7 +61,7 @@ module SVMKit
         @params[:epsilon] = epsilon
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
-        @params[:
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -72,9 +75,9 @@ module SVMKit
       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [SVR] The learned regressor itself.
       def fit(x, y)
+        check_sample_array(x)
+        check_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
 
         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
         _n_samples, n_features = x.shape
@@ -99,7 +102,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
+        check_sample_array(x)
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
@@ -131,35 +134,35 @@ module SVMKit
         n_samples, n_features = samples.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
+        optimizer = Optimizer::Nadam.new
         # Start optimization.
-        @params[:max_iter].times do |
+        @params[:max_iter].times do |_t|
           # random sampling
           subset_ids = rand_ids.shift(@params[:batch_size])
           rand_ids.concat(subset_ids)
+          data = samples[subset_ids, true]
+          values = y[subset_ids]
           # update the weight vector.
-          coef[(z - y[subset_ids]).gt(@params[:epsilon]).where] = 1
-          coef[(y[subset_ids] - z).gt(@params[:epsilon]).where] = -1
-          mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
-          weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
-          # scale the weight vector.
-          normalize_weight_vec(weight_vec) if @params[:normalize]
+          loss_grad = loss_gradient(data, values, weight_vec)
+          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
        end
        split_weight_vec_bias(weight_vec)
      end
 
-      def
+      def loss_gradient(x, y, weight)
+        z = x.dot(weight)
+        grad = Numo::DFloat.zeros(@params[:batch_size])
+        grad[(z - y).gt(@params[:epsilon]).where] = 1
+        grad[(y - z).gt(@params[:epsilon]).where] = -1
+        grad
      end
 
-      def
+      def weight_gradient(loss_grad, x, weight)
+        x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
      end
 
-      def
-        weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
      end
 
      def split_weight_vec_bias(weight_vec)
data/lib/svmkit/optimizer/nadam.rb
ADDED
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+require 'svmkit/validation'
+
+module SVMKit
+  # This module consists of the classes that implement optimizers adaptively tuning hyperparameters.
+  module Optimizer
+    # Nadam is a class that implements Nadam optimizer.
+    # This class is used for internal processes.
+    #
+    # *Reference*
+    # - T. Dozat, "Incorporating Nesterov Momentum into Adam," Tech. Repo. Stanford University, 2015.
+    class Nadam
+      include Validation
+
+      # Create a new optimizer with Nadam
+      #
+      # @param learning_rate [Float] The initial value of learning rate.
+      # @param momentum [Float] The initial value of momentum.
+      # @param decay1 [Float] The smoothing parameter for the first moment.
+      # @param decay2 [Float] The smoothing parameter for the second moment.
+      # @param schedule_decay [Float] The smooting parameter.
+      def initialize(learning_rate: 0.01, momentum: 0.9, decay1: 0.9, decay2: 0.999)
+        check_params_float(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
+        check_params_positive(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
+        @params = {}
+        @params[:learning_rate] = learning_rate
+        @params[:momentum] = momentum
+        @params[:decay1] = decay1
+        @params[:decay2] = decay2
+        @fst_moment = nil
+        @sec_moment = nil
+        @decay1_prod = 1.0
+        @iter = 0
+      end
+
+      # Calculate the updated weight with Nadam adaptive learning rate.
+      #
+      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
+      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
+      # @return [Numo::DFloat] (shape: [n_feautres]) The updated weight.
+      def call(weight, gradient)
+        @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
+        @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
+
+        @iter += 1
+
+        decay1_curr = @params[:decay1] * (1.0 - 0.5 * 0.96**(@iter * 0.004))
+        decay1_next = @params[:decay1] * (1.0 - 0.5 * 0.96**((@iter + 1) * 0.004))
+        decay1_prod_curr = @decay1_prod * decay1_curr
+        decay1_prod_next = @decay1_prod * decay1_curr * decay1_next
+        @decay1_prod = decay1_prod_curr
+
+        @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
+        @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
+        nm_gradient = gradient / (1.0 - decay1_prod_curr)
+        nm_fst_moment = @fst_moment / (1.0 - decay1_prod_next)
+        nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
+
+        weight - (@params[:learning_rate] / (nm_sec_moment**0.5 + 1e-8)) * ((1 - decay1_curr) * nm_gradient + decay1_next * nm_fst_moment)
+      end
+    end
+  end
+end
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb
CHANGED
@@ -3,6 +3,7 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   # This module consists of the classes that implement polynomial models.
@@ -14,7 +15,7 @@ module SVMKit
     # @example
     #   estimator =
     #     SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
-    #      n_factors: 10, loss: 'hinge',
+    #      n_factors: 10, loss: 'hinge', reg_param_linear: 0.001, reg_param_factor: 0.001,
     #      max_iter: 5000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
@@ -25,6 +26,7 @@ module SVMKit
     class FactorizationMachineClassifier
       include Base::BaseEstimator
      include Base::Classifier
+      include Validation
 
      # Return the factor matrix for Factorization Machine.
      # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
@@ -50,32 +52,30 @@ module SVMKit
      #
      # @param n_factors [Integer] The maximum number of iterations.
      # @param loss [String] The loss function ('hinge' or 'logistic').
-      # @param
-      # @param reg_param_weight [Float] The regularization parameter for weight vector.
+      # @param reg_param_linear [Float] The regularization parameter for linear model.
      # @param reg_param_factor [Float] The regularization parameter for factor matrix.
-      # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param batch_size [Integer] The size of the mini batches.
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(n_factors: 2, loss: 'hinge',
-                     max_iter: max_iter, batch_size: batch_size)
+      def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
+                     max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
+        check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
+        check_params_string(loss: loss)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(n_factors: n_factors,
+                              reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
+                              max_iter: max_iter, batch_size: batch_size)
        @params = {}
        @params[:n_factors] = n_factors
        @params[:loss] = loss
-        @params[:
-        @params[:reg_param_weight] = reg_param_weight
+        @params[:reg_param_linear] = reg_param_linear
        @params[:reg_param_factor] = reg_param_factor
-        @params[:init_std] = init_std
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @factor_mat = nil
@@ -91,9 +91,9 @@ module SVMKit
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [FactorizationMachineClassifier] The learned classifier itself.
      def fit(x, y)
+        check_sample_array(x)
+        check_label_array(y)
+        check_sample_label_size(x, y)
 
        @classes = Numo::Int32[*y.to_a.uniq.sort]
        n_classes = @classes.size
@@ -124,7 +124,7 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
      def decision_function(x)
+        check_sample_array(x)
        linear_term = @bias_term + x.dot(@weight_vec.transpose)
        factor_term = if @classes.size <= 2
                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
@@ -139,7 +139,7 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
+        check_sample_array(x)
        return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
 
        n_samples, = x.shape
@@ -152,7 +152,7 @@ module SVMKit
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
+        check_sample_array(x)
        proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
        return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
 
@@ -188,84 +188,76 @@ module SVMKit
 
      private
 
-      def binary_fit(x,
+      def binary_fit(x, y)
        # Initialize some variables.
        n_samples, n_features = x.shape
        rand_ids = [*0...n_samples].shuffle(random: @rng)
+        weight_vec = Numo::DFloat.zeros(n_features + 1)
+        factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
+        weight_optimizer = Optimizer::Nadam.new
+        factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
        # Start optimization.
-        @params[:max_iter].times do |
+        @params[:max_iter].times do |_t|
          # Random sampling.
          subset_ids = rand_ids.shift(@params[:batch_size])
          rand_ids.concat(subset_ids)
          data = x[subset_ids, true]
+          ex_data = expand_feature(data)
+          label = y[subset_ids]
          # Calculate gradients for loss function.
-          loss_grad = loss_gradient(data, label, factor_mat, weight_vec
+          loss_grad = loss_gradient(data, ex_data, label, factor_mat, weight_vec)
          next if loss_grad.ne(0.0).count.zero?
          # Update each parameter.
-          weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data, weight_vec)
+          weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
          @params[:n_factors].times do |n|
-            factor_mat[n, true]
+            factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
+                                                            factor_gradient(loss_grad, data, factor_mat[n, true]))
          end
        end
-        [factor_mat, weight_vec
+        [factor_mat, *split_weight_vec_bias(weight_vec)]
      end
 
-      def bin_decision_function(x, factor, weight
+      def bin_decision_function(x, ex_x, factor, weight)
+        ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
      end
 
-      def hinge_loss_gradient(x, y, factor, weight
-        evaluated = y * bin_decision_function(x, factor, weight
+      def hinge_loss_gradient(x, ex_x, y, factor, weight)
+        evaluated = y * bin_decision_function(x, ex_x, factor, weight)
        gradient = Numo::DFloat.zeros(evaluated.size)
        gradient[evaluated < 1.0] = -y[evaluated < 1.0]
        gradient
      end
 
-      def logistic_loss_gradient(x, y, factor, weight
-        evaluated = y * bin_decision_function(x, factor, weight
+      def logistic_loss_gradient(x, ex_x, y, factor, weight)
+        evaluated = y * bin_decision_function(x, ex_x, factor, weight)
        sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
        (sigmoid_func - 1.0) * y
      end
 
-      def loss_gradient(x, y, factor, weight
+      def loss_gradient(x, ex_x, y, factor, weight)
        if @params[:loss] == 'hinge'
-          hinge_loss_gradient(x, y, factor, weight
+          hinge_loss_gradient(x, ex_x, y, factor, weight)
        else
-          logistic_loss_gradient(x, y, factor, weight
+          logistic_loss_gradient(x, ex_x, y, factor, weight)
        end
      end
 
-      def learning_rate(reg_param, iter)
-        1.0 / (reg_param * (iter + 1))
-      end
-
-      def bias_gradient(loss_grad, bias)
-        loss_grad.mean + @params[:reg_param_bias] * bias
-      end
-
      def weight_gradient(loss_grad, data, weight)
-        (loss_grad.expand_dims(1) * data).mean(0) + @params[:
+        (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
      end
 
      def factor_gradient(loss_grad, data, factor)
-        (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + reg_term
+        (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
      end
 
-      def
-        Numo::
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
      end
 
-      def
+      def split_weight_vec_bias(weight_vec)
+        weights = weight_vec[0...-1]
+        bias = weight_vec[-1]
+        [weights, bias]
      end
    end
  end
data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb
CHANGED
@@ -3,6 +3,7 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/regressor'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module PolynomialModel
@@ -12,7 +13,7 @@ module SVMKit
     # @example
     #   estimator =
     #     SVMKit::PolynomialModel::FactorizationMachineRegressor.new(
-    #      n_factors: 10,
+    #      n_factors: 10, reg_param_linear: 0.1, reg_param_factor: 0.1,
     #      max_iter: 5000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
@@ -20,8 +21,6 @@ module SVMKit
     # *Reference*
     # - S. Rendle, "Factorization Machines with libFM," ACM Transactions on Intelligent Systems and Technology, vol. 3 (3), pp. 57:1--57:22, 2012.
     # - S. Rendle, "Factorization Machines," Proc. the 10th IEEE International Conference on Data Mining (ICDM'10), pp. 995--1000, 2010.
-    # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. the 30th International Conference on Machine Learning (ICML' 13), pp. 1139--1147, 2013.
-    # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
     class FactorizationMachineRegressor
       include Base::BaseEstimator
      include Base::Regressor
@@ -46,40 +45,27 @@ module SVMKit
      # Create a new regressor with Factorization Machine.
      #
      # @param n_factors [Integer] The maximum number of iterations.
-      # @param
-      # @param reg_param_weight [Float] The regularization parameter for weight vector.
+      # @param reg_param_linear [Float] The regularization parameter for linear model.
      # @param reg_param_factor [Float] The regularization parameter for factor matrix.
-      # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
-      # @param learning_rate [Float] The learning rate for optimization.
-      # @param decay [Float] The discounting factor for RMS prop optimization.
-      # @param momentum [Float] The Nesterov momentum for optimization.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param batch_size [Integer] The size of the mini batches.
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(n_factors: 2,
-                     max_iter: 1000, batch_size: 10, random_seed: nil)
-        check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
-                           reg_param_factor: reg_param_factor, init_std: init_std,
-                           learning_rate: learning_rate, decay: decay, momentum: momentum)
+      def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
+                     max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
        check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
        check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(n_factors: n_factors,
-                              reg_param_weight: reg_param_weight, reg_param_factor: reg_param_factor,
-                              learning_rate: learning_rate, decay: decay, momentum: momentum,
+        check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
                              max_iter: max_iter, batch_size: batch_size)
        @params = {}
        @params[:n_factors] = n_factors
-        @params[:
-        @params[:reg_param_weight] = reg_param_weight
+        @params[:reg_param_linear] = reg_param_linear
        @params[:reg_param_factor] = reg_param_factor
-        @params[:init_std] = init_std
-        @params[:learning_rate] = learning_rate
-        @params[:decay] = decay
-        @params[:momentum] = momentum
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @factor_mat = nil
@@ -160,74 +146,52 @@ module SVMKit
        # Initialize some variables.
        n_samples, n_features = x.shape
        rand_ids = [*0...n_samples].shuffle(random: @rng)
-        weight_sqrsum = Numo::DFloat.zeros(n_features)
-        weight_update = Numo::DFloat.zeros(n_features)
-        bias_term = 0.0
-        bias_sqrsum = 0.0
-        bias_update = 0.0
+        weight_vec = Numo::DFloat.zeros(n_features + 1)
+        factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
+        weight_optimizer = Optimizer::Nadam.new
+        factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
        # Start optimization.
        @params[:max_iter].times do |_t|
          # Random sampling.
          subset_ids = rand_ids.shift(@params[:batch_size])
          rand_ids.concat(subset_ids)
          data = x[subset_ids, true]
+          ex_data = expand_feature(data)
          values = y[subset_ids]
          # Calculate gradients for loss function.
-          loss_grad = loss_gradient(data, values,
-                                    factor_mat - @params[:momentum] * factor_update,
-                                    weight_vec - @params[:momentum] * weight_update,
-                                    bias_term - @params[:momentum] * bias_update)
+          loss_grad = loss_gradient(data, ex_data, values, factor_mat, weight_vec)
          next if loss_grad.ne(0.0).count.zero?
          # Update each parameter.
-            update_param(bias_term, bias_sqrsum, bias_update,
-                         bias_gradient(loss_grad, bias_term - @params[:momentum] * bias_update))
-          weight_vec, weight_sqrsum, weight_update =
-            update_param(weight_vec, weight_sqrsum, weight_update,
-                         weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
+          weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
          @params[:n_factors].times do |n|
-              factor_gradient(loss_grad, data, factor_mat[n, true] - @params[:momentum] * factor_update[n, true]))
+            factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
+                                                            factor_gradient(loss_grad, data, factor_mat[n, true]))
          end
        end
-        [factor_mat, weight_vec
+        [factor_mat, *split_weight_vec_bias(weight_vec)]
      end
 
-      def loss_gradient(x, y, factor, weight
-        z =
+      def loss_gradient(x, ex_x, y, factor, weight)
+        z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
        2.0 * (z - y)
      end
 
-      def bias_gradient(loss_grad, bias)
-        loss_grad.mean + @params[:reg_param_bias] * bias
-      end
-
      def weight_gradient(loss_grad, data, weight)
-        (loss_grad.expand_dims(1) * data).mean(0) + @params[:
+        (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
      end
 
      def factor_gradient(loss_grad, data, factor)
        (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
      end
 
-      def
-        new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
-        new_param = param - (new_update + @params[:momentum] * update)
-        [new_param, new_sqrsum, new_update]
-      end
-
-      def rand_uniform(shape)
-        Numo::DFloat[*Array.new(shape.inject(&:*)) { @rng.rand }].reshape(*shape)
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
      end
 
-      def
+      def split_weight_vec_bias(weight_vec)
+        weights = weight_vec[0...-1]
+        bias = weight_vec[-1]
+        [weights, bias]
      end
    end
  end
data/lib/svmkit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.3.3
+  version: 0.4.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-
+date: 2018-06-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -139,6 +139,7 @@ files:
 - lib/svmkit/naive_bayes/naive_bayes.rb
 - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
 - lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
+- lib/svmkit/optimizer/nadam.rb
 - lib/svmkit/pairwise_metric.rb
 - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
 - lib/svmkit/polynomial_model/factorization_machine_regressor.rb