svmkit 0.3.3 → 0.4.0
- checksums.yaml +4 -4
- data/HISTORY.md +22 -0
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/linear_model/lasso.rb +14 -32
- data/lib/svmkit/linear_model/logistic_regression.rb +37 -36
- data/lib/svmkit/linear_model/ridge.rb +10 -32
- data/lib/svmkit/linear_model/svc.rb +40 -39
- data/lib/svmkit/linear_model/svr.rb +34 -31
- data/lib/svmkit/optimizer/nadam.rb +64 -0
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +53 -61
- data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb +30 -66
- data/lib/svmkit/version.rb +1 -1
- metadata +3 -2
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cef050a2ac6b55583414cb3ce9c3678dd6d2d1c8b2be04a249222683e10465e1
+  data.tar.gz: 7c67ab0e90246f1d9b7e5d0bfb19ed76061d0edf17a05014f521b8ef41e41aed
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 15341450f3bf3ca49901ae55b507d647468261682c7fdb0b058c21a470c2eec261718b6721ca0e2ad7738cfdabd184128a588d68ad6d079e53c9b1e916efa2b1
+  data.tar.gz: fd562db538be12896c005840e065f867e342691e899b33f0524a4db26da33439bfc174141e022d4de3d805657d09e854a4593b9b05b2d9eb99f6cd41da064a1d
data/HISTORY.md CHANGED

@@ -1,3 +1,25 @@
+# 0.4.0
+## Breaking changes
+
+SVMKit introduces optimizer algorithms that calculate the learning rate adaptively
+on each iteration of stochastic gradient descent (SGD).
+While Pegasos SGD runs fast, it sometimes fails to optimize complicated models
+like Factorization Machine.
+To solve this problem, version 0.3.3 introduced optimization with RMSProp on
+FactorizationMachineRegressor, Ridge, and Lasso.
+This attempt made the optimization of those estimators stable.
+Following that success, the author decided to use modern optimizer algorithms
+for all SGD optimization in SVMKit.
+After some preliminary experiments, the author adopted Nadam as the default optimizer.
+SVMKit plans to add other optimizer algorithms sequentially, so that users can select them.
+
+- Fix to use Nadam for optimization on SVC, SVR, LogisticRegression, Ridge, Lasso, and Factorization Machine estimators.
+- Combine the reg_param_weight and reg_param_bias parameters on Factorization Machine estimators into a unified parameter named reg_param_linear.
+- Remove the init_std parameter on Factorization Machine estimators.
+- Remove the learning_rate, decay, and momentum parameters on Ridge, Lasso, and FactorizationMachineRegressor.
+- Remove the normalize parameter on SVC, SVR, and LogisticRegression.
+
+
 # 0.3.3
 - Add class for Ridge regressor.
 - Add class for Lasso regressor.
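For migration reference, a minimal sketch of the 0.4.0 estimator API under these changes; the toy data, parameter values, and seeding below are illustrative assumptions, not examples taken from the gem's documentation:

```ruby
require 'numo/narray'
require 'svmkit'

# Toy binary problem with labels in {-1, +1} (assumed values).
Numo::NArray.srand(1)
samples = Numo::DFloat.new(20, 2).rand - 0.5
labels = Numo::Int32.cast(samples.sum(1).ge(0.0)) * 2 - 1

# 0.4.0 drops learning_rate/decay/momentum/normalize; the new optimizer
# parameter replaces them, and leaving it at nil selects Nadam.
estimator = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 100,
                                         batch_size: 10, random_seed: 1)
estimator.fit(samples, labels)
predictions = estimator.predict(samples)
```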
data/lib/svmkit.rb CHANGED

@@ -13,6 +13,7 @@ require 'svmkit/base/regressor'
 require 'svmkit/base/transformer'
 require 'svmkit/base/splitter'
 require 'svmkit/base/evaluator'
+require 'svmkit/optimizer/nadam'
 require 'svmkit/kernel_approximation/rbf'
 require 'svmkit/linear_model/svc'
 require 'svmkit/linear_model/svr'
data/lib/svmkit/linear_model/lasso.rb CHANGED

@@ -3,6 +3,7 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/regressor'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
@@ -11,15 +12,13 @@ module SVMKit
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter:
+    #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
     # *Reference*
     # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
     # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
-    # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
     class Lasso
       include Base::BaseEstimator
       include Base::Regressor
@@ -41,30 +40,23 @@ module SVMKit
       #
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param learning_rate [Float] The learning rate for optimization.
-      # @param decay [Float] The discounting factor for RMS prop optimization.
-      # @param momentum [Float] The momentum for optimization.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false,
-
-        check_params_float(reg_param: reg_param,
-                           learning_rate: learning_rate, decay: decay, momentum: momentum)
+      def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias)
         check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param,
-                              learning_rate: learning_rate, decay: decay, momentum: momentum,
-                              max_iter: max_iter, batch_size: batch_size)
+        check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
-        @params[:learning_rate] = learning_rate
-        @params[:decay] = decay
-        @params[:momentum] = momentum
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -138,11 +130,9 @@ module SVMKit
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
         left_weight_vec = Numo::DFloat.zeros(n_features)
-        left_weight_sqrsum = Numo::DFloat.zeros(n_features)
-        left_weight_update = Numo::DFloat.zeros(n_features)
         right_weight_vec = Numo::DFloat.zeros(n_features)
-        right_weight_sqrsum = Numo::DFloat.zeros(n_features)
-        right_weight_update = Numo::DFloat.zeros(n_features)
+        left_optimizer = Optimizer::Nadam.new
+        right_optimizer = Optimizer::Nadam.new
         # Start optimization.
         @params[:max_iter].times do |_t|
           # Random sampling.
@@ -154,12 +144,8 @@ module SVMKit
           loss_grad = loss_gradient(data, values, weight_vec)
           next if loss_grad.ne(0.0).count.zero?
           # Update weight.
-          left_weight_vec, left_weight_sqrsum, left_weight_update =
-            update_weight(left_weight_vec, left_weight_sqrsum, left_weight_update,
-                          left_weight_gradient(loss_grad, data))
-          right_weight_vec, right_weight_sqrsum, right_weight_update =
-            update_weight(right_weight_vec, right_weight_sqrsum, right_weight_update,
-                          right_weight_gradient(loss_grad, data))
+          left_weight_vec = round_weight(left_optimizer.call(left_weight_vec, left_weight_gradient(loss_grad, data)))
+          right_weight_vec = round_weight(right_optimizer.call(right_weight_vec, right_weight_gradient(loss_grad, data)))
           weight_vec = left_weight_vec - right_weight_vec
         end
         split_weight_vec_bias(weight_vec)
@@ -177,12 +163,8 @@ module SVMKit
         ((@params[:reg_param] - loss_grad).expand_dims(1) * data).mean(0)
       end
 
-      def update_weight(weight, sqrsum, update, gr)
-        new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
-        new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
-        new_weight = weight - (new_update + @params[:momentum] * update)
-        new_weight = 0.5 * (new_weight + new_weight.abs)
-        [new_weight, new_sqrsum, new_update]
+      def round_weight(weight)
+        0.5 * (weight + weight.abs)
       end
 
       def expand_feature(x)
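The new Lasso update optimizes the weights as a difference of two non-negative vectors, clipping each at zero after every Nadam step via round_weight. A small standalone sketch of that clipping identity, with assumed toy values:

```ruby
require 'numo/narray'

# round_weight keeps a vector non-negative: 0.5 * (w + |w|) == max(w, 0).
def round_weight(weight)
  0.5 * (weight + weight.abs)
end

left  = round_weight(Numo::DFloat[0.5, -0.1, 0.3])   # => [0.5, 0, 0.3]
right = round_weight(Numo::DFloat[0.2, -0.4, 0.3])   # => [0.2, 0, 0.3]

# The fitted weight vector is the difference of the two clipped parts;
# components where both parts hit zero (or cancel) land exactly on zero,
# which is how the L1 penalty yields sparse solutions.
p left - right                                        # => [0.3, 0, 0]
```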
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED

@@ -3,25 +3,26 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
-  # This module consists of the classes that implement generalized linear models.
   module LinearModel
     # LogisticRegression is a class that implements Logistic Regression
-    # with stochastic gradient descent
+    # with mini-batch stochastic gradient descent optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter:
+    #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
     #
     # *Reference*
-    #
+    # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
     class LogisticRegression
       include Base::BaseEstimator
       include Base::Classifier
+      include Validation
 
       # Return the weight vector for Logistic Regression.
       # @return [Numo::DFloat] (shape: [n_classes, n_features])
@@ -47,23 +48,23 @@ module SVMKit
       #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
-      # @param
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter:
-
-
-
-
-
-                                           batch_size: batch_size)
+                     max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
+        check_params_integer(max_iter: max_iter, batch_size: batch_size)
+        check_params_boolean(fit_bias: fit_bias)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
         @params[:bias_scale] = bias_scale
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
-        @params[:
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -78,9 +79,9 @@ module SVMKit
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [LogisticRegression] The learned classifier itself.
       def fit(x, y)
-
-
-
+        check_sample_array(x)
+        check_label_array(y)
+        check_sample_label_size(x, y)
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
         n_classes = @classes.size
@@ -109,8 +110,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
       def decision_function(x)
-
-
+        check_sample_array(x)
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
@@ -119,7 +119,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-
+        check_sample_array(x)
 
         return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
 
@@ -133,7 +133,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-
+        check_sample_array(x)
 
         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
         return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
@@ -168,40 +168,41 @@ module SVMKit
 
       private
 
-      def binary_fit(x,
+      def binary_fit(x, y)
         # Expand feature vectors for bias term.
         samples = @params[:fit_bias] ? expand_feature(x) : x
         # Initialize some variables.
         n_samples, n_features = samples.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
+        optimizer = Optimizer::Nadam.new
         # Start optimization.
-        @params[:max_iter].times do |
+        @params[:max_iter].times do |_t|
           # random sampling
           subset_ids = rand_ids.shift(@params[:batch_size])
           rand_ids.concat(subset_ids)
-
-
-
-
-
-
-          normalize_weight_vec(weight_vec) if @params[:normalize]
+          data = samples[subset_ids, true]
+          labels = y[subset_ids]
+          # calculate gradient for loss function.
+          loss_grad = loss_gradient(data, labels, weight_vec)
+          # update weight.
+          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
         end
         split_weight_vec_bias(weight_vec)
       end
 
-      def
-
+      def loss_gradient(x, y, weight)
+        z = x.dot(weight)
+        grad = y / (Numo::NMath.exp(-y * z) + 1.0) - y
+        grad
       end
 
-      def
-
+      def weight_gradient(loss_grad, x, weight)
+        x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
       end
 
-      def normalize_weight_vec(weight_vec)
-
-        weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
       end
 
       def split_weight_vec_bias(weight_vec)
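The rewritten loss_gradient is the derivative of the logistic loss log(1 + exp(-y z)) with respect to the margin z = w . x. A scalar numerical check of that form on assumed toy values:

```ruby
# Logistic loss and the gradient form used in LogisticRegression#loss_gradient.
def logistic_loss(z, y)
  Math.log(1.0 + Math.exp(-y * z))
end

z = 0.7
y = -1.0
analytic = y / (Math.exp(-y * z) + 1.0) - y   # same expression as loss_gradient
h = 1.0e-6
numeric = (logistic_loss(z + h, y) - logistic_loss(z - h, y)) / (2.0 * h)
puts format('analytic: %.6f, numeric: %.6f', analytic, numeric)   # both ~0.668
```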
data/lib/svmkit/linear_model/ridge.rb CHANGED

@@ -3,22 +3,19 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/regressor'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
     # Ridge is a class that implements Ridge Regression
-    # with stochastic gradient descent
+    # with mini-batch stochastic gradient descent optimization.
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter:
+    #     SVMKit::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
-    # *Reference*
-    # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-    # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
-    # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
     class Ridge
       include Base::BaseEstimator
       include Base::Regressor
@@ -40,30 +37,21 @@ module SVMKit
       #
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param learning_rate [Float] The learning rate for optimization.
-      # @param decay [Float] The discounting factor for RMS prop optimization.
-      # @param momentum [Float] The Nesterov momentum for optimization.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false,
-
-        check_params_float(reg_param: reg_param,
-                           learning_rate: learning_rate, decay: decay, momentum: momentum)
+      def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias)
         check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param,
-                              learning_rate: learning_rate, decay: decay, momentum: momentum,
-                              max_iter: max_iter, batch_size: batch_size)
+        check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
-        @params[:learning_rate] = learning_rate
-        @params[:decay] = decay
-        @params[:momentum] = momentum
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -136,8 +124,7 @@ module SVMKit
         n_samples, n_features = samples.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
-        weight_sqrsum = Numo::DFloat.zeros(n_features)
-        weight_update = Numo::DFloat.zeros(n_features)
+        optimizer = Optimizer::Nadam.new
         # Start optimization.
         @params[:max_iter].times do |_t|
           # Random sampling.
@@ -146,12 +133,10 @@ module SVMKit
           data = samples[subset_ids, true]
           values = y[subset_ids]
           # Calculate gradients for loss function.
-          loss_grad = loss_gradient(data, values, weight_vec
+          loss_grad = loss_gradient(data, values, weight_vec)
           next if loss_grad.ne(0.0).count.zero?
           # Update weight.
-          weight_vec, weight_sqrsum, weight_update =
-            update_weight(weight_vec, weight_sqrsum, weight_update,
-                          weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
+          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
         end
         split_weight_vec_bias(weight_vec)
       end
@@ -164,13 +149,6 @@ module SVMKit
         (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param] * weight
       end
 
-      def update_weight(weight, sqrsum, update, gr)
-        new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
-        new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
-        new_weight = weight - (new_update + @params[:momentum] * update)
-        [new_weight, new_sqrsum, new_update]
-      end
-
       def expand_feature(x)
         Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
       end
data/lib/svmkit/linear_model/svc.rb CHANGED

@@ -3,26 +3,28 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/optimizer/nadam'
 require 'svmkit/probabilistic_output'
 
 module SVMKit
   # This module consists of the classes that implement generalized linear models.
   module LinearModel
     # SVC is a class that implements Support Vector Classifier
-    # with stochastic gradient descent
+    # with mini-batch stochastic gradient descent optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter:
+    #     SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
     #
     # *Reference*
-    #
+    # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
     class SVC
       include Base::BaseEstimator
       include Base::Classifier
+      include Validation
 
       # Return the weight vector for SVC.
       # @return [Numo::DFloat] (shape: [n_classes, n_features])
@@ -48,16 +50,16 @@ module SVMKit
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
       # @param probability [Boolean] The flag indicating whether to perform probability estimation.
-      # @param
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter:
-
-
-
-
-
-                                           batch_size: batch_size)
+                     max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
+        check_params_integer(max_iter: max_iter, batch_size: batch_size)
+        check_params_boolean(fit_bias: fit_bias, probability: probability)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
@@ -65,7 +67,7 @@ module SVMKit
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
         @params[:probability] = probability
-        @params[:
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -81,9 +83,9 @@ module SVMKit
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [SVC] The learned classifier itself.
       def fit(x, y)
-
-
-
+        check_sample_array(x)
+        check_label_array(y)
+        check_sample_label_size(x, y)
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
         n_classes = @classes.size
@@ -123,8 +125,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
       def decision_function(x)
-
-
+        check_sample_array(x)
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
@@ -133,7 +134,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-
+        check_sample_array(x)
 
         return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
 
@@ -147,7 +148,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-
+        check_sample_array(x)
 
         if @classes.size > 2
           probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
@@ -186,43 +187,43 @@ module SVMKit
 
       private
 
-      def binary_fit(x,
+      def binary_fit(x, y)
         # Expand feature vectors for bias term.
         samples = @params[:fit_bias] ? expand_feature(x) : x
         # Initialize some variables.
         n_samples, n_features = samples.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
+        optimizer = Optimizer::Nadam.new
         # Start optimization.
-        @params[:max_iter].times do |
-          # random sampling
+        @params[:max_iter].times do |_t|
+          # random sampling.
           subset_ids = rand_ids.shift(@params[:batch_size])
           rand_ids.concat(subset_ids)
-
-
-
-
-          next if
-          # update
-
-          weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
-          # scale the weight vector.
-          normalize_weight_vec(weight_vec) if @params[:normalize]
+          data = samples[subset_ids, true]
+          labels = y[subset_ids]
+          # calculate gradient for loss function.
+          loss_grad = loss_gradient(data, labels, weight_vec)
+          next if loss_grad.ne(0.0).count.zero?
+          # update weight.
+          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
         end
         split_weight_vec_bias(weight_vec)
       end
 
-      def
-
+      def loss_gradient(x, y, weight)
+        target_ids = (x.dot(weight) * y).lt(1.0).where
+        grad = Numo::DFloat.zeros(@params[:batch_size])
+        grad[target_ids] = -y[target_ids]
+        grad
       end
 
-      def
-
+      def weight_gradient(loss_grad, x, weight)
+        x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
      end
 
-      def normalize_weight_vec(weight_vec)
-
-        weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
       end
 
       def split_weight_vec_bias(weight_vec)
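The new SVC#loss_gradient is the hinge-loss subgradient: -y on samples that violate the margin y(w . x) < 1, and zero elsewhere. A standalone sketch with assumed toy values:

```ruby
require 'numo/narray'

weight = Numo::DFloat[0.5, -0.25]
x = Numo::DFloat[[1.0, 0.0], [0.0, 1.0], [2.0, 2.0]]
y = Numo::DFloat[1.0, -1.0, 1.0]

margins = x.dot(weight) * y              # => [0.5, 0.25, 0.5]
grad = Numo::DFloat.zeros(y.size)
target_ids = margins.lt(1.0).where       # samples inside the margin
grad[target_ids] = -y[target_ids]
p grad                                   # => [-1.0, 1.0, -1.0]
```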
data/lib/svmkit/linear_model/svr.rb CHANGED

@@ -3,15 +3,16 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/regressor'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
     # SVR is a class that implements Support Vector Regressor
-    # with stochastic gradient descent
+    # with mini-batch stochastic gradient descent optimization.
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter:
+    #     SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_target_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -20,6 +21,7 @@ module SVMKit
     class SVR
       include Base::BaseEstimator
       include Base::Regressor
+      include Validation
 
       # Return the weight vector for SVR.
       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
@@ -41,16 +43,17 @@ module SVMKit
       # @param epsilon [Float] The margin of tolerance.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
-      # @param
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
-                     max_iter:
-
-
-
-
-
+                     max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
+        check_params_integer(max_iter: max_iter, batch_size: batch_size)
+        check_params_boolean(fit_bias: fit_bias)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
+                              max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:reg_param] = reg_param
         @params[:fit_bias] = fit_bias
@@ -58,7 +61,7 @@ module SVMKit
         @params[:epsilon] = epsilon
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
-        @params[:
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @weight_vec = nil
@@ -72,9 +75,9 @@ module SVMKit
       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
       # @return [SVR] The learned regressor itself.
       def fit(x, y)
-
-
-
+        check_sample_array(x)
+        check_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
 
         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
         _n_samples, n_features = x.shape
@@ -99,7 +102,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
       def predict(x)
-
+        check_sample_array(x)
         x.dot(@weight_vec.transpose) + @bias_term
       end
 
@@ -131,35 +134,35 @@ module SVMKit
         n_samples, n_features = samples.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
+        optimizer = Optimizer::Nadam.new
         # Start optimization.
-        @params[:max_iter].times do |
+        @params[:max_iter].times do |_t|
           # random sampling
           subset_ids = rand_ids.shift(@params[:batch_size])
           rand_ids.concat(subset_ids)
+          data = samples[subset_ids, true]
+          values = y[subset_ids]
           # update the weight vector.
-
-
-          coef[(z - y[subset_ids]).gt(@params[:epsilon]).where] = 1
-          coef[(y[subset_ids] - z).gt(@params[:epsilon]).where] = -1
-          mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
-          weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
-          # scale the weight vector.
-          normalize_weight_vec(weight_vec) if @params[:normalize]
+          loss_grad = loss_gradient(data, values, weight_vec)
+          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
        end
         split_weight_vec_bias(weight_vec)
       end
 
-      def
-
+      def loss_gradient(x, y, weight)
+        z = x.dot(weight)
+        grad = Numo::DFloat.zeros(@params[:batch_size])
+        grad[(z - y).gt(@params[:epsilon]).where] = 1
+        grad[(y - z).gt(@params[:epsilon]).where] = -1
+        grad
       end
 
-      def
-
+      def weight_gradient(loss_grad, x, weight)
+        x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
      end
 
-      def normalize_weight_vec(weight_vec)
-
-        weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
       end
 
       def split_weight_vec_bias(weight_vec)
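SVR#loss_gradient implements the epsilon-insensitive loss: zero gradient inside the tube |z - y| <= epsilon, and +/-1 outside it. A standalone sketch with assumed toy values:

```ruby
require 'numo/narray'

epsilon = 0.1
z = Numo::DFloat[1.00, 1.25, 0.70]    # predictions
y = Numo::DFloat[1.05, 1.00, 1.00]    # target values

grad = Numo::DFloat.zeros(y.size)
grad[(z - y).gt(epsilon).where] = 1   # over-predictions outside the tube
grad[(y - z).gt(epsilon).where] = -1  # under-predictions outside the tube
p grad                                # => [0.0, 1.0, -1.0]
```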
data/lib/svmkit/optimizer/nadam.rb ADDED

@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+require 'svmkit/validation'
+
+module SVMKit
+  # This module consists of the classes that implement optimizers adaptively tuning hyperparameters.
+  module Optimizer
+    # Nadam is a class that implements Nadam optimizer.
+    # This class is used for internal processes.
+    #
+    # *Reference*
+    # - T. Dozat, "Incorporating Nesterov Momentum into Adam," Tech. Repo. Stanford University, 2015.
+    class Nadam
+      include Validation
+
+      # Create a new optimizer with Nadam.
+      #
+      # @param learning_rate [Float] The initial value of learning rate.
+      # @param momentum [Float] The initial value of momentum.
+      # @param decay1 [Float] The smoothing parameter for the first moment.
+      # @param decay2 [Float] The smoothing parameter for the second moment.
+      def initialize(learning_rate: 0.01, momentum: 0.9, decay1: 0.9, decay2: 0.999)
+        check_params_float(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
+        check_params_positive(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
+        @params = {}
+        @params[:learning_rate] = learning_rate
+        @params[:momentum] = momentum
+        @params[:decay1] = decay1
+        @params[:decay2] = decay2
+        @fst_moment = nil
+        @sec_moment = nil
+        @decay1_prod = 1.0
+        @iter = 0
+      end
+
+      # Calculate the updated weight with Nadam adaptive learning rate.
+      #
+      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
+      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
+      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
+      def call(weight, gradient)
+        @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
+        @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
+
+        @iter += 1
+
+        decay1_curr = @params[:decay1] * (1.0 - 0.5 * 0.96**(@iter * 0.004))
+        decay1_next = @params[:decay1] * (1.0 - 0.5 * 0.96**((@iter + 1) * 0.004))
+        decay1_prod_curr = @decay1_prod * decay1_curr
+        decay1_prod_next = @decay1_prod * decay1_curr * decay1_next
+        @decay1_prod = decay1_prod_curr
+
+        @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
+        @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
+        nm_gradient = gradient / (1.0 - decay1_prod_curr)
+        nm_fst_moment = @fst_moment / (1.0 - decay1_prod_next)
+        nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
+
+        weight - (@params[:learning_rate] / (nm_sec_moment**0.5 + 1e-8)) * ((1 - decay1_curr) * nm_gradient + decay1_next * nm_fst_moment)
+      end
+    end
+  end
+end
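As a usage note, the optimizer is called once per minibatch with the current weights and their gradient, and returns the updated weights. The sketch below drives a toy quadratic toward its minimum; the objective, learning rate, and step count are illustrative assumptions:

```ruby
require 'numo/narray'
require 'svmkit'

target = Numo::DFloat[1.0, -2.0]
weight = Numo::DFloat.zeros(2)
optimizer = SVMKit::Optimizer::Nadam.new(learning_rate: 0.1)

1000.times do
  gradient = 2.0 * (weight - target)        # gradient of |w - target|^2
  weight = optimizer.call(weight, gradient)
end
p weight                                    # approaches [1.0, -2.0]
```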
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb CHANGED

@@ -3,6 +3,7 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   # This module consists of the classes that implement polynomial models.
@@ -14,7 +15,7 @@ module SVMKit
     # @example
     #   estimator =
     #     SVMKit::PolynomialModel::FactorizationMachineClassifier.new(
-    #      n_factors: 10, loss: 'hinge',
+    #      n_factors: 10, loss: 'hinge', reg_param_linear: 0.001, reg_param_factor: 0.001,
     #      max_iter: 5000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
@@ -25,6 +26,7 @@ module SVMKit
     class FactorizationMachineClassifier
       include Base::BaseEstimator
       include Base::Classifier
+      include Validation
 
       # Return the factor matrix for Factorization Machine.
       # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
@@ -50,32 +52,30 @@ module SVMKit
       #
       # @param n_factors [Integer] The number of factors.
       # @param loss [String] The loss function ('hinge' or 'logistic').
-      # @param
-      # @param reg_param_weight [Float] The regularization parameter for weight vector.
+      # @param reg_param_linear [Float] The regularization parameter for linear model.
       # @param reg_param_factor [Float] The regularization parameter for factor matrix.
-      # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(n_factors: 2, loss: 'hinge',
-
-
-
-
-                              max_iter: max_iter, batch_size: batch_size)
+      def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
+                     max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
+        check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
+        check_params_string(loss: loss)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(n_factors: n_factors,
+                              reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
+                              max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:n_factors] = n_factors
         @params[:loss] = loss
-        @params[:reg_param_bias] = reg_param_bias
-        @params[:reg_param_weight] = reg_param_weight
+        @params[:reg_param_linear] = reg_param_linear
         @params[:reg_param_factor] = reg_param_factor
-        @params[:init_std] = init_std
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @factor_mat = nil
@@ -91,9 +91,9 @@ module SVMKit
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [FactorizationMachineClassifier] The learned classifier itself.
       def fit(x, y)
-
-
-
+        check_sample_array(x)
+        check_label_array(y)
+        check_sample_label_size(x, y)
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
         n_classes = @classes.size
@@ -124,7 +124,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
       def decision_function(x)
-
+        check_sample_array(x)
         linear_term = @bias_term + x.dot(@weight_vec.transpose)
         factor_term = if @classes.size <= 2
                         0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
@@ -139,7 +139,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-
+        check_sample_array(x)
         return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
 
         n_samples, = x.shape
@@ -152,7 +152,7 @@ module SVMKit
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-
+        check_sample_array(x)
         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
         return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
 
@@ -188,84 +188,76 @@ module SVMKit
 
       private
 
-      def binary_fit(x,
+      def binary_fit(x, y)
         # Initialize some variables.
         n_samples, n_features = x.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
-
-
-
+        weight_vec = Numo::DFloat.zeros(n_features + 1)
+        factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
+        weight_optimizer = Optimizer::Nadam.new
+        factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
         # Start optimization.
-        @params[:max_iter].times do |
+        @params[:max_iter].times do |_t|
           # Random sampling.
           subset_ids = rand_ids.shift(@params[:batch_size])
           rand_ids.concat(subset_ids)
           data = x[subset_ids, true]
-          label = y[subset_ids]
+          ex_data = expand_feature(data)
+          label = y[subset_ids]
           # Calculate gradients for loss function.
-          loss_grad = loss_gradient(data, label, factor_mat, weight_vec
+          loss_grad = loss_gradient(data, ex_data, label, factor_mat, weight_vec)
           next if loss_grad.ne(0.0).count.zero?
           # Update each parameter.
-
-          weight_vec -= learning_rate(@params[:reg_param_weight], t) * weight_gradient(loss_grad, data, weight_vec)
+          weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
           @params[:n_factors].times do |n|
-            factor_mat[n, true]
-
+            factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
+                                                            factor_gradient(loss_grad, data, factor_mat[n, true]))
           end
         end
-        [factor_mat, weight_vec
+        [factor_mat, *split_weight_vec_bias(weight_vec)]
       end
 
-      def bin_decision_function(x, factor, weight
-
+      def bin_decision_function(x, ex_x, factor, weight)
+        ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
       end
 
-      def hinge_loss_gradient(x, y, factor, weight
-        evaluated = y * bin_decision_function(x, factor, weight
+      def hinge_loss_gradient(x, ex_x, y, factor, weight)
+        evaluated = y * bin_decision_function(x, ex_x, factor, weight)
         gradient = Numo::DFloat.zeros(evaluated.size)
         gradient[evaluated < 1.0] = -y[evaluated < 1.0]
         gradient
       end
 
-      def logistic_loss_gradient(x, y, factor, weight
-        evaluated = y * bin_decision_function(x, factor, weight
+      def logistic_loss_gradient(x, ex_x, y, factor, weight)
+        evaluated = y * bin_decision_function(x, ex_x, factor, weight)
         sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
         (sigmoid_func - 1.0) * y
       end
 
-      def loss_gradient(x, y, factor, weight
+      def loss_gradient(x, ex_x, y, factor, weight)
         if @params[:loss] == 'hinge'
-          hinge_loss_gradient(x, y, factor, weight
+          hinge_loss_gradient(x, ex_x, y, factor, weight)
         else
-          logistic_loss_gradient(x, y, factor, weight
+          logistic_loss_gradient(x, ex_x, y, factor, weight)
        end
       end
 
-      def learning_rate(reg_param, iter)
-        1.0 / (reg_param * (iter + 1))
-      end
-
-      def bias_gradient(loss_grad, bias)
-        loss_grad.mean + @params[:reg_param_bias] * bias
-      end
-
       def weight_gradient(loss_grad, data, weight)
-        (loss_grad.expand_dims(1) * data).mean(0) + @params[:
+        (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
       end
 
       def factor_gradient(loss_grad, data, factor)
-
-        (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + reg_term
+        (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
       end
 
-      def
-        Numo::
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
       end
 
-      def
-
-
-
+      def split_weight_vec_bias(weight_vec)
+        weights = weight_vec[0...-1]
+        bias = weight_vec[-1]
+        [weights, bias]
       end
     end
   end
 end
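bin_decision_function evaluates the pairwise interactions sum over i < j of (v_i . v_j) x_i x_j in O(n_factors * n_features) using the standard factorization-machine identity 0.5 * sum_f ((v_f . x)^2 - (v_f^2 . x^2)). A quick check of that identity against the naive pairwise sum, on assumed toy values:

```ruby
require 'numo/narray'

factor = Numo::DFloat[[0.1, 0.2, 0.3], [-0.2, 0.1, 0.4]]  # [n_factors, n_features]
x = Numo::DFloat[[1.0, 2.0, 3.0]]                          # a single sample

# Fast form used in the gem code above.
fast = 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)

# Naive pairwise sum over i < j for comparison.
naive = 0.0
3.times do |i|
  ((i + 1)...3).each do |j|
    naive += factor[true, i].dot(factor[true, j]) * x[0, i] * x[0, j]
  end
end
p [fast[0], naive]   # both 0.45
```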
data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb CHANGED

@@ -3,6 +3,7 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/regressor'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module PolynomialModel
@@ -12,7 +13,7 @@ module SVMKit
     # @example
     #   estimator =
     #     SVMKit::PolynomialModel::FactorizationMachineRegressor.new(
-    #      n_factors: 10,
+    #      n_factors: 10, reg_param_linear: 0.1, reg_param_factor: 0.1,
     #      max_iter: 5000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
@@ -20,8 +21,6 @@ module SVMKit
     # *Reference*
     # - S. Rendle, "Factorization Machines with libFM," ACM Transactions on Intelligent Systems and Technology, vol. 3 (3), pp. 57:1--57:22, 2012.
     # - S. Rendle, "Factorization Machines," Proc. the 10th IEEE International Conference on Data Mining (ICDM'10), pp. 995--1000, 2010.
-    # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. the 30th International Conference on Machine Learning (ICML' 13), pp. 1139--1147, 2013.
-    # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
     class FactorizationMachineRegressor
       include Base::BaseEstimator
       include Base::Regressor
@@ -46,40 +45,27 @@ module SVMKit
      # Create a new regressor with Factorization Machine.
       #
       # @param n_factors [Integer] The number of factors.
-      # @param
-      # @param reg_param_weight [Float] The regularization parameter for weight vector.
+      # @param reg_param_linear [Float] The regularization parameter for linear model.
       # @param reg_param_factor [Float] The regularization parameter for factor matrix.
-      # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
-      # @param learning_rate [Float] The learning rate for optimization.
-      # @param decay [Float] The discounting factor for RMS prop optimization.
-      # @param momentum [Float] The Nesterov momentum for optimization.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   Nadam is selected automatically on current version.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(n_factors: 2,
-
-
-                     max_iter: 1000, batch_size: 10, random_seed: nil)
-        check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
-                           reg_param_factor: reg_param_factor, init_std: init_std,
-                           learning_rate: learning_rate, decay: decay, momentum: momentum)
+      def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
+                     max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
         check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
         check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(n_factors: n_factors,
-                              reg_param_weight: reg_param_weight, reg_param_factor: reg_param_factor,
-                              learning_rate: learning_rate, decay: decay, momentum: momentum,
+        check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
                               max_iter: max_iter, batch_size: batch_size)
         @params = {}
         @params[:n_factors] = n_factors
-        @params[:reg_param_bias] = reg_param_bias
-        @params[:reg_param_weight] = reg_param_weight
+        @params[:reg_param_linear] = reg_param_linear
         @params[:reg_param_factor] = reg_param_factor
-        @params[:init_std] = init_std
-        @params[:learning_rate] = learning_rate
-        @params[:decay] = decay
-        @params[:momentum] = momentum
         @params[:max_iter] = max_iter
         @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @factor_mat = nil
@@ -160,74 +146,52 @@ module SVMKit
         # Initialize some variables.
         n_samples, n_features = x.shape
         rand_ids = [*0...n_samples].shuffle(random: @rng)
-
-
-
-
-        weight_sqrsum = Numo::DFloat.zeros(n_features)
-        weight_update = Numo::DFloat.zeros(n_features)
-        bias_term = 0.0
-        bias_sqrsum = 0.0
-        bias_update = 0.0
+        weight_vec = Numo::DFloat.zeros(n_features + 1)
+        factor_mat = Numo::DFloat.zeros(@params[:n_factors], n_features)
+        weight_optimizer = Optimizer::Nadam.new
+        factor_optimizers = Array.new(@params[:n_factors]) { Optimizer::Nadam.new }
         # Start optimization.
         @params[:max_iter].times do |_t|
           # Random sampling.
           subset_ids = rand_ids.shift(@params[:batch_size])
           rand_ids.concat(subset_ids)
           data = x[subset_ids, true]
+          ex_data = expand_feature(data)
           values = y[subset_ids]
           # Calculate gradients for loss function.
-          loss_grad = loss_gradient(data, values,
-                                    factor_mat - @params[:momentum] * factor_update,
-                                    weight_vec - @params[:momentum] * weight_update,
-                                    bias_term - @params[:momentum] * bias_update)
+          loss_grad = loss_gradient(data, ex_data, values, factor_mat, weight_vec)
          next if loss_grad.ne(0.0).count.zero?
          # Update each parameter.
-
-            update_param(bias_term, bias_sqrsum, bias_update,
-                         bias_gradient(loss_grad, bias_term - @params[:momentum] * bias_update))
-          weight_vec, weight_sqrsum, weight_update =
-            update_param(weight_vec, weight_sqrsum, weight_update,
-                         weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
+          weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_data, weight_vec))
           @params[:n_factors].times do |n|
-
-
-              factor_gradient(loss_grad, data, factor_mat[n, true] - @params[:momentum] * factor_update[n, true]))
+            factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
+                                                            factor_gradient(loss_grad, data, factor_mat[n, true]))
           end
         end
-        [factor_mat, weight_vec
+        [factor_mat, *split_weight_vec_bias(weight_vec)]
       end
 
-      def loss_gradient(x, y, factor, weight
-        z =
+      def loss_gradient(x, ex_x, y, factor, weight)
+        z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
         2.0 * (z - y)
       end
 
-      def bias_gradient(loss_grad, bias)
-        loss_grad.mean + @params[:reg_param_bias] * bias
-      end
-
       def weight_gradient(loss_grad, data, weight)
-        (loss_grad.expand_dims(1) * data).mean(0) + @params[:
+        (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
      end
 
       def factor_gradient(loss_grad, data, factor)
         (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
       end
 
-      def update_param(param, sqrsum, update, gr)
-        new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
-        new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
-        new_param = param - (new_update + @params[:momentum] * update)
-        [new_param, new_sqrsum, new_update]
-      end
-
-      def rand_uniform(shape)
-        Numo::DFloat[*Array.new(shape.inject(&:*)) { @rng.rand }].reshape(*shape)
+      def expand_feature(x)
+        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
       end
 
-      def
-
+      def split_weight_vec_bias(weight_vec)
+        weights = weight_vec[0...-1]
+        bias = weight_vec[-1]
+        [weights, bias]
       end
     end
   end
 end
data/lib/svmkit/version.rb CHANGED

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.3.3
+  version: 0.4.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-
+date: 2018-06-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -139,6 +139,7 @@ files:
 - lib/svmkit/naive_bayes/naive_bayes.rb
 - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
 - lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
+- lib/svmkit/optimizer/nadam.rb
 - lib/svmkit/pairwise_metric.rb
 - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
 - lib/svmkit/polynomial_model/factorization_machine_regressor.rb