svmkit 0.6.0 → 0.6.1
This diff shows the content of publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -1
- data/HISTORY.md +5 -0
- data/lib/svmkit/decomposition/nmf.rb +2 -2
- data/lib/svmkit/linear_model/lasso.rb +23 -63
- data/lib/svmkit/linear_model/linear_regression.rb +10 -57
- data/lib/svmkit/linear_model/logistic_regression.rb +8 -63
- data/lib/svmkit/linear_model/ridge.rb +10 -60
- data/lib/svmkit/linear_model/sgd_linear_estimator.rb +89 -0
- data/lib/svmkit/linear_model/svc.rb +9 -62
- data/lib/svmkit/linear_model/svr.rb +8 -57
- data/lib/svmkit/tree/decision_tree_classifier.rb +6 -6
- data/lib/svmkit/tree/decision_tree_regressor.rb +1 -1
- data/lib/svmkit/version.rb +1 -1
- data/lib/svmkit.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1d52bf496a762b096a5f6dbeec278a1cae8079b53d6c91cc13c07dca7a799fde
+  data.tar.gz: e5ca2fed307b82e88dfe816691a4715d62a3187c1cad71421a48bea65037b19c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 620370c119300f3f419550609444eba4aa34561a954e8ec26cf6a0d3522cd32cabf1f6875092de5ab0dd202ebf7b772c1d6d6421cd05d90cfeeeeadea3cd0565
+  data.tar.gz: a0d8b5a7b91c4a8e2ffb4312a8082096a2e4fbd411e37bd36752ac19a66f6b9accf22be15894d6a88421b7296bde90ead33b55604085d139e5faec64b97f0f55
data/.rubocop.yml
CHANGED
@@ -1,7 +1,7 @@
 inherit_from: .rubocop_todo.yml
 
 AllCops:
-  TargetRubyVersion: 2.
+  TargetRubyVersion: 2.2
   DisplayCopNames: true
   DisplayStyleGuide: true
 
@@ -39,3 +39,6 @@ Naming/UncommunicativeMethodParamName:
 
 Style/FormatStringToken:
   Enabled: false
+
+Style/NumericLiterals:
+  Enabled: false
data/HISTORY.md
CHANGED
@@ -1,3 +1,8 @@
+# 0.6.1
+- Add abstract class for linear estimator with stochastic gradient descent.
+- Refactor linear estimators to use the linear estimator abstract class.
+- Refactor decision tree classes to avoid unneeded type conversion.
+
 # 0.6.0
 - Add class for Principal Component Analysis.
 - Add class for Non-negative Matrix Factorization.
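The abstract-class refactor listed for 0.6.1 is a template-method design: a base class owns the mini-batch SGD loop and calls a calc_loss_gradient hook that each concrete estimator overrides. A minimal plain-Ruby sketch of that shape (illustrative only, not the gem's code; the class names and the fixed learning rate are assumptions):

# Sketch: the SGD loop lives in the base class, the loss gradient in the subclass.
class SketchSGDEstimator
  def partial_fit(x, y)
    weight = 0.0
    100.times do
      grad = calc_loss_gradient(x, y, weight) # hook supplied by the subclass
      weight -= 0.01 * grad                   # fixed step in place of Nadam
    end
    weight
  end

  def calc_loss_gradient(_x, _y, _weight)
    raise NotImplementedError
  end
end

class SketchSquaredLoss < SketchSGDEstimator
  # Mean gradient of the squared loss: 2 * (w * x_i - y_i) * x_i.
  def calc_loss_gradient(x, y, weight)
    x.zip(y).map { |xi, yi| 2.0 * (xi * weight - yi) * xi }.inject(:+) / x.size
  end
end

p SketchSquaredLoss.new.partial_fit([1.0, 2.0, 3.0], [2.0, 4.0, 6.0]) # converges to ~2.0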
data/lib/svmkit/decomposition/nmf.rb
CHANGED
@@ -121,11 +121,11 @@ module SVMKit
       # update
       if update_comps
         nume = coefficients.transpose.dot(x)
-        deno =
+        deno = coefficients.transpose.dot(coefficients).dot(@components) + @params[:eps]
         @components *= (nume / deno)
       end
       nume = x.dot(@components.transpose)
-      deno =
+      deno = coefficients.dot(@components).dot(@components.transpose) + @params[:eps]
       coefficients *= (nume / deno)
       # normalize
       norm = Numo::NMath.sqrt((@components**2).sum(1)) + @params[:eps]
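The two patched nmf.rb lines are the denominators of the Lee-Seung multiplicative updates: the components are multiplied element-wise by (WᵀX)/(WᵀWH + eps) and the coefficients by (XHᵀ)/(WHHᵀ + eps), with @params[:eps] keeping the denominators nonzero. A one-step numeric sketch using numo-narray (assumed installed; toy matrices):

require 'numo/narray'

x = Numo::DFloat[[1.0, 2.0], [3.0, 4.0]] # data matrix
w = Numo::DFloat[[0.5, 0.5], [0.5, 0.5]] # coefficients
h = Numo::DFloat[[1.0, 1.0], [1.0, 1.0]] # components
eps = 1e-16

# Mirrors the patched lines: add eps to each denominator before dividing.
h *= w.transpose.dot(x) / (w.transpose.dot(w).dot(h) + eps)
w *= x.dot(h.transpose) / (w.dot(h).dot(h.transpose) + eps)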
data/lib/svmkit/linear_model/lasso.rb
CHANGED
@@ -1,9 +1,8 @@
 # frozen_string_literal: true
 
 require 'svmkit/validation'
-require 'svmkit/
+require 'svmkit/linear_model/sgd_linear_estimator'
 require 'svmkit/base/regressor'
-require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
@@ -19,8 +18,7 @@ module SVMKit
     # *Reference*
     # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
     # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    class Lasso
-      include Base::BaseEstimator
+    class Lasso < SGDLinearEstimator
       include Base::Regressor
       include Validation
 
@@ -40,29 +38,19 @@ module SVMKit
       #
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
       #   If nil is given, Nadam is used.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-        check_params_float(reg_param: reg_param)
+      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias)
         check_params_type_or_nil(Integer, random_seed: random_seed)
         check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
-
-        @params[:reg_param] = reg_param
-        @params[:fit_bias] = fit_bias
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Optimizer::Nadam.new
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @weight_vec = nil
-        @bias_term = nil
-        @rng = Random.new(@params[:random_seed])
+        super
       end
 
       # Fit the model with given training data.
@@ -76,14 +64,14 @@ module SVMKit
         check_sample_tvalue_size(x, y)
 
         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-
+        n_features = x.shape[1]
 
         if n_outputs > 1
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] =
+          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
         else
-          @weight_vec, @bias_term =
+          @weight_vec, @bias_term = partial_fit(x, y)
         end
 
         self
@@ -119,60 +107,32 @@ module SVMKit
 
       private
 
-      def
-
-
-
-
-
-
-        left_weight_vec = Numo::DFloat.zeros(n_features)
-        right_weight_vec = Numo::DFloat.zeros(n_features)
-        left_optimizer = @params[:optimizer].dup
-        right_optimizer = @params[:optimizer].dup
-        # Start optimization.
-        @params[:max_iter].times do |_t|
-          # Random sampling.
-          subset_ids = rand_ids.shift(@params[:batch_size])
-          rand_ids.concat(subset_ids)
-          data = samples[subset_ids, true]
-          values = y[subset_ids]
-          # Calculate gradients for loss function.
-          loss_grad = loss_gradient(data, values, weight_vec)
-          next if loss_grad.ne(0.0).count.zero?
-          # Update weight.
-          left_weight_vec = round_weight(left_optimizer.call(left_weight_vec, left_weight_gradient(loss_grad, data)))
-          right_weight_vec = round_weight(right_optimizer.call(right_weight_vec, right_weight_gradient(loss_grad, data)))
-          weight_vec = left_weight_vec - right_weight_vec
-        end
-        split_weight_vec_bias(weight_vec)
+      def partial_fit(x, y)
+        n_features = @params[:fit_bias] ? x.shape[1] + 1 : x.shape[1]
+        @left_weight = Numo::DFloat.zeros(n_features)
+        @right_weight = Numo::DFloat.zeros(n_features)
+        @left_optimizer = @params[:optimizer].dup
+        @right_optimizer = @params[:optimizer].dup
+        super
       end
 
-      def
+      def calc_loss_gradient(x, y, weight)
         2.0 * (x.dot(weight) - y)
       end
 
-      def
-
+      def calc_new_weight(_optimizer, x, _weight, loss_gradient)
+        @left_weight = round_weight(@left_optimizer.call(@left_weight, calc_weight_gradient(loss_gradient, x)))
+        @right_weight = round_weight(@right_optimizer.call(@right_weight, calc_weight_gradient(-loss_gradient, x)))
+        @left_weight - @right_weight
       end
 
-      def
-        ((@params[:reg_param]
+      def calc_weight_gradient(loss_gradient, data)
+        ((@params[:reg_param] + loss_gradient).expand_dims(1) * data).mean(0)
      end
 
       def round_weight(weight)
         0.5 * (weight + weight.abs)
       end
-
-      def expand_feature(x)
-        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
-      end
-
-      def split_weight_vec_bias(weight_vec)
-        weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
-        bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
-        [weights, bias]
-      end
     end
   end
 end
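Worth noting about the Lasso rewrite above: the non-smooth L1 penalty is handled by keeping two non-negative weight vectors, w = w_plus - w_minus, feeding calc_weight_gradient the loss gradient with opposite signs (so reg_param pushes both parts toward zero), and clipping negatives after every step; round_weight computes max(w, 0). A scalar sketch of the clipping (plain Ruby, illustrative):

# round_weight from the diff: 0.5 * (w + |w|) == max(w, 0).
def round_weight(w)
  0.5 * (w + w.abs)
end

w_plus, w_minus = 0.3, 0.7
# A gradient step that would push w_plus negative gets clipped to zero.
w_plus  = round_weight(w_plus - 0.5)  # => 0.0
w_minus = round_weight(w_minus - 0.5) # => 0.2
p w_plus - w_minus                    # => -0.2, the effective weight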
data/lib/svmkit/linear_model/linear_regression.rb
CHANGED
@@ -1,9 +1,8 @@
 # frozen_string_literal: true
 
 require 'svmkit/validation'
-require 'svmkit/
+require 'svmkit/linear_model/sgd_linear_estimator'
 require 'svmkit/base/regressor'
-require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
@@ -16,8 +15,7 @@ module SVMKit
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
-    class LinearRegression
-      include Base::BaseEstimator
+    class LinearRegression < SGDLinearEstimator
       include Base::Regressor
       include Validation
 
@@ -36,27 +34,20 @@ module SVMKit
       # Create a new ordinary least square linear regressor.
       #
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
       #   If nil is given, Nadam is used.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+      def initialize(fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(bias_scale: bias_scale)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias)
         check_params_type_or_nil(Integer, random_seed: random_seed)
         check_params_positive(max_iter: max_iter, batch_size: batch_size)
-
-
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Optimizer::Nadam.new
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @weight_vec = nil
-        @bias_term = nil
-        @rng = Random.new(@params[:random_seed])
+        super(reg_param: 0.0, fit_bias: fit_bias, bias_scale: bias_scale,
+              max_iter: max_iter, batch_size: batch_size, optimizer: optimizer, random_seed: random_seed)
       end
 
       # Fit the model with given training data.
@@ -75,9 +66,9 @@ module SVMKit
         if n_outputs > 1
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] =
+          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
         else
-          @weight_vec, @bias_term =
+          @weight_vec, @bias_term = partial_fit(x, y)
         end
 
         self
@@ -113,47 +104,9 @@ module SVMKit
 
       private
 
-      def
-        # Expand feature vectors for bias term.
-        samples = @params[:fit_bias] ? expand_feature(x) : x
-        # Initialize some variables.
-        n_samples, n_features = samples.shape
-        rand_ids = [*0...n_samples].shuffle(random: @rng)
-        weight_vec = Numo::DFloat.zeros(n_features)
-        optimizer = @params[:optimizer].dup
-        # Start optimization.
-        @params[:max_iter].times do |_t|
-          # Random sampling.
-          subset_ids = rand_ids.shift(@params[:batch_size])
-          rand_ids.concat(subset_ids)
-          data = samples[subset_ids, true]
-          values = y[subset_ids]
-          # Calculate gradients for loss function.
-          loss_grad = loss_gradient(data, values, weight_vec)
-          next if loss_grad.ne(0.0).count.zero?
-          # Update weight.
-          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
-        end
-        split_weight_vec_bias(weight_vec)
-      end
-
-      def loss_gradient(x, y, weight)
+      def calc_loss_gradient(x, y, weight)
         2.0 * (x.dot(weight) - y)
       end
-
-      def weight_gradient(loss_grad, data, _weight)
-        (loss_grad.expand_dims(1) * data).mean(0)
-      end
-
-      def expand_feature(x)
-        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
-      end
-
-      def split_weight_vec_bias(weight_vec)
-        weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
-        bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
-        [weights, bias]
-      end
     end
   end
 end
data/lib/svmkit/linear_model/logistic_regression.rb
CHANGED
@@ -1,9 +1,8 @@
 # frozen_string_literal: true
 
 require 'svmkit/validation'
-require 'svmkit/
+require 'svmkit/linear_model/sgd_linear_estimator'
 require 'svmkit/base/classifier'
-require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
@@ -19,8 +18,7 @@ module SVMKit
     #
     # *Reference*
     # - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
-    class LogisticRegression
-      include Base::BaseEstimator
+    class LogisticRegression < SGDLinearEstimator
       include Base::Classifier
       include Validation
 
@@ -58,20 +56,8 @@ module SVMKit
         check_params_boolean(fit_bias: fit_bias)
         check_params_type_or_nil(Integer, random_seed: random_seed)
         check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
-
-        @params[:reg_param] = reg_param
-        @params[:fit_bias] = fit_bias
-        @params[:bias_scale] = bias_scale
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Optimizer::Nadam.new
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @weight_vec = nil
-        @bias_term = nil
+        super
         @classes = nil
-        @rng = Random.new(@params[:random_seed])
       end
 
       # Fit the model with given training data.
@@ -86,21 +72,19 @@ module SVMKit
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
        n_classes = @classes.size
-
+        n_features = x.shape[1]
 
        if n_classes > 2
          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
          @bias_term = Numo::DFloat.zeros(n_classes)
          n_classes.times do |n|
            bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-
-            @weight_vec[n, true] = weight
-            @bias_term[n] = bias
+            @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
          end
        else
          negative_label = y.to_a.uniq.min
          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @weight_vec, @bias_term =
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
        end
 
        self
@@ -169,47 +153,8 @@ module SVMKit
 
      private
 
-      def
-
-        samples = @params[:fit_bias] ? expand_feature(x) : x
-        # Initialize some variables.
-        n_samples, n_features = samples.shape
-        rand_ids = [*0...n_samples].shuffle(random: @rng)
-        weight_vec = Numo::DFloat.zeros(n_features)
-        optimizer = @params[:optimizer].dup
-        # Start optimization.
-        @params[:max_iter].times do |_t|
-          # random sampling
-          subset_ids = rand_ids.shift(@params[:batch_size])
-          rand_ids.concat(subset_ids)
-          data = samples[subset_ids, true]
-          labels = y[subset_ids]
-          # calculate gradient for loss function.
-          loss_grad = loss_gradient(data, labels, weight_vec)
-          # update weight.
-          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
-        end
-        split_weight_vec_bias(weight_vec)
-      end
-
-      def loss_gradient(x, y, weight)
-        z = x.dot(weight)
-        grad = y / (Numo::NMath.exp(-y * z) + 1.0) - y
-        grad
-      end
-
-      def weight_gradient(loss_grad, x, weight)
-        x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
-      end
-
-      def expand_feature(x)
-        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
-      end
-
-      def split_weight_vec_bias(weight_vec)
-        weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
-        bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
-        [weights, bias]
+      def calc_loss_gradient(x, y, weight)
+        y / (Numo::NMath.exp(-y * x.dot(weight)) + 1.0) - y
      end
    end
  end
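The one-line calc_loss_gradient above is the derivative of the logistic loss L(z) = log(1 + exp(-y*z)) with respect to the margin z = x.dot(weight), since y/(1 + exp(-y*z)) - y = -y*exp(-y*z)/(1 + exp(-y*z)). A quick finite-difference check (plain Ruby, illustrative):

loss = ->(z, y) { Math.log(1.0 + Math.exp(-y * z)) }
grad = ->(z, y) { y / (Math.exp(-y * z) + 1.0) - y }

z, y, h = 0.7, 1.0, 1.0e-6
numeric = (loss.(z + h, y) - loss.(z - h, y)) / (2.0 * h)
p [grad.(z, y), numeric] # both ~ -0.3318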
data/lib/svmkit/linear_model/ridge.rb
CHANGED
@@ -1,9 +1,8 @@
 # frozen_string_literal: true
 
 require 'svmkit/validation'
-require 'svmkit/
+require 'svmkit/linear_model/sgd_linear_estimator'
 require 'svmkit/base/regressor'
-require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
@@ -16,8 +15,7 @@ module SVMKit
     #   estimator.fit(training_samples, traininig_values)
     #   results = estimator.predict(testing_samples)
     #
-    class Ridge
-      include Base::BaseEstimator
+    class Ridge < SGDLinearEstimator
       include Base::Regressor
       include Validation
 
@@ -37,29 +35,19 @@ module SVMKit
       #
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
       # @param max_iter [Integer] The maximum number of iterations.
       # @param batch_size [Integer] The size of the mini batches.
       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
       #   If nil is given, Nadam is used.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
-        check_params_float(reg_param: reg_param)
+      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param, bias_scale: bias_scale)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias)
         check_params_type_or_nil(Integer, random_seed: random_seed)
         check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
-
-        @params[:reg_param] = reg_param
-        @params[:fit_bias] = fit_bias
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Optimizer::Nadam.new
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @weight_vec = nil
-        @bias_term = nil
-        @rng = Random.new(@params[:random_seed])
+        super
      end
 
       # Fit the model with given training data.
@@ -73,14 +61,14 @@ module SVMKit
         check_sample_tvalue_size(x, y)
 
         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-
+        n_features = x.shape[1]
 
         if n_outputs > 1
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] =
+          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
         else
-          @weight_vec, @bias_term =
+          @weight_vec, @bias_term = partial_fit(x, y)
         end
 
         self
@@ -116,47 +104,9 @@ module SVMKit
 
       private
 
-      def
-        # Expand feature vectors for bias term.
-        samples = @params[:fit_bias] ? expand_feature(x) : x
-        # Initialize some variables.
-        n_samples, n_features = samples.shape
-        rand_ids = [*0...n_samples].shuffle(random: @rng)
-        weight_vec = Numo::DFloat.zeros(n_features)
-        optimizer = @params[:optimizer].dup
-        # Start optimization.
-        @params[:max_iter].times do |_t|
-          # Random sampling.
-          subset_ids = rand_ids.shift(@params[:batch_size])
-          rand_ids.concat(subset_ids)
-          data = samples[subset_ids, true]
-          values = y[subset_ids]
-          # Calculate gradients for loss function.
-          loss_grad = loss_gradient(data, values, weight_vec)
-          next if loss_grad.ne(0.0).count.zero?
-          # Update weight.
-          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
-        end
-        split_weight_vec_bias(weight_vec)
-      end
-
-      def loss_gradient(x, y, weight)
+      def calc_loss_gradient(x, y, weight)
         2.0 * (x.dot(weight) - y)
       end
-
-      def weight_gradient(loss_grad, data, weight)
-        (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param] * weight
-      end
-
-      def expand_feature(x)
-        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
-      end
-
-      def split_weight_vec_bias(weight_vec)
-        weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
-        bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
-        [weights, bias]
-      end
     end
   end
 end
data/lib/svmkit/linear_model/sgd_linear_estimator.rb
ADDED
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
+require 'svmkit/base/base_estimator'
+require 'svmkit/optimizer/nadam'
+
+module SVMKit
+  module LinearModel
+    # SGDLinearEstimator is an abstract class for implementation of linear estimator
+    # with mini-batch stochastic gradient descent optimization.
+    # This class is used for internal process.
+    class SGDLinearEstimator
+      include Base::BaseEstimator
+
+      # Initialize a linear estimator.
+      #
+      # @param reg_param [Float] The regularization parameter.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
+      #   If nil is given, Nadam is used.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
+                     max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        @params = {}
+        @params[:reg_param] = reg_param
+        @params[:fit_bias] = fit_bias
+        @params[:bias_scale] = bias_scale
+        @params[:max_iter] = max_iter
+        @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
+        @params[:optimizer] ||= Optimizer::Nadam.new
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @weight_vec = nil
+        @bias_term = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      private
+
+      def partial_fit(x, y)
+        # Expand feature vectors for bias term.
+        samples = @params[:fit_bias] ? expand_feature(x) : x
+        # Initialize some variables.
+        n_samples, n_features = samples.shape
+        rand_ids = [*0...n_samples].shuffle(random: @rng)
+        weight = Numo::DFloat.zeros(n_features)
+        optimizer = @params[:optimizer].dup
+        # Optimization.
+        @params[:max_iter].times do |_t|
+          # Random sampling
+          subset_ids = rand_ids.shift(@params[:batch_size])
+          rand_ids.concat(subset_ids)
+          sub_samples = samples[subset_ids, true]
+          sub_targets = y[subset_ids]
+          # Update weight.
+          loss_gradient = calc_loss_gradient(sub_samples, sub_targets, weight)
+          next if loss_gradient.ne(0.0).count.zero?
+          weight = calc_new_weight(optimizer, sub_samples, weight, loss_gradient)
+        end
+        split_weight(weight)
+      end
+
+      def calc_loss_gradient(_x, _y, _weight)
+        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
+      end
+
+      def calc_new_weight(optimizer, x, weight, loss_gradient)
+        weight_gradient = x.transpose.dot(loss_gradient) / @params[:batch_size] + @params[:reg_param] * weight
+        optimizer.call(weight, weight_gradient)
+      end
+
+      def expand_feature(x)
+        n_samples = x.shape[0]
+        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
+      end
+
+      def split_weight(weight)
+        if @params[:fit_bias]
+          [weight[0...-1], weight[-1]]
+        else
+          [weight, 0.0]
+        end
+      end
+    end
+  end
+end
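Given the hooks above (the private partial_fit drives the loop, calc_loss_gradient must be overridden, and calc_new_weight defaults to an L2-regularized step), a concrete estimator reduces to a gradient formula. A hypothetical subclass as a sketch (not part of the gem; TinyLeastSquares and its fit method are assumptions for illustration):

require 'svmkit'

class TinyLeastSquares < SVMKit::LinearModel::SGDLinearEstimator
  def fit(x, y)
    @weight_vec, @bias_term = partial_fit(x, y) # private, callable from a subclass
    self
  end

  private

  # Same squared-loss gradient the regressors in this release use.
  def calc_loss_gradient(x, y, weight)
    2.0 * (x.dot(weight) - y)
  end
end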
data/lib/svmkit/linear_model/svc.rb
CHANGED
@@ -1,9 +1,8 @@
 # frozen_string_literal: true
 
 require 'svmkit/validation'
-require 'svmkit/
+require 'svmkit/linear_model/sgd_linear_estimator'
 require 'svmkit/base/classifier'
-require 'svmkit/optimizer/nadam'
 require 'svmkit/probabilistic_output'
 
 module SVMKit
@@ -21,8 +20,7 @@ module SVMKit
     #
     # *Reference*
     # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-    class SVC
-      include Base::BaseEstimator
+    class SVC < SGDLinearEstimator
       include Base::Classifier
       include Validation
 
@@ -60,22 +58,11 @@ module SVMKit
         check_params_boolean(fit_bias: fit_bias, probability: probability)
         check_params_type_or_nil(Integer, random_seed: random_seed)
         check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
-
-
-        @params[:fit_bias] = fit_bias
-        @params[:bias_scale] = bias_scale
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
+        super(reg_param: reg_param, fit_bias: fit_bias, bias_scale: bias_scale,
+              max_iter: max_iter, batch_size: batch_size, optimizer: optimizer, random_seed: random_seed)
         @params[:probability] = probability
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Optimizer::Nadam.new
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @weight_vec = nil
-        @bias_term = nil
         @prob_param = nil
         @classes = nil
-        @rng = Random.new(@params[:random_seed])
       end
 
       # Fit the model with given training data.
@@ -90,7 +77,7 @@ module SVMKit
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
         n_classes = @classes.size
-
+        n_features = x.shape[1]
 
         if n_classes > 2
           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
@@ -98,11 +85,9 @@ module SVMKit
           @prob_param = Numo::DFloat.zeros(n_classes, 2)
           n_classes.times do |n|
             bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-
-            @weight_vec[n, true] = weight
-            @bias_term[n] = bias
+            @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
             @prob_param[n, true] = if @params[:probability]
-                                     SVMKit::ProbabilisticOutput.fit_sigmoid(x.dot(
+                                     SVMKit::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec[n, true].transpose) + @bias_term[n], bin_y)
                                    else
                                      Numo::DFloat[1, 0]
                                    end
@@ -110,7 +95,7 @@ module SVMKit
         else
           negative_label = y.to_a.uniq.min
           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @weight_vec, @bias_term =
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
           @prob_param = if @params[:probability]
                           SVMKit::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose) + @bias_term, bin_y)
                         else
@@ -188,50 +173,12 @@ module SVMKit
 
       private
 
-      def
-        # Expand feature vectors for bias term.
-        samples = @params[:fit_bias] ? expand_feature(x) : x
-        # Initialize some variables.
-        n_samples, n_features = samples.shape
-        rand_ids = [*0...n_samples].shuffle(random: @rng)
-        weight_vec = Numo::DFloat.zeros(n_features)
-        optimizer = @params[:optimizer].dup
-        # Start optimization.
-        @params[:max_iter].times do |_t|
-          # random sampling.
-          subset_ids = rand_ids.shift(@params[:batch_size])
-          rand_ids.concat(subset_ids)
-          data = samples[subset_ids, true]
-          labels = y[subset_ids]
-          # calculate gradient for loss function.
-          loss_grad = loss_gradient(data, labels, weight_vec)
-          next if loss_grad.ne(0.0).count.zero?
-          # update weight.
-          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
-        end
-        split_weight_vec_bias(weight_vec)
-      end
-
-      def loss_gradient(x, y, weight)
+      def calc_loss_gradient(x, y, weight)
         target_ids = (x.dot(weight) * y).lt(1.0).where
         grad = Numo::DFloat.zeros(@params[:batch_size])
         grad[target_ids] = -y[target_ids]
         grad
       end
-
-      def weight_gradient(loss_grad, x, weight)
-        x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
-      end
-
-      def expand_feature(x)
-        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
-      end
-
-      def split_weight_vec_bias(weight_vec)
-        weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
-        bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
-        [weights, bias]
-      end
     end
   end
 end
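SVC's calc_loss_gradient is the hinge-loss subgradient from the cited Pegasos reference: -y for samples inside the margin (y*z < 1), 0 elsewhere. Worked on a tiny batch (plain Ruby sketch):

z = [1.4, 0.2, -0.5] # scores x.dot(weight)
y = [1.0, 1.0, -1.0] # labels
grad = z.zip(y).map { |zi, yi| zi * yi < 1.0 ? -yi : 0.0 }
p grad # => [0.0, -1.0, 1.0]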
data/lib/svmkit/linear_model/svr.rb
CHANGED
@@ -1,9 +1,8 @@
 # frozen_string_literal: true
 
 require 'svmkit/validation'
-require 'svmkit/
+require 'svmkit/linear_model/sgd_linear_estimator'
 require 'svmkit/base/regressor'
-require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
@@ -18,8 +17,7 @@ module SVMKit
     #
     # *Reference*
     # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-    class SVR
-      include Base::BaseEstimator
+    class SVR < SGDLinearEstimator
       include Base::Regressor
       include Validation
 
@@ -54,20 +52,9 @@ module SVMKit
         check_params_type_or_nil(Integer, random_seed: random_seed)
         check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
                               max_iter: max_iter, batch_size: batch_size)
-
-
-        @params[:fit_bias] = fit_bias
-        @params[:bias_scale] = bias_scale
+        super(reg_param: reg_param, fit_bias: fit_bias, bias_scale: bias_scale,
+              max_iter: max_iter, batch_size: batch_size, optimizer: optimizer, random_seed: random_seed)
         @params[:epsilon] = epsilon
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Optimizer::Nadam.new
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @weight_vec = nil
-        @bias_term = nil
-        @rng = Random.new(@params[:random_seed])
       end
 
       # Fit the model with given training data.
@@ -81,14 +68,14 @@ module SVMKit
         check_sample_tvalue_size(x, y)
 
         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-
+        n_features = x.shape[1]
 
         if n_outputs > 1
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] =
+          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
         else
-          @weight_vec, @bias_term =
+          @weight_vec, @bias_term = partial_fit(x, y)
         end
 
         self
@@ -124,49 +111,13 @@ module SVMKit
 
       private
 
-      def
-        # Expand feature vectors for bias term.
-        samples = @params[:fit_bias] ? expand_feature(x) : x
-        # Initialize some variables.
-        n_samples, n_features = samples.shape
-        rand_ids = [*0...n_samples].shuffle(random: @rng)
-        weight_vec = Numo::DFloat.zeros(n_features)
-        optimizer = @params[:optimizer].dup
-        # Start optimization.
-        @params[:max_iter].times do |_t|
-          # random sampling
-          subset_ids = rand_ids.shift(@params[:batch_size])
-          rand_ids.concat(subset_ids)
-          data = samples[subset_ids, true]
-          values = y[subset_ids]
-          # update the weight vector.
-          loss_grad = loss_gradient(data, values, weight_vec)
-          weight_vec = optimizer.call(weight_vec, weight_gradient(loss_grad, data, weight_vec))
-        end
-        split_weight_vec_bias(weight_vec)
-      end
-
-      def loss_gradient(x, y, weight)
+      def calc_loss_gradient(x, y, weight)
         z = x.dot(weight)
         grad = Numo::DFloat.zeros(@params[:batch_size])
         grad[(z - y).gt(@params[:epsilon]).where] = 1
         grad[(y - z).gt(@params[:epsilon]).where] = -1
         grad
       end
-
-      def weight_gradient(loss_grad, x, weight)
-        x.transpose.dot(loss_grad) / @params[:batch_size] + @params[:reg_param] * weight
-      end
-
-      def expand_feature(x)
-        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
-      end
-
-      def split_weight_vec_bias(weight_vec)
-        weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
-        bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
-        [weights, bias]
-      end
     end
   end
 end
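SVR's calc_loss_gradient implements the epsilon-insensitive loss: the subgradient is +1 where the prediction overshoots the target by more than epsilon, -1 where it undershoots by more than epsilon, and 0 inside the tube. The same rule on toy values (plain Ruby, illustrative):

z = [2.3, 1.0, 0.2]  # predictions
y = [2.0, 1.05, 1.0] # targets
eps = 0.1
grad = z.zip(y).map { |zi, yi| (zi - yi) > eps ? 1 : ((yi - zi) > eps ? -1 : 0) }
p grad # => [1, 0, -1]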
data/lib/svmkit/tree/decision_tree_classifier.rb
CHANGED
@@ -213,7 +213,7 @@ module SVMKit
      end
 
      def put_leaf(node, y)
-        node.probs = Numo::DFloat
+        node.probs = Numo::DFloat.cast(@classes.map { |c| y.eq(c).count_true }) / node.n_samples
        node.leaf = true
        node.leaf_id = @n_leaves
        @n_leaves += 1
@@ -234,18 +234,18 @@ module SVMKit
      end
 
      def splited_ids(features, threshold)
-        [features.le(threshold).where
+        [features.le(threshold).where, features.gt(threshold).where]
      end
 
      def gain(labels, labels_left, labels_right)
-        prob_left = labels_left.size
-        prob_right = labels_right.size
+        prob_left = labels_left.size.fdiv(labels.size)
+        prob_right = labels_right.size.fdiv(labels.size)
        impurity(labels) - prob_left * impurity(labels_left) - prob_right * impurity(labels_right)
      end
 
      def impurity(labels)
-
-        send(@criterion,
+        cls = labels.to_a.uniq.sort
+        cls.size == 1 ? 0.0 : send(@criterion, Numo::DFloat[*(cls.map { |c| labels.eq(c).count_true.fdiv(labels.size) })])
      end
 
      def gini(posterior_probs)
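The patched gain method weights each child's impurity by its sample fraction: gain = impurity(parent) - (n_left/n) * impurity(left) - (n_right/n) * impurity(right). A worked Gini example (plain Ruby, illustrative):

# Gini impurity: 1 - sum of squared class probabilities.
def gini(labels)
  n = labels.size.to_f
  1.0 - labels.group_by(&:itself).values.map { |g| (g.size / n)**2 }.inject(:+)
end

parent = [0, 0, 0, 1, 1, 1]
left, right = [0, 0, 0], [1, 1, 1]
gain = gini(parent) -
       left.size.fdiv(parent.size) * gini(left) -
       right.size.fdiv(parent.size) * gini(right)
p gain # => 0.5 (a pure split removes all of the parent's impurity)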
data/lib/svmkit/tree/decision_tree_regressor.rb
CHANGED
@@ -208,7 +208,7 @@ module SVMKit
      end
 
      def splited_ids(features, threshold)
-        [features.le(threshold).where
+        [features.le(threshold).where, features.gt(threshold).where]
      end
 
      def gain(values, values_left, values_right)
data/lib/svmkit/version.rb
CHANGED
data/lib/svmkit.rb
CHANGED
@@ -19,6 +19,7 @@ require 'svmkit/optimizer/rmsprop'
 require 'svmkit/optimizer/nadam'
 require 'svmkit/optimizer/yellow_fin'
 require 'svmkit/kernel_approximation/rbf'
+require 'svmkit/linear_model/sgd_linear_estimator'
 require 'svmkit/linear_model/svc'
 require 'svmkit/linear_model/svr'
 require 'svmkit/linear_model/logistic_regression'
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.6.0
+  version: 0.6.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-
+date: 2018-09-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -140,6 +140,7 @@ files:
 - lib/svmkit/linear_model/linear_regression.rb
 - lib/svmkit/linear_model/logistic_regression.rb
 - lib/svmkit/linear_model/ridge.rb
+- lib/svmkit/linear_model/sgd_linear_estimator.rb
 - lib/svmkit/linear_model/svc.rb
 - lib/svmkit/linear_model/svr.rb
 - lib/svmkit/model_selection/cross_validation.rb
|