svmkit 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -0
- data/HISTORY.md +3 -0
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +13 -13
- data/lib/svmkit/kernel_machine/kernel_svc.rb +10 -10
- data/lib/svmkit/linear_model/logistic_regression.rb +20 -30
- data/lib/svmkit/linear_model/svc.rb +19 -29
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +5 -5
- data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +113 -0
- data/lib/svmkit/preprocessing/l2_normalizer.rb +2 -2
- data/lib/svmkit/preprocessing/min_max_scaler.rb +2 -2
- data/lib/svmkit/preprocessing/standard_scaler.rb +2 -2
- data/lib/svmkit/version.rb +1 -1
- data/svmkit.gemspec +3 -4
- metadata +6 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3cb57cc46849d792fff7b6c6500b9498d56fc71
|
4
|
+
data.tar.gz: 65c909ee0efaafc42df12dd24aa6c62d5b816d6a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25b52e63512393706f3f53ddf415a2e4ac07923f3d1bd909cca0ade9de66d5bbb63d32a932bce32f2fa2b6c4430bab73f483d94d620eb355540a91905320644a
|
7
|
+
data.tar.gz: a3b983cf6d75168cb6eda70ec5da113feb2bc52c7fc501af3a9328f569c6d793c19e4744a4a80f0c3c60b0ea4e5387db21451f2a004f8d4699605b8348c81bab
|
data/.rubocop.yml
CHANGED
@@ -4,6 +4,9 @@
|
|
4
4
|
Documentation:
|
5
5
|
Enabled: false
|
6
6
|
|
7
|
+
Metrics/AbcSize:
|
8
|
+
Max: 30
|
9
|
+
|
7
10
|
Metrics/LineLength:
|
8
11
|
Max: 120
|
9
12
|
|
@@ -13,5 +16,15 @@ Metrics/ModuleLength:
|
|
13
16
|
Metrics/ClassLength:
|
14
17
|
Max: 200
|
15
18
|
|
19
|
+
Metrics/MethodLength:
|
20
|
+
Max: 40
|
21
|
+
|
22
|
+
Metrics/BlockLength:
|
23
|
+
Exclude:
|
24
|
+
- 'spec/**/*'
|
25
|
+
|
26
|
+
ParameterLists:
|
27
|
+
Max: 10
|
28
|
+
|
16
29
|
Security/MarshalLoad:
|
17
30
|
Enabled: false
|
data/HISTORY.md
CHANGED
data/lib/svmkit.rb
CHANGED
@@ -12,6 +12,7 @@ require 'svmkit/linear_model/svc'
|
|
12
12
|
require 'svmkit/linear_model/logistic_regression'
|
13
13
|
require 'svmkit/kernel_machine/kernel_svc'
|
14
14
|
require 'svmkit/multiclass/one_vs_rest_classifier'
|
15
|
+
require 'svmkit/nearest_neighbors/k_neighbors_classifier'
|
15
16
|
require 'svmkit/preprocessing/l2_normalizer'
|
16
17
|
require 'svmkit/preprocessing/min_max_scaler'
|
17
18
|
require 'svmkit/preprocessing/standard_scaler'
|
@@ -35,12 +35,12 @@ module SVMKit
|
|
35
35
|
# @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
|
36
36
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
37
37
|
def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
@rng = Random.new(
|
38
|
+
@params = {}
|
39
|
+
@params[:gamma] = gamma
|
40
|
+
@params[:n_components] = n_components
|
41
|
+
@params[:random_seed] = random_seed
|
42
|
+
@params[:random_seed] ||= srand
|
43
|
+
@rng = Random.new(@params[:random_seed])
|
44
44
|
@random_mat = nil
|
45
45
|
@random_vec = nil
|
46
46
|
end
|
@@ -54,10 +54,10 @@ module SVMKit
|
|
54
54
|
# @return [RBF] The learned transformer itself.
|
55
55
|
def fit(x, _y = nil)
|
56
56
|
n_features = x.shape[1]
|
57
|
-
params[:n_components] = 2 * n_features if params[:n_components] <= 0
|
58
|
-
@random_mat = rand_normal([n_features, params[:n_components]]) * (2.0 * params[:gamma])**0.5
|
59
|
-
n_half_components = params[:n_components] / 2
|
60
|
-
@random_vec = Numo::DFloat.zeros(params[:n_components] - n_half_components).concatenate(
|
57
|
+
@params[:n_components] = 2 * n_features if @params[:n_components] <= 0
|
58
|
+
@random_mat = rand_normal([n_features, @params[:n_components]]) * (2.0 * @params[:gamma])**0.5
|
59
|
+
n_half_components = @params[:n_components] / 2
|
60
|
+
@random_vec = Numo::DFloat.zeros(@params[:n_components] - n_half_components).concatenate(
|
61
61
|
Numo::DFloat.ones(n_half_components) * (0.5 * Math::PI)
|
62
62
|
)
|
63
63
|
self
|
@@ -82,13 +82,13 @@ module SVMKit
|
|
82
82
|
def transform(x)
|
83
83
|
n_samples, = x.shape
|
84
84
|
projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
|
85
|
-
Numo::NMath.sin(projection) * ((2.0 / params[:n_components])**0.5)
|
85
|
+
Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
|
86
86
|
end
|
87
87
|
|
88
88
|
# Dump marshal data.
|
89
89
|
# @return [Hash] The marshal data about RBF.
|
90
90
|
def marshal_dump
|
91
|
-
{ params: params,
|
91
|
+
{ params: @params,
|
92
92
|
random_mat: @random_mat,
|
93
93
|
random_vec: @random_vec,
|
94
94
|
rng: @rng }
|
@@ -97,7 +97,7 @@ module SVMKit
|
|
97
97
|
# Load marshal data.
|
98
98
|
# @return [nil]
|
99
99
|
def marshal_load(obj)
|
100
|
-
|
100
|
+
@params = obj[:params]
|
101
101
|
@random_mat = obj[:random_mat]
|
102
102
|
@random_vec = obj[:random_vec]
|
103
103
|
@rng = obj[:rng]
|
@@ -34,13 +34,13 @@ module SVMKit
|
|
34
34
|
# @param max_iter [Integer] The maximum number of iterations.
|
35
35
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
36
36
|
def initialize(reg_param: 1.0, max_iter: 1000, random_seed: nil)
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
37
|
+
@params = {}
|
38
|
+
@params[:reg_param] = reg_param
|
39
|
+
@params[:max_iter] = max_iter
|
40
|
+
@params[:random_seed] = random_seed
|
41
|
+
@params[:random_seed] ||= srand
|
42
42
|
@weight_vec = nil
|
43
|
-
@rng = Random.new(
|
43
|
+
@rng = Random.new(@params[:random_seed])
|
44
44
|
end
|
45
45
|
|
46
46
|
# Fit the model with given training data.
|
@@ -58,13 +58,13 @@ module SVMKit
|
|
58
58
|
rand_ids = []
|
59
59
|
weight_vec = Numo::DFloat.zeros(n_training_samples)
|
60
60
|
# Start optimization.
|
61
|
-
params[:max_iter].times do |t|
|
61
|
+
@params[:max_iter].times do |t|
|
62
62
|
# random sampling
|
63
63
|
rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
|
64
64
|
target_id = rand_ids.shift
|
65
65
|
# update the weight vector
|
66
66
|
func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
|
67
|
-
func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
|
67
|
+
func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
|
68
68
|
weight_vec[target_id] += 1.0 if func < 1.0
|
69
69
|
end
|
70
70
|
# Store the learned model.
|
@@ -105,13 +105,13 @@ module SVMKit
|
|
105
105
|
# Dump marshal data.
|
106
106
|
# @return [Hash] The marshal data about KernelSVC.
|
107
107
|
def marshal_dump
|
108
|
-
{ params: params, weight_vec: @weight_vec, rng: @rng }
|
108
|
+
{ params: @params, weight_vec: @weight_vec, rng: @rng }
|
109
109
|
end
|
110
110
|
|
111
111
|
# Load marshal data.
|
112
112
|
# @return [nil]
|
113
113
|
def marshal_load(obj)
|
114
|
-
|
114
|
+
@params = obj[:params]
|
115
115
|
@weight_vec = obj[:weight_vec]
|
116
116
|
@rng = obj[:rng]
|
117
117
|
nil
|
@@ -20,16 +20,6 @@ module SVMKit
|
|
20
20
|
include Base::BaseEstimator
|
21
21
|
include Base::Classifier
|
22
22
|
|
23
|
-
# @!visibility private
|
24
|
-
DEFAULT_PARAMS = {
|
25
|
-
reg_param: 1.0,
|
26
|
-
fit_bias: false,
|
27
|
-
bias_scale: 1.0,
|
28
|
-
max_iter: 100,
|
29
|
-
batch_size: 50,
|
30
|
-
random_seed: nil
|
31
|
-
}.freeze
|
32
|
-
|
33
23
|
# Return the weight vector for Logistic Regression.
|
34
24
|
# @return [Numo::DFloat] (shape: [n_features])
|
35
25
|
attr_reader :weight_vec
|
@@ -52,17 +42,17 @@ module SVMKit
|
|
52
42
|
# @param batch_size [Integer] The size of the mini batches.
|
53
43
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
54
44
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
45
|
+
@params = {}
|
46
|
+
@params[:reg_param] = reg_param
|
47
|
+
@params[:fit_bias] = fit_bias
|
48
|
+
@params[:bias_scale] = bias_scale
|
49
|
+
@params[:max_iter] = max_iter
|
50
|
+
@params[:batch_size] = batch_size
|
51
|
+
@params[:random_seed] = random_seed
|
52
|
+
@params[:random_seed] ||= srand
|
63
53
|
@weight_vec = nil
|
64
54
|
@bias_term = 0.0
|
65
|
-
@rng = Random.new(
|
55
|
+
@rng = Random.new(@params[:random_seed])
|
66
56
|
end
|
67
57
|
|
68
58
|
# Fit the model with given training data.
|
@@ -77,9 +67,9 @@ module SVMKit
|
|
77
67
|
bin_y = y.to_a.map { |l| l != negative_label ? 1 : 0 }
|
78
68
|
# Expand feature vectors for bias term.
|
79
69
|
samples = x
|
80
|
-
if params[:fit_bias]
|
70
|
+
if @params[:fit_bias]
|
81
71
|
samples = Numo::NArray.hstack(
|
82
|
-
[samples, Numo::DFloat.ones([x.shape[0], 1]) * params[:bias_scale]]
|
72
|
+
[samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
|
83
73
|
)
|
84
74
|
end
|
85
75
|
# Initialize some variables.
|
@@ -87,27 +77,27 @@ module SVMKit
|
|
87
77
|
rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
|
88
78
|
weight_vec = Numo::DFloat.zeros(n_features)
|
89
79
|
# Start optimization.
|
90
|
-
params[:max_iter].times do |t|
|
80
|
+
@params[:max_iter].times do |t|
|
91
81
|
# random sampling
|
92
|
-
subset_ids = rand_ids.shift(params[:batch_size])
|
82
|
+
subset_ids = rand_ids.shift(@params[:batch_size])
|
93
83
|
rand_ids.concat(subset_ids)
|
94
84
|
# update the weight vector.
|
95
|
-
eta = 1.0 / (params[:reg_param] * (t + 1))
|
85
|
+
eta = 1.0 / (@params[:reg_param] * (t + 1))
|
96
86
|
mean_vec = Numo::DFloat.zeros(n_features)
|
97
87
|
subset_ids.each do |n|
|
98
88
|
z = weight_vec.dot(samples[n, true])
|
99
89
|
coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
|
100
90
|
mean_vec += samples[n, true] * coef
|
101
91
|
end
|
102
|
-
mean_vec *= eta / params[:batch_size]
|
103
|
-
weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
|
92
|
+
mean_vec *= eta / @params[:batch_size]
|
93
|
+
weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
|
104
94
|
# scale the weight vector.
|
105
95
|
norm = Math.sqrt(weight_vec.dot(weight_vec))
|
106
|
-
scaler = (1.0 / params[:reg_param]**0.5) / (norm + 1.0e-12)
|
96
|
+
scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
|
107
97
|
weight_vec *= [1.0, scaler].min
|
108
98
|
end
|
109
99
|
# Store the learned model.
|
110
|
-
if params[:fit_bias]
|
100
|
+
if @params[:fit_bias]
|
111
101
|
@weight_vec = weight_vec[0...n_features - 1]
|
112
102
|
@bias_term = weight_vec[n_features - 1]
|
113
103
|
else
|
@@ -156,13 +146,13 @@ module SVMKit
|
|
156
146
|
# Dump marshal data.
|
157
147
|
# @return [Hash] The marshal data about LogisticRegression.
|
158
148
|
def marshal_dump
|
159
|
-
{ params: params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
|
149
|
+
{ params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
|
160
150
|
end
|
161
151
|
|
162
152
|
# Load marshal data.
|
163
153
|
# @return [nil]
|
164
154
|
def marshal_load(obj)
|
165
|
-
|
155
|
+
@params = obj[:params]
|
166
156
|
@weight_vec = obj[:weight_vec]
|
167
157
|
@bias_term = obj[:bias_term]
|
168
158
|
@rng = obj[:rng]
|
@@ -18,16 +18,6 @@ module SVMKit
|
|
18
18
|
include Base::BaseEstimator
|
19
19
|
include Base::Classifier
|
20
20
|
|
21
|
-
# @!visibility private
|
22
|
-
DEFAULT_PARAMS = {
|
23
|
-
reg_param: 1.0,
|
24
|
-
fit_bias: false,
|
25
|
-
bias_scale: 1.0,
|
26
|
-
max_iter: 100,
|
27
|
-
batch_size: 50,
|
28
|
-
random_seed: nil
|
29
|
-
}.freeze
|
30
|
-
|
31
21
|
# Return the weight vector for SVC.
|
32
22
|
# @return [Numo::DFloat] (shape: [n_features])
|
33
23
|
attr_reader :weight_vec
|
@@ -49,17 +39,17 @@ module SVMKit
|
|
49
39
|
# @param batch_size [Integer] The size of the mini batches.
|
50
40
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
51
41
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
42
|
+
@params = {}
|
43
|
+
@params[:reg_param] = reg_param
|
44
|
+
@params[:fit_bias] = fit_bias
|
45
|
+
@params[:bias_scale] = bias_scale
|
46
|
+
@params[:max_iter] = max_iter
|
47
|
+
@params[:batch_size] = batch_size
|
48
|
+
@params[:random_seed] = random_seed
|
49
|
+
@params[:random_seed] ||= srand
|
60
50
|
@weight_vec = nil
|
61
51
|
@bias_term = 0.0
|
62
|
-
@rng = Random.new(
|
52
|
+
@rng = Random.new(@params[:random_seed])
|
63
53
|
end
|
64
54
|
|
65
55
|
# Fit the model with given training data.
|
@@ -73,9 +63,9 @@ module SVMKit
|
|
73
63
|
bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
|
74
64
|
# Expand feature vectors for bias term.
|
75
65
|
samples = x
|
76
|
-
if params[:fit_bias]
|
66
|
+
if @params[:fit_bias]
|
77
67
|
samples = Numo::NArray.hstack(
|
78
|
-
[samples, Numo::DFloat.ones([x.shape[0], 1]) * params[:bias_scale]]
|
68
|
+
[samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
|
79
69
|
)
|
80
70
|
end
|
81
71
|
# Initialize some variables.
|
@@ -83,26 +73,26 @@ module SVMKit
|
|
83
73
|
rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
|
84
74
|
weight_vec = Numo::DFloat.zeros(n_features)
|
85
75
|
# Start optimization.
|
86
|
-
params[:max_iter].times do |t|
|
76
|
+
@params[:max_iter].times do |t|
|
87
77
|
# random sampling
|
88
|
-
subset_ids = rand_ids.shift(params[:batch_size])
|
78
|
+
subset_ids = rand_ids.shift(@params[:batch_size])
|
89
79
|
rand_ids.concat(subset_ids)
|
90
80
|
target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
|
91
81
|
n_subsamples = target_ids.size
|
92
82
|
next if n_subsamples.zero?
|
93
83
|
# update the weight vector.
|
94
|
-
eta = 1.0 / (params[:reg_param] * (t + 1))
|
84
|
+
eta = 1.0 / (@params[:reg_param] * (t + 1))
|
95
85
|
mean_vec = Numo::DFloat.zeros(n_features)
|
96
86
|
target_ids.each { |n| mean_vec += samples[n, true] * bin_y[n] }
|
97
87
|
mean_vec *= eta / n_subsamples
|
98
|
-
weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
|
88
|
+
weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
|
99
89
|
# scale the weight vector.
|
100
90
|
norm = Math.sqrt(weight_vec.dot(weight_vec))
|
101
|
-
scaler = (1.0 / params[:reg_param]**0.5) / (norm + 1.0e-12)
|
91
|
+
scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
|
102
92
|
weight_vec *= [1.0, scaler].min
|
103
93
|
end
|
104
94
|
# Store the learned model.
|
105
|
-
if params[:fit_bias]
|
95
|
+
if @params[:fit_bias]
|
106
96
|
@weight_vec = weight_vec[0...n_features - 1]
|
107
97
|
@bias_term = weight_vec[n_features - 1]
|
108
98
|
else
|
@@ -142,13 +132,13 @@ module SVMKit
|
|
142
132
|
# Dump marshal data.
|
143
133
|
# @return [Hash] The marshal data about SVC.
|
144
134
|
def marshal_dump
|
145
|
-
{ params: params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
|
135
|
+
{ params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
|
146
136
|
end
|
147
137
|
|
148
138
|
# Load marshal data.
|
149
139
|
# @return [nil]
|
150
140
|
def marshal_load(obj)
|
151
|
-
|
141
|
+
@params = obj[:params]
|
152
142
|
@weight_vec = obj[:weight_vec]
|
153
143
|
@bias_term = obj[:bias_term]
|
154
144
|
@rng = obj[:rng]
|
@@ -28,8 +28,8 @@ module SVMKit
|
|
28
28
|
#
|
29
29
|
# @param estimator [Classifier] The (binary) classifier for constructing a multi-label classifier.
|
30
30
|
def initialize(estimator: nil)
|
31
|
-
|
32
|
-
|
31
|
+
@params = {}
|
32
|
+
@params[:estimator] = estimator
|
33
33
|
@estimators = nil
|
34
34
|
@classes = nil
|
35
35
|
end
|
@@ -44,7 +44,7 @@ module SVMKit
|
|
44
44
|
@classes = Numo::Int32.asarray(y_arr.uniq.sort)
|
45
45
|
@estimators = @classes.to_a.map do |label|
|
46
46
|
bin_y = Numo::Int32.asarray(y_arr.map { |l| l == label ? 1 : -1 })
|
47
|
-
params[:estimator].dup.fit(x, bin_y)
|
47
|
+
@params[:estimator].dup.fit(x, bin_y)
|
48
48
|
end
|
49
49
|
self
|
50
50
|
end
|
@@ -82,7 +82,7 @@ module SVMKit
|
|
82
82
|
# Dump marshal data.
|
83
83
|
# @return [Hash] The marshal data about OneVsRestClassifier.
|
84
84
|
def marshal_dump
|
85
|
-
{ params: params,
|
85
|
+
{ params: @params,
|
86
86
|
classes: @classes,
|
87
87
|
estimators: @estimators.map { |e| Marshal.dump(e) } }
|
88
88
|
end
|
@@ -90,7 +90,7 @@ module SVMKit
|
|
90
90
|
# Load marshal data.
|
91
91
|
# @return [nil]
|
92
92
|
def marshal_load(obj)
|
93
|
-
|
93
|
+
@params = obj[:params]
|
94
94
|
@classes = obj[:classes]
|
95
95
|
@estimators = obj[:estimators].map { |e| Marshal.load(e) }
|
96
96
|
nil
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'svmkit/base/base_estimator'
|
2
|
+
require 'svmkit/base/classifier'
|
3
|
+
|
4
|
+
module SVMKit
|
5
|
+
# This module consists of the classes that implement estimators based on nearest neighbors rule.
|
6
|
+
module NearestNeighbors
|
7
|
+
# KNeighborsClassifier is a class that implements the classifier with the k-nearest neighbors rule.
|
8
|
+
# The current implementation uses the Euclidean distance for finding the neighbors.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# estimator =
|
12
|
+
# SVMKit::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 5)
|
13
|
+
# estimator.fit(training_samples, training_labels)
|
14
|
+
# results = estimator.predict(testing_samples)
|
15
|
+
#
|
16
|
+
class KNeighborsClassifier
|
17
|
+
include Base::BaseEstimator
|
18
|
+
include Base::Classifier
|
19
|
+
|
20
|
+
# Return the prototypes for the nearest neighbor classifier.
|
21
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features])
|
22
|
+
attr_reader :prototypes
|
23
|
+
|
24
|
+
# Return the labels of the prototypes
|
25
|
+
# @return [Numo::Int32] (size: n_samples)
|
26
|
+
attr_reader :labels
|
27
|
+
|
28
|
+
# Return the class labels.
|
29
|
+
# @return [Numo::Int32] (size: n_classes)
|
30
|
+
attr_reader :classes
|
31
|
+
|
32
|
+
# Create a new classifier with the nearest neighbor rule.
|
33
|
+
#
|
34
|
+
# @param n_neighbors [Integer] The number of neighbors.
|
35
|
+
def initialize(n_neighbors: 5)
|
36
|
+
@params = {}
|
37
|
+
@params[:n_neighbors] = n_neighbors
|
38
|
+
@prototypes = nil
|
39
|
+
@labels = nil
|
40
|
+
@classes = nil
|
41
|
+
end
|
42
|
+
|
43
|
+
# Fit the model with given training data.
|
44
|
+
#
|
45
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
46
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
47
|
+
# @return [KNeighborsClassifier] The learned classifier itself.
|
48
|
+
def fit(x, y)
|
49
|
+
@prototypes = Numo::DFloat.asarray(x.to_a)
|
50
|
+
@labels = Numo::Int32.asarray(y.to_a)
|
51
|
+
@classes = Numo::Int32.asarray(y.to_a.uniq.sort)
|
52
|
+
self
|
53
|
+
end
|
54
|
+
|
55
|
+
# Calculate confidence scores for samples.
|
56
|
+
#
|
57
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
58
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
|
59
|
+
def decision_function(x)
|
60
|
+
distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
|
61
|
+
n_samples, n_prototypes = distance_matrix.shape
|
62
|
+
n_classes = @classes.size
|
63
|
+
n_neighbors = [@params[:n_neighbors], n_prototypes].min
|
64
|
+
scores = Numo::DFloat.zeros(n_samples, n_classes)
|
65
|
+
n_samples.times do |m|
|
66
|
+
neighbor_ids = distance_matrix[m, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
|
67
|
+
neighbor_ids.each { |n| scores[m, @classes.to_a.index(@labels[n])] += 1.0 }
|
68
|
+
end
|
69
|
+
scores
|
70
|
+
end
|
71
|
+
|
72
|
+
# Predict class labels for samples.
|
73
|
+
#
|
74
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
75
|
+
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
76
|
+
def predict(x)
|
77
|
+
n_samples = x.shape.first
|
78
|
+
decision_values = decision_function(x)
|
79
|
+
Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
|
80
|
+
end
|
81
|
+
|
82
|
+
# Calculate the mean accuracy of the given testing data.
|
83
|
+
#
|
84
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
|
85
|
+
# @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
|
86
|
+
# @return [Float] Mean accuracy
|
87
|
+
def score(x, y)
|
88
|
+
p = predict(x)
|
89
|
+
n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
|
90
|
+
n_hits / y.size.to_f
|
91
|
+
end
|
92
|
+
|
93
|
+
# Dump marshal data.
|
94
|
+
# @return [Hash] The marshal data about KNeighborsClassifier.
|
95
|
+
def marshal_dump
|
96
|
+
{ params: params,
|
97
|
+
prototypes: @prototypes,
|
98
|
+
labels: @labels,
|
99
|
+
classes: @classes }
|
100
|
+
end
|
101
|
+
|
102
|
+
# Load marshal data.
|
103
|
+
# @return [nil]
|
104
|
+
def marshal_load(obj)
|
105
|
+
@params = obj[:params]
|
106
|
+
@prototypes = obj[:prototypes]
|
107
|
+
@labels = obj[:labels]
|
108
|
+
@classes = obj[:classes]
|
109
|
+
nil
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
@@ -26,8 +26,8 @@ module SVMKit
|
|
26
26
|
#
|
27
27
|
# @param feature_range [Array<Float>] The desired range of samples.
|
28
28
|
def initialize(feature_range: [0.0, 1.0])
|
29
|
-
|
30
|
-
|
29
|
+
@params = {}
|
30
|
+
@params[:feature_range] = feature_range
|
31
31
|
@min_vec = nil
|
32
32
|
@max_vec = nil
|
33
33
|
end
|
data/lib/svmkit/version.rb
CHANGED
data/svmkit.gemspec
CHANGED
@@ -29,21 +29,20 @@ Gem::Specification.new do |spec|
|
|
29
29
|
|
30
30
|
spec.required_ruby_version = '>= 2.1'
|
31
31
|
|
32
|
-
spec.add_runtime_dependency 'numo-narray', '
|
32
|
+
spec.add_runtime_dependency 'numo-narray', '~> 0.9.0.5'
|
33
33
|
|
34
34
|
spec.add_development_dependency 'bundler', '~> 1.15'
|
35
35
|
spec.add_development_dependency 'rake', '~> 10.0'
|
36
36
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
37
37
|
spec.add_development_dependency 'simplecov', '~> 0.15.1'
|
38
|
-
spec.add_development_dependency 'numo-narray', '~> 0.9.0.9'
|
39
38
|
|
40
|
-
spec.post_install_message =
|
39
|
+
spec.post_install_message = <<MSG
|
41
40
|
*************************************************************************
|
42
41
|
Thank you for installing SVMKit!!
|
43
42
|
|
44
43
|
Note that the SVMKit has been changed to use Numo::NArray for
|
45
44
|
linear algebra library from version 0.2.0.
|
46
45
|
*************************************************************************
|
47
|
-
|
46
|
+
MSG
|
48
47
|
|
49
48
|
end
|
metadata
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 0.9.0.5
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.9.0.5
|
27
27
|
- !ruby/object:Gem::Dependency
|
@@ -80,20 +80,6 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 0.15.1
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: numo-narray
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: 0.9.0.9
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - "~>"
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: 0.9.0.9
|
97
83
|
description: |
|
98
84
|
SVMKit is a library for machine learning in Ruby.
|
99
85
|
SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
|
@@ -126,6 +112,7 @@ files:
|
|
126
112
|
- lib/svmkit/linear_model/logistic_regression.rb
|
127
113
|
- lib/svmkit/linear_model/svc.rb
|
128
114
|
- lib/svmkit/multiclass/one_vs_rest_classifier.rb
|
115
|
+
- lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
|
129
116
|
- lib/svmkit/pairwise_metric.rb
|
130
117
|
- lib/svmkit/preprocessing/l2_normalizer.rb
|
131
118
|
- lib/svmkit/preprocessing/min_max_scaler.rb
|
@@ -158,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
158
145
|
version: '0'
|
159
146
|
requirements: []
|
160
147
|
rubyforge_project:
|
161
|
-
rubygems_version: 2.
|
148
|
+
rubygems_version: 2.2.5
|
162
149
|
signing_key:
|
163
150
|
specification_version: 4
|
164
151
|
summary: SVMKit is an experimental library of machine learning in Ruby.
|