svmkit 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -0
- data/HISTORY.md +3 -0
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +13 -13
- data/lib/svmkit/kernel_machine/kernel_svc.rb +10 -10
- data/lib/svmkit/linear_model/logistic_regression.rb +20 -30
- data/lib/svmkit/linear_model/svc.rb +19 -29
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +5 -5
- data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +113 -0
- data/lib/svmkit/preprocessing/l2_normalizer.rb +2 -2
- data/lib/svmkit/preprocessing/min_max_scaler.rb +2 -2
- data/lib/svmkit/preprocessing/standard_scaler.rb +2 -2
- data/lib/svmkit/version.rb +1 -1
- data/svmkit.gemspec +3 -4
- metadata +6 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3cb57cc46849d792fff7b6c6500b9498d56fc71
|
4
|
+
data.tar.gz: 65c909ee0efaafc42df12dd24aa6c62d5b816d6a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25b52e63512393706f3f53ddf415a2e4ac07923f3d1bd909cca0ade9de66d5bbb63d32a932bce32f2fa2b6c4430bab73f483d94d620eb355540a91905320644a
|
7
|
+
data.tar.gz: a3b983cf6d75168cb6eda70ec5da113feb2bc52c7fc501af3a9328f569c6d793c19e4744a4a80f0c3c60b0ea4e5387db21451f2a004f8d4699605b8348c81bab
|
data/.rubocop.yml
CHANGED
@@ -4,6 +4,9 @@
|
|
4
4
|
Documentation:
|
5
5
|
Enabled: false
|
6
6
|
|
7
|
+
Metrics/AbcSize:
|
8
|
+
Max: 30
|
9
|
+
|
7
10
|
Metrics/LineLength:
|
8
11
|
Max: 120
|
9
12
|
|
@@ -13,5 +16,15 @@ Metrics/ModuleLength:
|
|
13
16
|
Metrics/ClassLength:
|
14
17
|
Max: 200
|
15
18
|
|
19
|
+
Metrics/MethodLength:
|
20
|
+
Max: 40
|
21
|
+
|
22
|
+
Metrics/BlockLength:
|
23
|
+
Exclude:
|
24
|
+
- 'spec/**/*'
|
25
|
+
|
26
|
+
ParameterLists:
|
27
|
+
Max: 10
|
28
|
+
|
16
29
|
Security/MarshalLoad:
|
17
30
|
Enabled: false
|
data/HISTORY.md
CHANGED
data/lib/svmkit.rb
CHANGED
@@ -12,6 +12,7 @@ require 'svmkit/linear_model/svc'
|
|
12
12
|
require 'svmkit/linear_model/logistic_regression'
|
13
13
|
require 'svmkit/kernel_machine/kernel_svc'
|
14
14
|
require 'svmkit/multiclass/one_vs_rest_classifier'
|
15
|
+
require 'svmkit/nearest_neighbors/k_neighbors_classifier'
|
15
16
|
require 'svmkit/preprocessing/l2_normalizer'
|
16
17
|
require 'svmkit/preprocessing/min_max_scaler'
|
17
18
|
require 'svmkit/preprocessing/standard_scaler'
|
@@ -35,12 +35,12 @@ module SVMKit
|
|
35
35
|
# @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
|
36
36
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
37
37
|
def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
@rng = Random.new(
|
38
|
+
@params = {}
|
39
|
+
@params[:gamma] = gamma
|
40
|
+
@params[:n_components] = n_components
|
41
|
+
@params[:random_seed] = random_seed
|
42
|
+
@params[:random_seed] ||= srand
|
43
|
+
@rng = Random.new(@params[:random_seed])
|
44
44
|
@random_mat = nil
|
45
45
|
@random_vec = nil
|
46
46
|
end
|
@@ -54,10 +54,10 @@ module SVMKit
|
|
54
54
|
# @return [RBF] The learned transformer itself.
|
55
55
|
def fit(x, _y = nil)
|
56
56
|
n_features = x.shape[1]
|
57
|
-
params[:n_components] = 2 * n_features if params[:n_components] <= 0
|
58
|
-
@random_mat = rand_normal([n_features, params[:n_components]]) * (2.0 * params[:gamma])**0.5
|
59
|
-
n_half_components = params[:n_components] / 2
|
60
|
-
@random_vec = Numo::DFloat.zeros(params[:n_components] - n_half_components).concatenate(
|
57
|
+
@params[:n_components] = 2 * n_features if @params[:n_components] <= 0
|
58
|
+
@random_mat = rand_normal([n_features, @params[:n_components]]) * (2.0 * @params[:gamma])**0.5
|
59
|
+
n_half_components = @params[:n_components] / 2
|
60
|
+
@random_vec = Numo::DFloat.zeros(@params[:n_components] - n_half_components).concatenate(
|
61
61
|
Numo::DFloat.ones(n_half_components) * (0.5 * Math::PI)
|
62
62
|
)
|
63
63
|
self
|
@@ -82,13 +82,13 @@ module SVMKit
|
|
82
82
|
def transform(x)
|
83
83
|
n_samples, = x.shape
|
84
84
|
projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
|
85
|
-
Numo::NMath.sin(projection) * ((2.0 / params[:n_components])**0.5)
|
85
|
+
Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
|
86
86
|
end
|
87
87
|
|
88
88
|
# Dump marshal data.
|
89
89
|
# @return [Hash] The marshal data about RBF.
|
90
90
|
def marshal_dump
|
91
|
-
{ params: params,
|
91
|
+
{ params: @params,
|
92
92
|
random_mat: @random_mat,
|
93
93
|
random_vec: @random_vec,
|
94
94
|
rng: @rng }
|
@@ -97,7 +97,7 @@ module SVMKit
|
|
97
97
|
# Load marshal data.
|
98
98
|
# @return [nil]
|
99
99
|
def marshal_load(obj)
|
100
|
-
|
100
|
+
@params = obj[:params]
|
101
101
|
@random_mat = obj[:random_mat]
|
102
102
|
@random_vec = obj[:random_vec]
|
103
103
|
@rng = obj[:rng]
|
@@ -34,13 +34,13 @@ module SVMKit
|
|
34
34
|
# @param max_iter [Integer] The maximum number of iterations.
|
35
35
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
36
36
|
def initialize(reg_param: 1.0, max_iter: 1000, random_seed: nil)
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
37
|
+
@params = {}
|
38
|
+
@params[:reg_param] = reg_param
|
39
|
+
@params[:max_iter] = max_iter
|
40
|
+
@params[:random_seed] = random_seed
|
41
|
+
@params[:random_seed] ||= srand
|
42
42
|
@weight_vec = nil
|
43
|
-
@rng = Random.new(
|
43
|
+
@rng = Random.new(@params[:random_seed])
|
44
44
|
end
|
45
45
|
|
46
46
|
# Fit the model with given training data.
|
@@ -58,13 +58,13 @@ module SVMKit
|
|
58
58
|
rand_ids = []
|
59
59
|
weight_vec = Numo::DFloat.zeros(n_training_samples)
|
60
60
|
# Start optimization.
|
61
|
-
params[:max_iter].times do |t|
|
61
|
+
@params[:max_iter].times do |t|
|
62
62
|
# random sampling
|
63
63
|
rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
|
64
64
|
target_id = rand_ids.shift
|
65
65
|
# update the weight vector
|
66
66
|
func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
|
67
|
-
func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
|
67
|
+
func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
|
68
68
|
weight_vec[target_id] += 1.0 if func < 1.0
|
69
69
|
end
|
70
70
|
# Store the learned model.
|
@@ -105,13 +105,13 @@ module SVMKit
|
|
105
105
|
# Dump marshal data.
|
106
106
|
# @return [Hash] The marshal data about KernelSVC.
|
107
107
|
def marshal_dump
|
108
|
-
{ params: params, weight_vec: @weight_vec, rng: @rng }
|
108
|
+
{ params: @params, weight_vec: @weight_vec, rng: @rng }
|
109
109
|
end
|
110
110
|
|
111
111
|
# Load marshal data.
|
112
112
|
# @return [nil]
|
113
113
|
def marshal_load(obj)
|
114
|
-
|
114
|
+
@params = obj[:params]
|
115
115
|
@weight_vec = obj[:weight_vec]
|
116
116
|
@rng = obj[:rng]
|
117
117
|
nil
|
@@ -20,16 +20,6 @@ module SVMKit
|
|
20
20
|
include Base::BaseEstimator
|
21
21
|
include Base::Classifier
|
22
22
|
|
23
|
-
# @!visibility private
|
24
|
-
DEFAULT_PARAMS = {
|
25
|
-
reg_param: 1.0,
|
26
|
-
fit_bias: false,
|
27
|
-
bias_scale: 1.0,
|
28
|
-
max_iter: 100,
|
29
|
-
batch_size: 50,
|
30
|
-
random_seed: nil
|
31
|
-
}.freeze
|
32
|
-
|
33
23
|
# Return the weight vector for Logistic Regression.
|
34
24
|
# @return [Numo::DFloat] (shape: [n_features])
|
35
25
|
attr_reader :weight_vec
|
@@ -52,17 +42,17 @@ module SVMKit
|
|
52
42
|
# @param batch_size [Integer] The size of the mini batches.
|
53
43
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
54
44
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
45
|
+
@params = {}
|
46
|
+
@params[:reg_param] = reg_param
|
47
|
+
@params[:fit_bias] = fit_bias
|
48
|
+
@params[:bias_scale] = bias_scale
|
49
|
+
@params[:max_iter] = max_iter
|
50
|
+
@params[:batch_size] = batch_size
|
51
|
+
@params[:random_seed] = random_seed
|
52
|
+
@params[:random_seed] ||= srand
|
63
53
|
@weight_vec = nil
|
64
54
|
@bias_term = 0.0
|
65
|
-
@rng = Random.new(
|
55
|
+
@rng = Random.new(@params[:random_seed])
|
66
56
|
end
|
67
57
|
|
68
58
|
# Fit the model with given training data.
|
@@ -77,9 +67,9 @@ module SVMKit
|
|
77
67
|
bin_y = y.to_a.map { |l| l != negative_label ? 1 : 0 }
|
78
68
|
# Expand feature vectors for bias term.
|
79
69
|
samples = x
|
80
|
-
if params[:fit_bias]
|
70
|
+
if @params[:fit_bias]
|
81
71
|
samples = Numo::NArray.hstack(
|
82
|
-
[samples, Numo::DFloat.ones([x.shape[0], 1]) * params[:bias_scale]]
|
72
|
+
[samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
|
83
73
|
)
|
84
74
|
end
|
85
75
|
# Initialize some variables.
|
@@ -87,27 +77,27 @@ module SVMKit
|
|
87
77
|
rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
|
88
78
|
weight_vec = Numo::DFloat.zeros(n_features)
|
89
79
|
# Start optimization.
|
90
|
-
params[:max_iter].times do |t|
|
80
|
+
@params[:max_iter].times do |t|
|
91
81
|
# random sampling
|
92
|
-
subset_ids = rand_ids.shift(params[:batch_size])
|
82
|
+
subset_ids = rand_ids.shift(@params[:batch_size])
|
93
83
|
rand_ids.concat(subset_ids)
|
94
84
|
# update the weight vector.
|
95
|
-
eta = 1.0 / (params[:reg_param] * (t + 1))
|
85
|
+
eta = 1.0 / (@params[:reg_param] * (t + 1))
|
96
86
|
mean_vec = Numo::DFloat.zeros(n_features)
|
97
87
|
subset_ids.each do |n|
|
98
88
|
z = weight_vec.dot(samples[n, true])
|
99
89
|
coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
|
100
90
|
mean_vec += samples[n, true] * coef
|
101
91
|
end
|
102
|
-
mean_vec *= eta / params[:batch_size]
|
103
|
-
weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
|
92
|
+
mean_vec *= eta / @params[:batch_size]
|
93
|
+
weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
|
104
94
|
# scale the weight vector.
|
105
95
|
norm = Math.sqrt(weight_vec.dot(weight_vec))
|
106
|
-
scaler = (1.0 / params[:reg_param]**0.5) / (norm + 1.0e-12)
|
96
|
+
scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
|
107
97
|
weight_vec *= [1.0, scaler].min
|
108
98
|
end
|
109
99
|
# Store the learned model.
|
110
|
-
if params[:fit_bias]
|
100
|
+
if @params[:fit_bias]
|
111
101
|
@weight_vec = weight_vec[0...n_features - 1]
|
112
102
|
@bias_term = weight_vec[n_features - 1]
|
113
103
|
else
|
@@ -156,13 +146,13 @@ module SVMKit
|
|
156
146
|
# Dump marshal data.
|
157
147
|
# @return [Hash] The marshal data about LogisticRegression.
|
158
148
|
def marshal_dump
|
159
|
-
{ params: params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
|
149
|
+
{ params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
|
160
150
|
end
|
161
151
|
|
162
152
|
# Load marshal data.
|
163
153
|
# @return [nil]
|
164
154
|
def marshal_load(obj)
|
165
|
-
|
155
|
+
@params = obj[:params]
|
166
156
|
@weight_vec = obj[:weight_vec]
|
167
157
|
@bias_term = obj[:bias_term]
|
168
158
|
@rng = obj[:rng]
|
@@ -18,16 +18,6 @@ module SVMKit
|
|
18
18
|
include Base::BaseEstimator
|
19
19
|
include Base::Classifier
|
20
20
|
|
21
|
-
# @!visibility private
|
22
|
-
DEFAULT_PARAMS = {
|
23
|
-
reg_param: 1.0,
|
24
|
-
fit_bias: false,
|
25
|
-
bias_scale: 1.0,
|
26
|
-
max_iter: 100,
|
27
|
-
batch_size: 50,
|
28
|
-
random_seed: nil
|
29
|
-
}.freeze
|
30
|
-
|
31
21
|
# Return the weight vector for SVC.
|
32
22
|
# @return [Numo::DFloat] (shape: [n_features])
|
33
23
|
attr_reader :weight_vec
|
@@ -49,17 +39,17 @@ module SVMKit
|
|
49
39
|
# @param batch_size [Integer] The size of the mini batches.
|
50
40
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
51
41
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
42
|
+
@params = {}
|
43
|
+
@params[:reg_param] = reg_param
|
44
|
+
@params[:fit_bias] = fit_bias
|
45
|
+
@params[:bias_scale] = bias_scale
|
46
|
+
@params[:max_iter] = max_iter
|
47
|
+
@params[:batch_size] = batch_size
|
48
|
+
@params[:random_seed] = random_seed
|
49
|
+
@params[:random_seed] ||= srand
|
60
50
|
@weight_vec = nil
|
61
51
|
@bias_term = 0.0
|
62
|
-
@rng = Random.new(
|
52
|
+
@rng = Random.new(@params[:random_seed])
|
63
53
|
end
|
64
54
|
|
65
55
|
# Fit the model with given training data.
|
@@ -73,9 +63,9 @@ module SVMKit
|
|
73
63
|
bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
|
74
64
|
# Expand feature vectors for bias term.
|
75
65
|
samples = x
|
76
|
-
if params[:fit_bias]
|
66
|
+
if @params[:fit_bias]
|
77
67
|
samples = Numo::NArray.hstack(
|
78
|
-
[samples, Numo::DFloat.ones([x.shape[0], 1]) * params[:bias_scale]]
|
68
|
+
[samples, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]]
|
79
69
|
)
|
80
70
|
end
|
81
71
|
# Initialize some variables.
|
@@ -83,26 +73,26 @@ module SVMKit
|
|
83
73
|
rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
|
84
74
|
weight_vec = Numo::DFloat.zeros(n_features)
|
85
75
|
# Start optimization.
|
86
|
-
params[:max_iter].times do |t|
|
76
|
+
@params[:max_iter].times do |t|
|
87
77
|
# random sampling
|
88
|
-
subset_ids = rand_ids.shift(params[:batch_size])
|
78
|
+
subset_ids = rand_ids.shift(@params[:batch_size])
|
89
79
|
rand_ids.concat(subset_ids)
|
90
80
|
target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
|
91
81
|
n_subsamples = target_ids.size
|
92
82
|
next if n_subsamples.zero?
|
93
83
|
# update the weight vector.
|
94
|
-
eta = 1.0 / (params[:reg_param] * (t + 1))
|
84
|
+
eta = 1.0 / (@params[:reg_param] * (t + 1))
|
95
85
|
mean_vec = Numo::DFloat.zeros(n_features)
|
96
86
|
target_ids.each { |n| mean_vec += samples[n, true] * bin_y[n] }
|
97
87
|
mean_vec *= eta / n_subsamples
|
98
|
-
weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
|
88
|
+
weight_vec = weight_vec * (1.0 - eta * @params[:reg_param]) + mean_vec
|
99
89
|
# scale the weight vector.
|
100
90
|
norm = Math.sqrt(weight_vec.dot(weight_vec))
|
101
|
-
scaler = (1.0 / params[:reg_param]**0.5) / (norm + 1.0e-12)
|
91
|
+
scaler = (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)
|
102
92
|
weight_vec *= [1.0, scaler].min
|
103
93
|
end
|
104
94
|
# Store the learned model.
|
105
|
-
if params[:fit_bias]
|
95
|
+
if @params[:fit_bias]
|
106
96
|
@weight_vec = weight_vec[0...n_features - 1]
|
107
97
|
@bias_term = weight_vec[n_features - 1]
|
108
98
|
else
|
@@ -142,13 +132,13 @@ module SVMKit
|
|
142
132
|
# Dump marshal data.
|
143
133
|
# @return [Hash] The marshal data about SVC.
|
144
134
|
def marshal_dump
|
145
|
-
{ params: params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
|
135
|
+
{ params: @params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
|
146
136
|
end
|
147
137
|
|
148
138
|
# Load marshal data.
|
149
139
|
# @return [nil]
|
150
140
|
def marshal_load(obj)
|
151
|
-
|
141
|
+
@params = obj[:params]
|
152
142
|
@weight_vec = obj[:weight_vec]
|
153
143
|
@bias_term = obj[:bias_term]
|
154
144
|
@rng = obj[:rng]
|
@@ -28,8 +28,8 @@ module SVMKit
|
|
28
28
|
#
|
29
29
|
# @param estimator [Classifier] The (binary) classifier for constructing a multi-label classifier.
|
30
30
|
def initialize(estimator: nil)
|
31
|
-
|
32
|
-
|
31
|
+
@params = {}
|
32
|
+
@params[:estimator] = estimator
|
33
33
|
@estimators = nil
|
34
34
|
@classes = nil
|
35
35
|
end
|
@@ -44,7 +44,7 @@ module SVMKit
|
|
44
44
|
@classes = Numo::Int32.asarray(y_arr.uniq.sort)
|
45
45
|
@estimators = @classes.to_a.map do |label|
|
46
46
|
bin_y = Numo::Int32.asarray(y_arr.map { |l| l == label ? 1 : -1 })
|
47
|
-
params[:estimator].dup.fit(x, bin_y)
|
47
|
+
@params[:estimator].dup.fit(x, bin_y)
|
48
48
|
end
|
49
49
|
self
|
50
50
|
end
|
@@ -82,7 +82,7 @@ module SVMKit
|
|
82
82
|
# Dump marshal data.
|
83
83
|
# @return [Hash] The marshal data about OneVsRestClassifier.
|
84
84
|
def marshal_dump
|
85
|
-
{ params: params,
|
85
|
+
{ params: @params,
|
86
86
|
classes: @classes,
|
87
87
|
estimators: @estimators.map { |e| Marshal.dump(e) } }
|
88
88
|
end
|
@@ -90,7 +90,7 @@ module SVMKit
|
|
90
90
|
# Load marshal data.
|
91
91
|
# @return [nil]
|
92
92
|
def marshal_load(obj)
|
93
|
-
|
93
|
+
@params = obj[:params]
|
94
94
|
@classes = obj[:classes]
|
95
95
|
@estimators = obj[:estimators].map { |e| Marshal.load(e) }
|
96
96
|
nil
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'svmkit/base/base_estimator'
|
2
|
+
require 'svmkit/base/classifier'
|
3
|
+
|
4
|
+
module SVMKit
|
5
|
+
# This module consists of the classes that implement estimators based on nearest neighbors rule.
|
6
|
+
module NearestNeighbors
|
7
|
+
# KNeighborsClassifier is a class that implements the classifier with the k-nearest neighbors rule.
|
8
|
+
# The current implementation uses the Euclidean distance for finding the neighbors.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# estimator =
|
12
|
+
# SVMKit::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 5)
|
13
|
+
# estimator.fit(training_samples, training_labels)
|
14
|
+
# results = estimator.predict(testing_samples)
|
15
|
+
#
|
16
|
+
class KNeighborsClassifier
|
17
|
+
include Base::BaseEstimator
|
18
|
+
include Base::Classifier
|
19
|
+
|
20
|
+
# Return the prototypes for the nearest neighbor classifier.
|
21
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features])
|
22
|
+
attr_reader :prototypes
|
23
|
+
|
24
|
+
# Return the labels of the prototypes
|
25
|
+
# @return [Numo::Int32] (size: n_samples)
|
26
|
+
attr_reader :labels
|
27
|
+
|
28
|
+
# Return the class labels.
|
29
|
+
# @return [Numo::Int32] (size: n_classes)
|
30
|
+
attr_reader :classes
|
31
|
+
|
32
|
+
# Create a new classifier with the nearest neighbor rule.
|
33
|
+
#
|
34
|
+
# @param n_neighbors [Integer] The number of neighbors.
|
35
|
+
def initialize(n_neighbors: 5)
|
36
|
+
@params = {}
|
37
|
+
@params[:n_neighbors] = n_neighbors
|
38
|
+
@prototypes = nil
|
39
|
+
@labels = nil
|
40
|
+
@classes = nil
|
41
|
+
end
|
42
|
+
|
43
|
+
# Fit the model with given training data.
|
44
|
+
#
|
45
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
46
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
47
|
+
# @return [KNeighborsClassifier] The learned classifier itself.
|
48
|
+
def fit(x, y)
|
49
|
+
@prototypes = Numo::DFloat.asarray(x.to_a)
|
50
|
+
@labels = Numo::Int32.asarray(y.to_a)
|
51
|
+
@classes = Numo::Int32.asarray(y.to_a.uniq.sort)
|
52
|
+
self
|
53
|
+
end
|
54
|
+
|
55
|
+
# Calculate confidence scores for samples.
|
56
|
+
#
|
57
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
58
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
|
59
|
+
def decision_function(x)
|
60
|
+
distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
|
61
|
+
n_samples, n_prototypes = distance_matrix.shape
|
62
|
+
n_classes = @classes.size
|
63
|
+
n_neighbors = [@params[:n_neighbors], n_prototypes].min
|
64
|
+
scores = Numo::DFloat.zeros(n_samples, n_classes)
|
65
|
+
n_samples.times do |m|
|
66
|
+
neighbor_ids = distance_matrix[m, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
|
67
|
+
neighbor_ids.each { |n| scores[m, @classes.to_a.index(@labels[n])] += 1.0 }
|
68
|
+
end
|
69
|
+
scores
|
70
|
+
end
|
71
|
+
|
72
|
+
# Predict class labels for samples.
|
73
|
+
#
|
74
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
75
|
+
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
76
|
+
def predict(x)
|
77
|
+
n_samples = x.shape.first
|
78
|
+
decision_values = decision_function(x)
|
79
|
+
Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
|
80
|
+
end
|
81
|
+
|
82
|
+
# Calculate the mean accuracy of the given testing data.
|
83
|
+
#
|
84
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
|
85
|
+
# @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
|
86
|
+
# @return [Float] Mean accuracy
|
87
|
+
def score(x, y)
|
88
|
+
p = predict(x)
|
89
|
+
n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
|
90
|
+
n_hits / y.size.to_f
|
91
|
+
end
|
92
|
+
|
93
|
+
# Dump marshal data.
|
94
|
+
# @return [Hash] The marshal data about KNeighborsClassifier.
|
95
|
+
def marshal_dump
|
96
|
+
{ params: params,
|
97
|
+
prototypes: @prototypes,
|
98
|
+
labels: @labels,
|
99
|
+
classes: @classes }
|
100
|
+
end
|
101
|
+
|
102
|
+
# Load marshal data.
|
103
|
+
# @return [nil]
|
104
|
+
def marshal_load(obj)
|
105
|
+
@params = obj[:params]
|
106
|
+
@prototypes = obj[:prototypes]
|
107
|
+
@labels = obj[:labels]
|
108
|
+
@classes = obj[:classes]
|
109
|
+
nil
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
@@ -26,8 +26,8 @@ module SVMKit
|
|
26
26
|
#
|
27
27
|
# @param feature_range [Array<Float>] The desired range of samples.
|
28
28
|
def initialize(feature_range: [0.0, 1.0])
|
29
|
-
|
30
|
-
|
29
|
+
@params = {}
|
30
|
+
@params[:feature_range] = feature_range
|
31
31
|
@min_vec = nil
|
32
32
|
@max_vec = nil
|
33
33
|
end
|
data/lib/svmkit/version.rb
CHANGED
data/svmkit.gemspec
CHANGED
@@ -29,21 +29,20 @@ Gem::Specification.new do |spec|
|
|
29
29
|
|
30
30
|
spec.required_ruby_version = '>= 2.1'
|
31
31
|
|
32
|
-
spec.add_runtime_dependency 'numo-narray', '
|
32
|
+
spec.add_runtime_dependency 'numo-narray', '~> 0.9.0.5'
|
33
33
|
|
34
34
|
spec.add_development_dependency 'bundler', '~> 1.15'
|
35
35
|
spec.add_development_dependency 'rake', '~> 10.0'
|
36
36
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
37
37
|
spec.add_development_dependency 'simplecov', '~> 0.15.1'
|
38
|
-
spec.add_development_dependency 'numo-narray', '~> 0.9.0.9'
|
39
38
|
|
40
|
-
spec.post_install_message =
|
39
|
+
spec.post_install_message = <<MSG
|
41
40
|
*************************************************************************
|
42
41
|
Thank you for installing SVMKit!!
|
43
42
|
|
44
43
|
Note that the SVMKit has been changed to use Numo::NArray for
|
45
44
|
linear algebra library from version 0.2.0.
|
46
45
|
*************************************************************************
|
47
|
-
|
46
|
+
MSG
|
48
47
|
|
49
48
|
end
|
metadata
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 0.9.0.5
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.9.0.5
|
27
27
|
- !ruby/object:Gem::Dependency
|
@@ -80,20 +80,6 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 0.15.1
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: numo-narray
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: 0.9.0.9
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - "~>"
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: 0.9.0.9
|
97
83
|
description: |
|
98
84
|
SVMKit is a library for machine learninig in Ruby.
|
99
85
|
SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
|
@@ -126,6 +112,7 @@ files:
|
|
126
112
|
- lib/svmkit/linear_model/logistic_regression.rb
|
127
113
|
- lib/svmkit/linear_model/svc.rb
|
128
114
|
- lib/svmkit/multiclass/one_vs_rest_classifier.rb
|
115
|
+
- lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
|
129
116
|
- lib/svmkit/pairwise_metric.rb
|
130
117
|
- lib/svmkit/preprocessing/l2_normalizer.rb
|
131
118
|
- lib/svmkit/preprocessing/min_max_scaler.rb
|
@@ -158,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
158
145
|
version: '0'
|
159
146
|
requirements: []
|
160
147
|
rubyforge_project:
|
161
|
-
rubygems_version: 2.
|
148
|
+
rubygems_version: 2.2.5
|
162
149
|
signing_key:
|
163
150
|
specification_version: 4
|
164
151
|
summary: SVMKit is an experimental library of machine learning in Ruby.
|