svmkit 0.1.2 → 0.1.3
This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -0
- data/lib/svmkit.rb +2 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +4 -3
- data/lib/svmkit/kernel_machine/kernel_svc.rb +128 -0
- data/lib/svmkit/linear_model/logistic_regression.rb +7 -6
- data/lib/svmkit/linear_model/pegasos_svc.rb +7 -6
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +3 -2
- data/lib/svmkit/pairwise_metric.rb +72 -0
- data/lib/svmkit/preprocessing/min_max_scaler.rb +2 -1
- data/lib/svmkit/version.rb +1 -1
- metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6a6941fcd2f0f465de1d6a3b6f658e7ee0fdc8fb
+  data.tar.gz: b9dc50c6fa8e3d7470adf89ffc950b2ae63db4e1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4795995b936f2902dc50e19dc30c46fdb2a1b6a732869a311efa791da8ec09305f6ea6dbfd9aab7da8c934465c8eebe9c45dcaab57090b09f0cc20c1ccacff77
+  data.tar.gz: b8afdc306dba4a10922e63756bb6d18731e54a4a5f04293a478b7c897b6a58622c9f88eb6bdb47837fa7114ab80d879e6e1ea3e993a5937f291d69e5d72f1699
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
+# 0.1.3
+- Added class for Kernel Support Vector Machine with the Pegasos algorithm.
+- Added module for calculating pairwise kernel functions and euclidean distances.
+
 # 0.1.2
 - Added support for learning a model with a bias term to the PegasosSVC and LogisticRegression classes.
 - Rewrote the documentation with yard notation.
data/lib/svmkit.rb CHANGED
@@ -5,12 +5,14 @@ end
 
 require 'svmkit/version'
 require 'svmkit/utils'
+require 'svmkit/pairwise_metric'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
 require 'svmkit/base/transformer'
 require 'svmkit/kernel_approximation/rbf'
 require 'svmkit/linear_model/pegasos_svc'
 require 'svmkit/linear_model/logistic_regression'
+require 'svmkit/kernel_machine/kernel_svc'
 require 'svmkit/multiclass/one_vs_rest_classifier'
 require 'svmkit/preprocessing/l2_normalizer'
 require 'svmkit/preprocessing/min_max_scaler'
data/lib/svmkit/kernel_approximation/rbf.rb CHANGED
@@ -40,9 +40,10 @@ module SVMKit
       #
       # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
       #
-      # @param
-      # @
-      # @
+      # @param params [Hash] The parameters for RBF kernel approximation.
+      # @option params [Float] :gamma (1.0) The parameter of RBF kernel: exp(-gamma * x^2).
+      # @option params [Integer] :n_components (128) The number of dimensions of the RBF kernel feature space.
+      # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
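To show how these constructor options are meant to be used, here is a minimal sketch. The sample matrix is invented, and the `fit_transform` call is assumed from the gem's transformer interface rather than shown in this diff.

```ruby
require 'svmkit'

# Hypothetical data: 100 samples with 5 features, values drawn uniformly from [0, 1).
samples = NMatrix.random([100, 5])

# Approximate an RBF kernel feature space with 128 random features.
transformer = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
mapped = transformer.fit_transform(samples) # shape: [100, 128]
```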
data/lib/svmkit/kernel_machine/kernel_svc.rb ADDED
@@ -0,0 +1,128 @@
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/classifier'
+
+module SVMKit
+  # This module consists of the classes that implement kernel method-based estimators.
+  module KernelMachine
+    # KernelSVC is a class that implements a (nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
+    #
+    # @example
+    #   training_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(training_samples)
+    #   estimator =
+    #     SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
+    #   estimator.fit(training_kernel_matrix, training_labels)
+    #   testing_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(testing_samples, training_samples)
+    #   results = estimator.predict(testing_kernel_matrix)
+    #
+    # *Reference*
+    # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
+    class KernelSVC
+      include Base::BaseEstimator
+      include Base::Classifier
+
+      # @!visibility private
+      DEFAULT_PARAMS = {
+        reg_param: 1.0,
+        max_iter: 1000,
+        random_seed: nil
+      }.freeze
+
+      # Return the weight vector for Kernel SVC.
+      # @return [NMatrix] (shape: [1, n_training_samples])
+      attr_reader :weight_vec
+
+      # Return the random generator for performing random sampling in the Pegasos algorithm.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
+      #
+      # @overload new(reg_param: 1.0, max_iter: 1000, random_seed: 1) -> KernelSVC
+      #
+      # @param params [Hash] The parameters for Kernel SVC.
+      # @option params [Float] :reg_param (1.0) The regularization parameter.
+      # @option params [Integer] :max_iter (1000) The maximum number of iterations.
+      # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
+      def initialize(params = {})
+        self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
+        self.params[:random_seed] ||= srand
+        @weight_vec = nil
+        @rng = Random.new(self.params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [NMatrix] (shape: [n_training_samples, n_training_samples])
+      #   The kernel matrix of the training data to be used for fitting the model.
+      # @param y [NMatrix] (shape: [1, n_training_samples]) The labels to be used for fitting the model.
+      # @return [KernelSVC] The learned classifier itself.
+      def fit(x, y)
+        # Generate binary labels.
+        negative_label = y.uniq.sort.shift
+        bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
+        # Initialize some variables.
+        n_training_samples = x.shape[0]
+        rand_ids = []
+        weight_vec = NMatrix.zeros([1, n_training_samples])
+        # Start optimization.
+        params[:max_iter].times do |t|
+          # Random sampling.
+          rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
+          target_id = rand_ids.shift
+          # Update the weight vector.
+          func = (weight_vec * bin_y[target_id]).dot(x.row(target_id).transpose).to_f
+          func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
+          weight_vec[target_id] += 1.0 if func < 1.0
+        end
+        # Store the learned model.
+        @weight_vec = weight_vec * NMatrix.new([1, n_training_samples], bin_y)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to compute the scores.
+      # @return [NMatrix] (shape: [1, n_testing_samples]) Confidence score per sample.
+      def decision_function(x)
+        @weight_vec.dot(x.transpose)
+      end
+
+      # Predict class labels for samples.
+      #
+      # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to predict the labels.
+      # @return [NMatrix] (shape: [1, n_testing_samples]) Predicted class label per sample.
+      def predict(x)
+        decision_function(x).map { |v| v >= 0 ? 1 : -1 }
+      end
+
+      # Calculate the mean accuracy of the given testing data.
+      #
+      # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples.
+      # @param y [NMatrix] (shape: [1, n_testing_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
+      def score(x, y)
+        p = predict(x)
+        n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
+        n_hits / y.size.to_f
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about KernelSVC.
+      def marshal_dump
+        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        self.params = obj[:params]
+        @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+        @rng = obj[:rng]
+        nil
+      end
+    end
+  end
+end
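Putting the new class together with the new PairwiseMetric module, here is a minimal usage sketch following the @example block above; the toy samples and labels are invented for illustration.

```ruby
require 'svmkit'

# Toy binary problem: two well-separated clusters with two features each.
training_samples = NMatrix.new([4, 2], [0.0, 0.1, 0.1, 0.0, 5.0, 5.1, 5.1, 5.0])
training_labels  = NMatrix.new([1, 4], [-1, -1, 1, 1])

# KernelSVC consumes a precomputed kernel matrix, not raw samples.
training_kernel_matrix = SVMKit::PairwiseMetric.rbf_kernel(training_samples)
estimator = SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
estimator.fit(training_kernel_matrix, training_labels)

# Prediction uses the kernel matrix between testing and training samples.
testing_samples = NMatrix.new([2, 2], [0.05, 0.05, 5.05, 5.05])
testing_kernel_matrix = SVMKit::PairwiseMetric.rbf_kernel(testing_samples, training_samples)
results = estimator.predict(testing_kernel_matrix) # labels in {-1, 1}
```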
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED
@@ -46,13 +46,14 @@ module SVMKit
       #
       # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
       #
-      # @param
-      # @
-      # @
+      # @param params [Hash] The parameters for Logistic Regression.
+      # @option params [Float] :reg_param (1.0) The regularization parameter.
+      # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
+      # @option params [Float] :bias_scale (1.0) The scale of the bias term.
       #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
-      # @
-      # @
-      # @
+      # @option params [Integer] :max_iter (100) The maximum number of iterations.
+      # @option params [Integer] :batch_size (50) The size of the mini batches.
+      # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
data/lib/svmkit/linear_model/pegasos_svc.rb CHANGED
@@ -44,12 +44,13 @@ module SVMKit
       #
       # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
       #
-      # @param
-      # @
-      # @
-      # @
-      # @
-      # @
+      # @param params [Hash] The parameters for SVC.
+      # @option params [Float] :reg_param (1.0) The regularization parameter.
+      # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
+      # @option params [Float] :bias_scale (1.0) The scale of the bias term.
+      # @option params [Integer] :max_iter (100) The maximum number of iterations.
+      # @option params [Integer] :batch_size (50) The size of the mini batches.
+      # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
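Both linear models take the same options, and the fit_bias/bias_scale pair works identically in each: when fit_bias is true, every feature vector v is internally extended to [v; bias_scale], so the bias is learned as one extra weight. A minimal sketch with PegasosSVC, using toy data invented for illustration:

```ruby
require 'svmkit'

samples = NMatrix.new([4, 2], [0.0, 0.1, 0.2, 0.1, 1.0, 0.9, 1.1, 1.0])
labels  = NMatrix.new([1, 4], [-1, -1, 1, 1])

# With fit_bias: true, each sample is augmented with bias_scale, so the last
# element of the learned weight vector plays the role of the bias term.
estimator = SVMKit::LinearModel::PegasosSVC.new(
  reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
  max_iter: 100, batch_size: 4, random_seed: 1
)
estimator.fit(samples, labels)
predicted = estimator.predict(samples)
```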
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb CHANGED
@@ -32,8 +32,9 @@ module SVMKit
       # Create a new multi-label classifier with the one-vs-rest strategy.
       #
       # @overload new(estimator: base_estimator) -> OneVsRestClassifier
-      #
-      #
+      #
+      # @param params [Hash] The parameters for OneVsRestClassifier.
+      # @option params [Classifier] :estimator (nil) The (binary) classifier for constructing a multi-label classifier.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         @estimators = nil
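For context, a sketch of wiring a binary estimator into this wrapper; the data and the choice of PegasosSVC as the base estimator are assumptions for illustration.

```ruby
require 'svmkit'

# Three-class toy problem; OneVsRestClassifier fits one binary classifier per class.
samples = NMatrix.new([6, 2], [0.0, 0.1, 0.1, 0.0, 1.0, 1.1, 1.1, 1.0, 2.0, 2.1, 2.1, 2.0])
labels  = NMatrix.new([1, 6], [0, 0, 1, 1, 2, 2])

base_estimator = SVMKit::LinearModel::PegasosSVC.new(max_iter: 100, random_seed: 1)
classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
classifier.fit(samples, labels)
predicted = classifier.predict(samples)
```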
data/lib/svmkit/pairwise_metric.rb ADDED
@@ -0,0 +1,72 @@
+module SVMKit
+  # Module for calculating pairwise distances, similarities, and kernels.
+  module PairwiseMetric
+    class << self
+      # Calculate the pairwise euclidean distances between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def euclidean_distance(x, y = nil)
+        y = x if y.nil?
+        sum_x_vec = (x**2).sum(1)
+        sum_y_vec = (y**2).sum(1)
+        dot_xy_mat = x.dot(y.transpose)
+        distance_matrix = dot_xy_mat * -2.0 +
+                          sum_x_vec.repeat(y.shape[0], 1) +
+                          sum_y_vec.transpose.repeat(x.shape[0], 0)
+        distance_matrix.abs.sqrt
+      end
+
+      # Calculate the rbf kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @param gamma [Float] The parameter of the rbf kernel; if nil, 1 / n_features is used.
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def rbf_kernel(x, y = nil, gamma = nil)
+        y = x if y.nil?
+        gamma ||= 1.0 / x.shape[1]
+        distance_matrix = euclidean_distance(x, y)
+        ((distance_matrix**2) * -gamma).exp
+      end
+
+      # Calculate the linear kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def linear_kernel(x, y = nil)
+        y = x if y.nil?
+        x.dot(y.transpose)
+      end
+
+      # Calculate the polynomial kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @param degree [Integer] The parameter of the polynomial kernel.
+      # @param gamma [Float] The parameter of the polynomial kernel; if nil, 1 / n_features is used.
+      # @param coef [Integer] The parameter of the polynomial kernel.
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
+        y = x if y.nil?
+        gamma ||= 1.0 / x.shape[1]
+        (x.dot(y.transpose) * gamma + coef)**degree
+      end
+
+      # Calculate the sigmoid kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @param gamma [Float] The parameter of the sigmoid kernel; if nil, 1 / n_features is used.
+      # @param coef [Integer] The parameter of the sigmoid kernel.
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
+        y = x if y.nil?
+        gamma ||= 1.0 / x.shape[1]
+        (x.dot(y.transpose) * gamma + coef).tanh
+      end
+    end
+  end
+end
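A short sketch exercising the new module (data invented for illustration). Note that rbf_kernel is computed from euclidean_distance as exp(-gamma * d(x, y)^2), matching the code above.

```ruby
require 'svmkit'

x = NMatrix.new([3, 2], [0.0, 0.0, 1.0, 0.0, 0.0, 2.0])
y = NMatrix.new([2, 2], [0.5, 0.5, 1.5, 1.5])

# Omitting y yields square matrices over one sample set.
dist = SVMKit::PairwiseMetric.euclidean_distance(x)    # shape: [3, 3]
rbf  = SVMKit::PairwiseMetric.rbf_kernel(x, nil, 0.5)  # exp(-0.5 * dist**2)

# Passing y yields rectangular matrices between two sample sets.
lin  = SVMKit::PairwiseMetric.linear_kernel(x, y)      # shape: [3, 2]
poly = SVMKit::PairwiseMetric.polynomial_kernel(x, y)  # (x.dot(y^T) / n_features + 1)**3
```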
data/lib/svmkit/preprocessing/min_max_scaler.rb CHANGED
@@ -31,7 +31,8 @@ module SVMKit
       #
       # @overload new(feature_range: [0.0, 1.0]) -> MinMaxScaler
       #
-      # @param
+      # @param params [Hash] The parameters for MinMaxScaler.
+      # @option params [Array<Float>] :feature_range ([0.0, 1.0]) The desired range of samples.
       def initialize(params = {})
         @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         @min_vec = nil
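A sketch of the feature_range option (toy data invented; fit_transform is assumed from the gem's transformer interface, not shown in this diff):

```ruby
require 'svmkit'

samples = NMatrix.new([3, 2], [0.0, 10.0, 5.0, 20.0, 10.0, 30.0])

# Rescale each feature column into [-1.0, 1.0] instead of the default [0.0, 1.0].
scaler = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [-1.0, 1.0])
normalized = scaler.fit_transform(samples)
```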
data/lib/svmkit/version.rb CHANGED
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-10-
+date: 2017-10-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -93,9 +93,11 @@ files:
 - lib/svmkit/base/classifier.rb
 - lib/svmkit/base/transformer.rb
 - lib/svmkit/kernel_approximation/rbf.rb
+- lib/svmkit/kernel_machine/kernel_svc.rb
 - lib/svmkit/linear_model/logistic_regression.rb
 - lib/svmkit/linear_model/pegasos_svc.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb
+- lib/svmkit/pairwise_metric.rb
 - lib/svmkit/preprocessing/l2_normalizer.rb
 - lib/svmkit/preprocessing/min_max_scaler.rb
 - lib/svmkit/preprocessing/standard_scaler.rb