svmkit 0.1.2 → 0.1.3
- checksums.yaml +4 -4
- data/HISTORY.md +4 -0
- data/lib/svmkit.rb +2 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +4 -3
- data/lib/svmkit/kernel_machine/kernel_svc.rb +128 -0
- data/lib/svmkit/linear_model/logistic_regression.rb +7 -6
- data/lib/svmkit/linear_model/pegasos_svc.rb +7 -6
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +3 -2
- data/lib/svmkit/pairwise_metric.rb +72 -0
- data/lib/svmkit/preprocessing/min_max_scaler.rb +2 -1
- data/lib/svmkit/version.rb +1 -1
- metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6a6941fcd2f0f465de1d6a3b6f658e7ee0fdc8fb
+  data.tar.gz: b9dc50c6fa8e3d7470adf89ffc950b2ae63db4e1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4795995b936f2902dc50e19dc30c46fdb2a1b6a732869a311efa791da8ec09305f6ea6dbfd9aab7da8c934465c8eebe9c45dcaab57090b09f0cc20c1ccacff77
+  data.tar.gz: b8afdc306dba4a10922e63756bb6d18731e54a4a5f04293a478b7c897b6a58622c9f88eb6bdb47837fa7114ab80d879e6e1ea3e993a5937f291d69e5d72f1699
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
+# 0.1.3
+- Added class for Kernel Support Vector Machine with Pegasos algorithm.
+- Added module for calculating pairwise kernel functions and euclidean distances.
+
 # 0.1.2
 - Added the function learning a model with bias term to the PegasosSVC and LogisticRegression classes.
 - Rewrote the document with yard notation.
data/lib/svmkit.rb CHANGED
@@ -5,12 +5,14 @@ end
 
 require 'svmkit/version'
 require 'svmkit/utils'
+require 'svmkit/pairwise_metric'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
 require 'svmkit/base/transformer'
 require 'svmkit/kernel_approximation/rbf'
 require 'svmkit/linear_model/pegasos_svc'
 require 'svmkit/linear_model/logistic_regression'
+require 'svmkit/kernel_machine/kernel_svc'
 require 'svmkit/multiclass/one_vs_rest_classifier'
 require 'svmkit/preprocessing/l2_normalizer'
 require 'svmkit/preprocessing/min_max_scaler'
data/lib/svmkit/kernel_approximation/rbf.rb CHANGED
@@ -40,9 +40,10 @@ module SVMKit
     #
     # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
     #
-    # @param
-    # @
-    # @
+    # @param params [Hash] The parameters for RBF kernel approximation.
+    # @option params [Float] :gamma (1.0) The parameter of RBF kernel: exp(-gamma * x^2).
+    # @option params [Integer] :n_components (128) The number of dimensions of the RBF kernel feature space.
+    # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
     def initialize(params = {})
       self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
       self.params[:random_seed] ||= srand
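A minimal usage sketch for the options above. The sample matrix is a hypothetical stand-in, and fit_transform is assumed to come from the Transformer interface:

    require 'svmkit'

    # Hypothetical data: 100 samples with 4 features each.
    samples = NMatrix.random([100, 4])

    # Map the samples into an approximated RBF kernel feature space.
    transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 1024, random_seed: 1)
    mapped = transformer.fit_transform(samples) # assumed Transformer API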
data/lib/svmkit/kernel_machine/kernel_svc.rb ADDED
@@ -0,0 +1,128 @@
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/classifier'
+
+module SVMKit
+  # This module consists of the classes that implement kernel method-based models.
+  module KernelMachine
+    # KernelSVC is a class that implements a (nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
+    #
+    # @example
+    #   training_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(training_samples)
+    #   estimator =
+    #     SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
+    #   estimator.fit(training_kernel_matrix, training_labels)
+    #   testing_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(testing_samples, training_samples)
+    #   results = estimator.predict(testing_kernel_matrix)
+    #
+    # *Reference*
+    # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
+    class KernelSVC
+      include Base::BaseEstimator
+      include Base::Classifier
+
+      # @!visibility private
+      DEFAULT_PARAMS = {
+        reg_param: 1.0,
+        max_iter: 1000,
+        random_seed: nil
+      }.freeze
+
+      # Return the weight vector for Kernel SVC.
+      # @return [NMatrix] (shape: [1, n_training_samples])
+      attr_reader :weight_vec
+
+      # Return the random generator for performing random sampling in the Pegasos algorithm.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
+      #
+      # @overload new(reg_param: 1.0, max_iter: 1000, random_seed: 1) -> KernelSVC
+      #
+      # @param params [Hash] The parameters for Kernel SVC.
+      # @option params [Float] :reg_param (1.0) The regularization parameter.
+      # @option params [Integer] :max_iter (1000) The maximum number of iterations.
+      # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
+      def initialize(params = {})
+        self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
+        self.params[:random_seed] ||= srand
+        @weight_vec = nil
+        @rng = Random.new(self.params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [NMatrix] (shape: [n_training_samples, n_training_samples])
+      #   The kernel matrix of the training data to be used for fitting the model.
+      # @param y [NMatrix] (shape: [1, n_training_samples]) The labels to be used for fitting the model.
+      # @return [KernelSVC] The learned classifier itself.
+      def fit(x, y)
+        # Generate binary labels.
+        negative_label = y.uniq.sort.shift
+        bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
+        # Initialize some variables.
+        n_training_samples = x.shape[0]
+        rand_ids = []
+        weight_vec = NMatrix.zeros([1, n_training_samples])
+        # Start optimization.
+        params[:max_iter].times do |t|
+          # Random sampling.
+          rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
+          target_id = rand_ids.shift
+          # Update the weight vector.
+          func = (weight_vec * bin_y[target_id]).dot(x.row(target_id).transpose).to_f
+          func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
+          weight_vec[target_id] += 1.0 if func < 1.0
+        end
+        # Store the learned model.
+        @weight_vec = weight_vec * NMatrix.new([1, n_training_samples], bin_y)
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to compute the scores.
+      # @return [NMatrix] (shape: [1, n_testing_samples]) Confidence score per sample.
+      def decision_function(x)
+        @weight_vec.dot(x.transpose)
+      end
+
+      # Predict class labels for samples.
+      #
+      # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples to predict the labels.
+      # @return [NMatrix] (shape: [1, n_testing_samples]) Predicted class label per sample.
+      def predict(x)
+        decision_function(x).map { |v| v >= 0 ? 1 : -1 }
+      end
+
+      # Calculate the mean accuracy of the given testing data.
+      #
+      # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
+      #   The kernel matrix between testing samples and training samples.
+      # @param y [NMatrix] (shape: [1, n_testing_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
+      def score(x, y)
+        p = predict(x)
+        n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
+        n_hits / y.size.to_f
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about KernelSVC.
+      def marshal_dump
+        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        self.params = obj[:params]
+        @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+        @rng = obj[:rng]
+        nil
+      end
+    end
+  end
+end
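As a reading aid for fit above: in the kernelized Pegasos of the cited reference, the weight vector is never formed explicitly; a per-sample coefficient vector alpha stands in for it. A sketch of the update rule in the paper's notation (my paraphrase, not code from the gem):

    w_t = \frac{1}{\lambda t} \sum_j \alpha_t[j] \, y_j \, \phi(x_j), \qquad
    \alpha_{t+1}[i_t] = \alpha_t[i_t] + 1 \quad \text{if} \quad y_{i_t} \langle w_t, \phi(x_{i_t}) \rangle < 1

The last line of fit folds the labels into the stored coefficients (the elementwise product with bin_y), which is why decision_function reduces to a single dot product with the testing kernel matrix; the 1/(lambda t) scaling affects only the magnitude, not the predicted sign.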
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED
@@ -46,13 +46,14 @@ module SVMKit
     #
     # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
     #
-    # @param
-    # @
-    # @
+    # @param params [Hash] The parameters for Logistic Regression.
+    # @option params [Float] :reg_param (1.0) The regularization parameter.
+    # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
+    # @option params [Float] :bias_scale (1.0) The scale of the bias term.
     #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
-    # @
-    # @
-    # @
+    # @option params [Integer] :max_iter (100) The maximum number of iterations.
+    # @option params [Integer] :batch_size (50) The size of the mini batches.
+    # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
     def initialize(params = {})
       self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
       self.params[:random_seed] ||= srand
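A short sketch of the newly documented bias option. The sample and label matrices are hypothetical placeholders; fit and predict are assumed from the Classifier interface:

    # With fit_bias: true, each feature vector v is handled as [v; bias_scale],
    # so the last weight component acts as the intercept.
    estimator = SVMKit::LinearModel::LogisticRegression.new(
      reg_param: 1.0, fit_bias: true, bias_scale: 1.0, max_iter: 100, random_seed: 1
    )
    estimator.fit(training_samples, training_labels)
    predicted = estimator.predict(testing_samples)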
data/lib/svmkit/linear_model/pegasos_svc.rb CHANGED
@@ -44,12 +44,13 @@ module SVMKit
     #
     # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
     #
-    # @param
-    # @
-    # @
-    # @
-    # @
-    # @
+    # @param params [Hash] The parameters for SVC.
+    # @option params [Float] :reg_param (1.0) The regularization parameter.
+    # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
+    # @option params [Float] :bias_scale (1.0) The scale of the bias term.
+    # @option params [Integer] :max_iter (100) The maximum number of iterations.
+    # @option params [Integer] :batch_size (50) The size of the mini batches.
+    # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
     def initialize(params = {})
       self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
       self.params[:random_seed] ||= srand
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb CHANGED
@@ -32,8 +32,9 @@ module SVMKit
     # Create a new multi-label classifier with the one-vs-rest strategy.
     #
     # @overload new(estimator: base_estimator) -> OneVsRestClassifier
-    #
-    #
+    #
+    # @param params [Hash] The parameters for OneVsRestClassifier.
+    # @option params [Classifier] :estimator (nil) The (binary) classifier for constructing a multi-label classifier.
     def initialize(params = {})
       self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
       @estimators = nil
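A sketch of wiring the binary PegasosSVC into the multi-label wrapper via the :estimator option above (placeholder data; fit and predict assumed from the Classifier interface):

    base_estimator = SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, random_seed: 1)
    multiclass = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
    multiclass.fit(training_samples, training_labels) # labels may take more than two values
    results = multiclass.predict(testing_samples)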
data/lib/svmkit/pairwise_metric.rb ADDED
@@ -0,0 +1,72 @@
+module SVMKit
+  # Module for calculating pairwise distances, similarities, and kernels.
+  module PairwiseMetric
+    class << self
+      # Calculate the pairwise euclidean distances between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def euclidean_distance(x, y = nil)
+        y = x if y.nil?
+        sum_x_vec = (x**2).sum(1)
+        sum_y_vec = (y**2).sum(1)
+        dot_xy_mat = x.dot(y.transpose)
+        distance_matrix = dot_xy_mat * -2.0 +
+                          sum_x_vec.repeat(y.shape[0], 1) +
+                          sum_y_vec.transpose.repeat(x.shape[0], 0)
+        distance_matrix.abs.sqrt
+      end
+
+      # Calculate the rbf kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @param gamma [Float] The parameter of the rbf kernel; if nil, it is set to 1 / n_features.
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def rbf_kernel(x, y = nil, gamma = nil)
+        y = x if y.nil?
+        gamma ||= 1.0 / x.shape[1]
+        distance_matrix = euclidean_distance(x, y)
+        ((distance_matrix**2) * -gamma).exp
+      end
+
+      # Calculate the linear kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def linear_kernel(x, y = nil)
+        y = x if y.nil?
+        x.dot(y.transpose)
+      end
+
+      # Calculate the polynomial kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @param degree [Integer] The parameter of the polynomial kernel.
+      # @param gamma [Float] The parameter of the polynomial kernel; if nil, it is set to 1 / n_features.
+      # @param coef [Integer] The parameter of the polynomial kernel.
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
+        y = x if y.nil?
+        gamma ||= 1.0 / x.shape[1]
+        (x.dot(y.transpose) * gamma + coef)**degree
+      end
+
+      # Calculate the sigmoid kernel between x and y.
+      #
+      # @param x [NMatrix] (shape: [n_samples_x, n_features])
+      # @param y [NMatrix] (shape: [n_samples_y, n_features])
+      # @param gamma [Float] The parameter of the sigmoid kernel; if nil, it is set to 1 / n_features.
+      # @param coef [Integer] The parameter of the sigmoid kernel.
+      # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
+      def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
+        y = x if y.nil?
+        gamma ||= 1.0 / x.shape[1]
+        (x.dot(y.transpose) * gamma + coef).tanh
+      end
+    end
+  end
+end
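A small sketch exercising the module above, including the gamma default of 1 / n_features (hypothetical NMatrix inputs):

    x = NMatrix.random([5, 3]) # 5 samples, 3 features
    y = NMatrix.random([4, 3]) # 4 samples, 3 features

    SVMKit::PairwiseMetric.euclidean_distance(x)      # [5, 5] distances within x
    SVMKit::PairwiseMetric.rbf_kernel(x, y)           # [5, 4]; gamma defaults to 1.0 / 3
    SVMKit::PairwiseMetric.polynomial_kernel(x, y, 2) # (x.dot(y') * gamma + 1)**2
    SVMKit::PairwiseMetric.sigmoid_kernel(x, y)       # (x.dot(y') * gamma + 1).tanh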
data/lib/svmkit/preprocessing/min_max_scaler.rb CHANGED
@@ -31,7 +31,8 @@ module SVMKit
     #
     # @overload new(feature_range: [0.0, 1.0]) -> MinMaxScaler
     #
-    # @param
+    # @param params [Hash] The parameters for MinMaxScaler.
+    # @option params [Array<Float>] :feature_range ([0.0, 1.0]) The desired range of samples.
     def initialize(params = {})
       @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
       @min_vec = nil
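And a sketch of the scaler option above; fit_transform and transform are assumed from the Transformer interface, with hypothetical training/testing matrices:

    scaler = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
    rescaled_training = scaler.fit_transform(training_samples) # learns per-feature min/max
    rescaled_testing = scaler.transform(testing_samples)       # reuses the learned range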
data/lib/svmkit/version.rb CHANGED
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.1.
+  version: 0.1.3
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-10-
+date: 2017-10-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -93,9 +93,11 @@ files:
 - lib/svmkit/base/classifier.rb
 - lib/svmkit/base/transformer.rb
 - lib/svmkit/kernel_approximation/rbf.rb
+- lib/svmkit/kernel_machine/kernel_svc.rb
 - lib/svmkit/linear_model/logistic_regression.rb
 - lib/svmkit/linear_model/pegasos_svc.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb
+- lib/svmkit/pairwise_metric.rb
 - lib/svmkit/preprocessing/l2_normalizer.rb
 - lib/svmkit/preprocessing/min_max_scaler.rb
 - lib/svmkit/preprocessing/standard_scaler.rb