svmkit 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +8 -1
- data/README.md +6 -0
- data/lib/svmkit/base/base_estimator.rb +2 -1
- data/lib/svmkit/kernel_approximation/rbf.rb +37 -41
- data/lib/svmkit/linear_model/logistic_regression.rb +68 -66
- data/lib/svmkit/linear_model/pegasos_svc.rb +60 -56
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +26 -43
- data/lib/svmkit/preprocessing/l2_normalizer.rb +12 -17
- data/lib/svmkit/preprocessing/min_max_scaler.rb +25 -32
- data/lib/svmkit/preprocessing/standard_scaler.rb +24 -29
- data/lib/svmkit/utils.rb +4 -13
- data/lib/svmkit/version.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c2538f07704c57e4dc90caa85411d0795ac23763
+  data.tar.gz: 3815cf50ee5978cca24d944c6e7f577216dae0e3
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e448dd5f8fddb4a2a805b1dcddb7ab9c53d7c3db3460760b3165940d0ab93ae82ba1b0fec089e7a1d6651154b5f0437f3d4400531cc11017fd16f9e2029e2611
+  data.tar.gz: 1416d8c3ea1f55abd1fb269bdaf86f80faaa31be298d0ed1349f6b708d05e64545bf0ad4c3865c7ced26057441ff0999dd82ca77eae54209190527b87ba4ec27
data/HISTORY.md
CHANGED
@@ -1,3 +1,11 @@
+# 0.1.2
+- Added the function learning a model with bias term to the PegasosSVC and LogisticRegression classes.
+- Rewrited the document with yard notation.
+
+# 0.1.1
+- Added class for Logistic Regression with SGD optimization.
+- Fixed some mistakes on the document.
+
 # 0.1.0
 - Added basic classes.
 - Added an utility module.
@@ -5,4 +13,3 @@
 - Added class for Support Vector Machine with Pegasos alogrithm.
 - Added class that performs mutlclass classification with one-vs.-rest strategy.
 - Added classes for preprocessing such as min-max scaling, standardization, and L2 normalization.
-
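The headline change in 0.1.2 is the optional bias term. A minimal sketch of how the new option is used, based on the `fit_bias`/`bias_scale` parameters visible in the diffs below; `training_samples` and `training_labels` are placeholder NMatrix objects, not part of the release:

```ruby
require 'svmkit'

# Sketch only: training_samples is an NMatrix of shape [n_samples, n_features],
# training_labels an NMatrix of shape [1, n_samples].
estimator = SVMKit::LinearModel::PegasosSVC.new(
  reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
  max_iter: 100, batch_size: 50, random_seed: 1
)
estimator.fit(training_samples, training_labels)
estimator.bias_term # learned intercept; stays 0.0 when fit_bias is false
```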
data/README.md
CHANGED
@@ -1,5 +1,9 @@
 # SVMKit
 
+[](https://travis-ci.org/yoshoku/SVMKit)
+[](https://badge.fury.io/rb/svmkit)
+[](https://github.com/yoshoku/SVMKit/blob/master/LICENSE.txt)
+
 SVMKit is a library for machine learninig in Ruby.
 SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
 However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
@@ -23,6 +27,7 @@ Or install it yourself as:
 ## Usage
 
 Training phase:
+
 ```ruby
 require 'svmkit'
 require 'libsvmloader'
@@ -46,6 +51,7 @@ File.open('trained_classifier.dat', 'wb') { |f| f.write(Marshal.dump(classifier)
 ```
 
 Testing phase:
+
 ```ruby
 require 'svmkit'
 require 'libsvmloader'
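The README's usage snippets are truncated in this view. Pieced together from the visible `require` lines and the `Marshal.dump` call in the hunk header, the flow they demonstrate is roughly the following sketch; the `LibSVMLoader.load_libsvm_file` call and the dataset file name are assumptions, not quoted from the release:

```ruby
require 'svmkit'
require 'libsvmloader'

# Assumed LibSVMLoader API; the dataset name is a placeholder.
samples, labels = LibSVMLoader.load_libsvm_file('pendigits', stype: :dense)

classifier = SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, random_seed: 1)
classifier.fit(samples, labels)

# Trained models are serialized with Marshal, as in the README's training phase.
File.open('trained_classifier.dat', 'wb') { |f| f.write(Marshal.dump(classifier)) }
classifier = Marshal.load(File.binread('trained_classifier.dat'))
puts(format('Accuracy: %.1f%%', 100.0 * classifier.score(samples, labels)))
```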
data/lib/svmkit/kernel_approximation/rbf.rb
CHANGED
@@ -6,40 +6,43 @@ module SVMKit
   module KernelApproximation
     # Class for RBF kernel feature mapping.
     #
+    # @example
     #   transformer = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_coponents: 128, random_seed: 1)
     #   new_training_samples = transformer.fit_transform(training_samples)
     #   new_testing_samples = transformer.transform(testing_samples)
     #
-    # *
-    #
+    # *Refernce*:
+    # 1. A. Rahimi and B. Recht, "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007.
     class RBF
       include Base::BaseEstimator
       include Base::Transformer
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         gamma: 1.0,
         n_components: 128,
         random_seed: nil
       }.freeze
 
-      #
-
+      # Return the random matrix for transformation.
+      # @return [NMatrix] (shape: [n_features, n_components])
+      attr_reader :random_mat
 
-      #
-
+      # Return the random vector for transformation.
+      # @return [NMatrix] (shape: [1, n_components])
+      attr_reader :random_vec
 
-      #
-
+      # Return the random generator for transformation.
+      # @return [Random]
+      attr_reader :rng
 
-      #
+      # Create a new transformer for mapping to RBF kernel feature space.
       #
-      #
-      # new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
+      # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
       #
-      #
-      #
-      #
-      #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
+      # @param gamma [Float] (defaults to: 1.0) The parameter of RBF kernel: exp(-gamma * x^2).
+      # @param n_components [Integer] (defaults to: 128) The number of dimensions of the RBF kernel feature space.
+      # @param random_seed [Integer] (defaults to: nil) The seed value using to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
@@ -50,13 +53,11 @@ module SVMKit
 
       # Fit the model with given training data.
       #
-      #
-      # fit(x) -> RBF
+      # @overload fit(x) -> RBF
       #
-      #
-      #
-      #
-      #   - The learned transformer itself.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   This method uses only the number of features of the data.
+      # @return [RBF] The learned transformer itself.
       def fit(x, _y = nil)
         n_features = x.shape[1]
         params[:n_components] = 2 * n_features if params[:n_components] <= 0
@@ -65,48 +66,43 @@ module SVMKit
         @random_vec = NMatrix.zeros([1, params[:n_components] - n_half_components]).hconcat(
           NMatrix.ones([1, n_half_components]) * (0.5 * Math::PI)
         )
-        #@random_vec = rand_uniform([1, self.params[:n_components]]) * (2.0 * Math::PI)
         self
       end
 
       # Fit the model with training data, and then transform them with the learned model.
       #
-      #
-      # fit_transform(x) -> NMatrix
+      # @overload fit_transform(x) -> NMatrix
       #
-      #
-      #
-      # * *Returns* :
-      #   - The transformed data (NMatrix, shape: [n_samples, n_components]).
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data
       def fit_transform(x, _y = nil)
         fit(x).transform(x)
       end
 
       # Transform the given data with the learned model.
       #
-      #
-      # transform(x) -> NMatrix
+      # @overload transform(x) -> NMatrix
       #
-      #
-      #
-      # * *Returns* :
-      #   - The transformed data (NMatrix, shape: [n_samples, n_components]).
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data.
       def transform(x)
         n_samples, = x.shape
         projection = x.dot(@random_mat) + @random_vec.repeat(n_samples, 0)
         projection.sin * ((2.0 / params[:n_components])**0.5)
       end
 
-      #
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about RBF.
+      def marshal_dump
         { params: params,
           random_mat: Utils.dump_nmatrix(@random_mat),
           random_vec: Utils.dump_nmatrix(@random_vec),
           rng: @rng }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         self.params = obj[:params]
         @random_mat = Utils.restore_nmatrix(obj[:random_mat])
         @random_vec = Utils.restore_nmatrix(obj[:random_vec])
@@ -117,13 +113,13 @@ module SVMKit
       protected
 
       # Generate the uniform random matrix with the given shape.
-      def rand_uniform(shape)
+      def rand_uniform(shape)
         rnd_vals = Array.new(NMatrix.size(shape)) { @rng.rand }
         NMatrix.new(shape, rnd_vals, dtype: :float64, stype: :dense)
       end
 
       # Generate the normal random matrix with the given shape, mean, and standard deviation.
-      def rand_normal(shape, mu = 0.0, sigma = 1.0)
+      def rand_normal(shape, mu = 0.0, sigma = 1.0)
         a = rand_uniform(shape)
         b = rand_uniform(shape)
         ((a.log * -2.0).sqrt * (b * 2.0 * Math::PI).sin) * sigma + mu
data/lib/svmkit/linear_model/logistic_regression.rb
CHANGED
@@ -4,69 +4,80 @@ require 'svmkit/base/classifier'
 module SVMKit
   # This module consists of the classes that implement generalized linear models.
   module LinearModel
-    # LogisticRegression is a class that implements Logistic Regression
-    #
+    # LogisticRegression is a class that implements Logistic Regression
+    # with stochastic gradient descent (SGD) optimization.
+    # Note that the class performs as a binary classifier.
     #
+    # @example
     #   estimator =
     #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
     #
-    # *
-    #
-    #
+    # *Reference*
+    # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
     class LogisticRegression
       include Base::BaseEstimator
       include Base::Classifier
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         reg_param: 1.0,
+        fit_bias: false,
+        bias_scale: 1.0,
         max_iter: 100,
         batch_size: 50,
         random_seed: nil
       }.freeze
 
-      #
+      # Return the weight vector for Logistic Regression.
+      # @return [NMatrix] (shape: [1, n_features])
       attr_reader :weight_vec
 
-      #
+      # Return the bias term (a.k.a. intercept) for Logistic Regression.
+      # @return [Float]
+      attr_reader :bias_term
+
+      # Return the random generator for transformation.
+      # @return [Random]
       attr_reader :rng
 
       # Create a new classifier with Logisitc Regression by the SGD optimization.
       #
-      # :
-      # new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisiticRegression
+      # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisiticRegression
       #
-      #
-      #
-      #
-      #
-      #
+      # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
+      # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
+      #   If fit_bias is true, the feature vector v becoms [v; bias_scale].
+      # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
+      # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
+      # @param random_seed [Integer] (defaults to: nil) The seed value using to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
         @weight_vec = nil
+        @bias_term = 0.0
         @rng = Random.new(self.params[:random_seed])
       end
 
       # Fit the model with given training data.
       #
-      # :
-      #
-      #
-      #
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- The categorical variables (e.g. labels) to be used for fitting the model.
-      # * *Returns* :
-      #   - The learned classifier itself.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [NMatrix] (shape: [1, n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [LogisticRegression] The learned classifier itself.
       def fit(x, y)
-        # Generate binary labels
+        # Generate binary labels.
         negative_label = y.uniq.sort.shift
         bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : 0 }
+        # Expand feature vectors for bias term.
+        samples = x
+        samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
         # Initialize some variables.
-        n_samples, n_features =
+        n_samples, n_features = samples.shape
         rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
-
+        weight_vec = NMatrix.zeros([1, n_features])
         # Start optimization.
         params[:max_iter].times do |t|
           # random sampling
@@ -76,84 +87,75 @@ module SVMKit
           eta = 1.0 / (params[:reg_param] * (t + 1))
           mean_vec = NMatrix.zeros([1, n_features])
           subset_ids.each do |n|
-            z =
+            z = weight_vec.dot(samples.row(n).transpose)[0]
             coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
-            mean_vec +=
+            mean_vec += samples.row(n) * coef
           end
           mean_vec *= eta / params[:batch_size]
-
+          weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
           # scale the weight vector.
-          scaler = (1.0 / params[:reg_param]**0.5) /
-
+          scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
+          weight_vec *= [1.0, scaler].min
+        end
+        # Store the learned model.
+        if params[:fit_bias]
+          @weight_vec = weight_vec[0...n_features - 1]
+          @bias_term = weight_vec[n_features - 1]
+        else
+          @weight_vec = weight_vec[0...n_features]
+          @bias_term = 0.0
         end
         self
       end
 
       # Calculate confidence scores for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
-      # * *Returns* :
-      #   - Confidence score per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
       def decision_function(x)
-        w = (@weight_vec.dot(x.transpose) * -1.0).exp + 1.0
+        w = ((@weight_vec.dot(x.transpose) + @bias_term) * -1.0).exp + 1.0
         w.map { |v| 1.0 / v }
       end
 
       # Predict class labels for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
-      # * *Returns* :
-      #   - Predicted class label per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
       def predict(x)
         decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
       end
 
       # Predict probability for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the probailities.
-      # * *Returns* :
-      #   - Predicted probability per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the probailities.
+      # @return [NMatrix] (shape: [1, n_samples]) Predicted probability per sample.
       def predict_proba(x)
         decision_function(x)
       end
 
       # Claculate the mean accuracy of the given testing data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
-      # * *Returns* :
-      #   - Mean accuracy
+      # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+      # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
       def score(x, y)
         p = predict(x)
         n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         n_hits / y.size.to_f
       end
 
-      #
-
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about LogisticRegression.
+      def marshal_dump
+        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         self.params = obj[:params]
         @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+        @bias_term = obj[:bias_term]
         @rng = obj[:rng]
         nil
       end
data/lib/svmkit/linear_model/pegasos_svc.rb
CHANGED
@@ -6,140 +6,144 @@ module SVMKit
   module LinearModel
     # PegasosSVC is a class that implements Support Vector Classifier with the Pegasos algorithm.
     #
+    # @example
     #   estimator =
     #     SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
     #
-    # *
-    #
-    #
+    # *Reference*
+    # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
     class PegasosSVC
       include Base::BaseEstimator
       include Base::Classifier
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         reg_param: 1.0,
+        fit_bias: false,
+        bias_scale: 1.0,
         max_iter: 100,
         batch_size: 50,
         random_seed: nil
       }.freeze
 
-      #
+      # Return the weight vector for SVC.
+      # @return [NMatrix] (shape: [1, n_features])
       attr_reader :weight_vec
 
-      #
+      # Return the bias term (a.k.a. intercept) for SVC.
+      # @return [Float]
+      attr_reader :bias_term
+
+      # Return the random generator for performing random sampling in the Pegasos algorithm.
+      # @return [Random]
       attr_reader :rng
 
       # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
       #
-      # :
-      # new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
+      # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
       #
-      #
-      #
-      #
-      #
-      #
+      # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
+      # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
+      # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
+      # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
+      # @param random_seed [Integer] (defaults to: nil) The seed value using to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
         @weight_vec = nil
+        @bias_term = 0.0
         @rng = Random.new(self.params[:random_seed])
       end
 
       # Fit the model with given training data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model.
-      # * *Returns* :
-      #   - The learned classifier itself.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [NMatrix] (shape: [1, n_samples]) The labels to be used for fitting the model.
+      # @return [PegasosSVC] The learned classifier itself.
       def fit(x, y)
         # Generate binary labels
         negative_label = y.uniq.sort.shift
         bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
+        # Expand feature vectors for bias term.
+        samples = x
+        samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
         # Initialize some variables.
-        n_samples, n_features =
+        n_samples, n_features = samples.shape
         rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
-
+        weight_vec = NMatrix.zeros([1, n_features])
         # Start optimization.
         params[:max_iter].times do |t|
           # random sampling
           subset_ids = rand_ids.shift(params[:batch_size])
           rand_ids.concat(subset_ids)
           target_ids = subset_ids.map do |n|
-            n if
+            n if weight_vec.dot(samples.row(n).transpose) * bin_y[n] < 1
           end
           n_subsamples = target_ids.size
           next if n_subsamples.zero?
           # update the weight vector.
           eta = 1.0 / (params[:reg_param] * (t + 1))
           mean_vec = NMatrix.zeros([1, n_features])
-          target_ids.each { |n| mean_vec +=
+          target_ids.each { |n| mean_vec += samples.row(n) * bin_y[n] }
           mean_vec *= eta / n_subsamples
-
+          weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
           # scale the weight vector.
-          scaler = (1.0 / params[:reg_param]**0.5) /
-
+          scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
+          weight_vec *= [1.0, scaler].min
+        end
+        # Store the learned model.
+        if params[:fit_bias]
+          @weight_vec = weight_vec[0...n_features - 1]
+          @bias_term = weight_vec[n_features - 1]
+        else
+          @weight_vec = weight_vec[0...n_features]
+          @bias_term = 0.0
         end
         self
       end
 
       # Calculate confidence scores for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
-      # * *Returns* :
-      #   - Confidence score per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
       def decision_function(x)
-        @weight_vec.dot(x.transpose)
+        @weight_vec.dot(x.transpose) + @bias_term
       end
 
       # Predict class labels for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
-      # * *Returns* :
-      #   - Predicted class label per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
       def predict(x)
         decision_function(x).map { |v| v >= 0 ? 1 : -1 }
       end
 
       # Claculate the mean accuracy of the given testing data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
-      # * *Returns* :
-      #   - Mean accuracy
+      # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+      # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
       def score(x, y)
         p = predict(x)
         n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         n_hits / y.size.to_f
       end
 
-      #
-
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about PegasosSVC.
+      def marshal_dump
+        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         self.params = obj[:params]
         @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+        @bias_term = obj[:bias_term]
         @rng = obj[:rng]
         nil
       end
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb
CHANGED
@@ -6,33 +6,34 @@ module SVMKit
   module Multiclass
     # OneVsRestClassifier is a class that implements One-vs-Rest (OvR) strategy for multi-label classification.
     #
+    # @example
     #   base_estimator =
     #     SVMKit::LinearModel::PegasosSVC.new(penalty: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
     #   estimator = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
     #   estimator.fit(training_samples, training_labels)
     #   results = estimator.predict(testing_samples)
-    #
     class OneVsRestClassifier
       include Base::BaseEstimator
       include Base::Classifier
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         estimator: nil
       }.freeze
 
-      #
+      # Return the set of estimators.
+      # @return [Array<Classifier>]
       attr_reader :estimators
 
-      #
+      # Return the class labels.
+      # @return [NMatrix] (shape: [1, n_classes])
       attr_reader :classes
 
       # Create a new multi-label classifier with the one-vs-rest startegy.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +:estimator+ (Classifier) (defaults to: nil) -- The (binary) classifier for construction a multi-label classifier.
+      # @overload new(estimator: base_estimator) -> OneVsRestClassifier
+      # @param estimator [Classifier] (defaults to: nil)
+      #   The (binary) classifier for construction a multi-label classifier.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         @estimators = nil
@@ -41,14 +42,9 @@ module SVMKit
 
       # Fit the model with given training data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model.
-      # * *Returns* :
-      #   - The learned classifier itself.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [NMatrix] (shape: [1, n_samples]) The labels to be used for fitting the model.
+      # @return [OneVsRestClassifier] The learned classifier itself.
       def fit(x, y)
         @classes = y.uniq.sort
         @estimators = @classes.map do |label|
@@ -60,13 +56,8 @@ module SVMKit
 
       # Calculate confidence scores for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
-      # * *Returns* :
-      #   - Confidence scores per sample for each class.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [NMatrix] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
       def decision_function(x)
         n_samples, = x.shape
         n_classes = @classes.size
@@ -78,13 +69,8 @@ module SVMKit
 
       # Predict class labels for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
-      # * *Returns* :
-      #   - Predicted class label per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
       def predict(x)
         n_samples, = x.shape
         decision_values = decision_function(x)
@@ -94,29 +80,26 @@ module SVMKit
 
       # Claculate the mean accuracy of the given testing data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
-      # * *Returns* :
-      #   - Mean accuracy
+      # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+      # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
       def score(x, y)
         p = predict(x)
         n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         n_hits / y.size.to_f
       end
 
-      #
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about OneVsRestClassifier.
+      def marshal_dump
         { params: params,
           classes: @classes,
           estimators: @estimators.map { |e| Marshal.dump(e) } }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         self.params = obj[:params]
         @classes = obj[:classes]
         @estimators = obj[:estimators].map { |e| Marshal.load(e) }
data/lib/svmkit/preprocessing/l2_normalizer.rb
CHANGED
@@ -6,32 +6,30 @@ module SVMKit
   module Preprocessing
     # Normalize samples to unit L2-norm.
     #
+    # @example
     #   normalizer = SVMKit::Preprocessing::StandardScaler.new
     #   new_samples = normalizer.fit_transform(samples)
     class L2Normalizer
       include Base::BaseEstimator
       include Base::Transformer
 
-      #
+      # Return the vector consists of L2-norm for each sample.
+      # @return [NMatrix] (shape: [1, n_samples])
       attr_reader :norm_vec # :nodoc:
 
       # Create a new normalizer for normaliing to unit L2-norm.
       #
-      #
-      # new() -> L2Normalizer
+      # @overload new() -> L2Normalizer
       def initialize(_params = {})
         @norm_vec = nil
       end
 
-      # Calculate L2
+      # Calculate L2-norms of each sample.
       #
-      #
-      # fit(x) -> L2Normalizer
+      # @overload fit(x) -> L2Normalizer
       #
-      #
-      #
-      # * *Returns* :
-      #   - L2Normalizer
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
+      # @return [L2Normalizer]
       def fit(x, _y = nil)
         n_samples, = x.shape
         @norm_vec = NMatrix.new([1, n_samples],
@@ -39,15 +37,12 @@ module SVMKit
         self
       end
 
-      # Calculate L2
+      # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
       #
-      #
-      # fit_transform(x) -> NMatrix
+      # @overload fit_transform(x) -> NMatrix
       #
-      #
-      #
-      # * *Returns* :
-      #   - The normalized samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
+      # @return [NMatrix] The normalized samples.
       def fit_transform(x, _y = nil)
         fit(x)
         x / @norm_vec.transpose.repeat(x.shape[1], 1)
data/lib/svmkit/preprocessing/min_max_scaler.rb
CHANGED
@@ -6,6 +6,7 @@ module SVMKit
   module Preprocessing
     # Normalize samples by scaling each feature to a given range.
     #
+    # @example
     #   normalizer = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
     #   new_training_samples = normalizer.fit_transform(training_samples)
     #   new_testing_samples = normalizer.transform(testing_samples)
@@ -13,23 +14,24 @@ module SVMKit
       include Base::BaseEstimator
       include Base::Transformer
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         feature_range: [0.0, 1.0]
       }.freeze
 
-      #
-
+      # Return the vector consists of the minimum value for each feature.
+      # @return [NMatrix] (shape: [1, n_features])
+      attr_reader :min_vec
 
-      #
-
+      # Return the vector consists of the maximum value for each feature.
+      # @return [NMatrix] (shape: [1, n_features])
+      attr_reader :max_vec
 
       # Creates a new normalizer for scaling each feature to a given range.
       #
-      #
-      # new(feature_range: [0.0, 1.0]) -> MinMaxScaler
+      # @overload new(feature_range: [0.0, 1.0]) -> MinMaxScaler
       #
-      #
-      #   - +:feature_range+ (Array) (defaults to: [0.0, 1.0]) -- The desired range of samples.
+      # @param feature_range [Array] (defaults to: [0.0, 1.0]) The desired range of samples.
       def initialize(params = {})
         @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         @min_vec = nil
@@ -38,13 +40,10 @@ module SVMKit
 
       # Calculate the minimum and maximum value of each feature for scaling.
       #
-      #
-      # fit(x) -> MinMaxScaler
+      # @overload fit(x) -> MinMaxScaler
       #
-      #
-      #
-      # * *Returns* :
-      #   - MinMaxScaler
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+      # @return [MinMaxScaler]
       def fit(x, _y = nil)
         @min_vec = x.min(0)
         @max_vec = x.max(0)
@@ -53,26 +52,18 @@ module SVMKit
 
       # Calculate the minimum and maximum values, and then normalize samples to feature_range.
       #
-      #
-      # fit_transform(x) -> NMatrix
+      # @overload fit_transform(x) -> NMatrix
       #
-      #
-      #
-      # * *Returns* :
-      #   - The scaled samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+      # @return [NMatrix] The scaled samples.
       def fit_transform(x, _y = nil)
         fit(x).transform(x)
       end
 
       # Perform scaling the given samples according to feature_range.
       #
-      #
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled.
-      # * *Returns* :
-      #   - The scaled samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to be scaled.
+      # @return [NMatrix] The scaled samples.
       def transform(x)
         n_samples, = x.shape
         dif_vec = @max_vec - @min_vec
@@ -80,15 +71,17 @@ module SVMKit
         nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
       end
 
-      #
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about MinMaxScaler.
+      def marshal_dump
         { params: @params,
           min_vec: Utils.dump_nmatrix(@min_vec),
           max_vec: Utils.dump_nmatrix(@max_vec) }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         @params = obj[:params]
         @min_vec = Utils.restore_nmatrix(obj[:min_vec])
         @max_vec = Utils.restore_nmatrix(obj[:max_vec])
data/lib/svmkit/preprocessing/standard_scaler.rb
CHANGED
@@ -6,6 +6,7 @@ module SVMKit
   module Preprocessing
     # Normalize samples by centering and scaling to unit variance.
     #
+    # @example
     #   normalizer = SVMKit::Preprocessing::StandardScaler.new
     #   new_training_samples = normalizer.fit_transform(training_samples)
     #   new_testing_samples = normalizer.transform(testing_samples)
@@ -13,16 +14,17 @@ module SVMKit
       include Base::BaseEstimator
       include Base::Transformer
 
-      #
-
+      # Return the vector consists of the mean value for each feature.
+      # @return [NMatrix] (shape: [1, n_features])
+      attr_reader :mean_vec
 
-      #
-
+      # Return the vector consists of the standard deviation for each feature.
+      # @return [NMatrix] (shape: [1, n_features])
+      attr_reader :std_vec
 
       # Create a new normalizer for centering and scaling to unit variance.
       #
-      #
-      # new() -> StandardScaler
+      # @overload new() -> StandardScaler
       def initialize(_params = {})
         @mean_vec = nil
         @std_vec = nil
@@ -30,13 +32,11 @@ module SVMKit
 
       # Calculate the mean value and standard deviation of each feature for scaling.
       #
-      #
-      # fit(x) -> StandardScaler
+      # @overload fit(x) -> StandardScaler
       #
-      #
-      #
-      #
-      #   - StandardScaler
+      # @param x [NMatrix] (shape: [n_samples, n_features])
+      #   The samples to calculate the mean values and standard deviations.
+      # @return [StandardScaler]
       def fit(x, _y = nil)
         @mean_vec = x.mean(0)
         @std_vec = x.std(0)
@@ -45,39 +45,34 @@ module SVMKit
 
       # Calculate the mean values and standard deviations, and then normalize samples using them.
       #
-      #
-      # fit_transform(x) -> NMatrix
+      # @overload fit_transform(x) -> NMatrix
       #
-      #
-      #
-      #
-      #   - The scaled samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features])
+      #   The samples to calculate the mean values and standard deviations.
+      # @return [NMatrix] The scaled samples.
       def fit_transform(x, _y = nil)
         fit(x).transform(x)
       end
 
       # Perform standardization the given samples.
       #
-      #
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled.
-      # * *Returns* :
-      #   - The scaled samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to be scaled.
+      # @return [NMatrix] The scaled samples.
       def transform(x)
         n_samples, = x.shape
         (x - @mean_vec.repeat(n_samples, 0)) / @std_vec.repeat(n_samples, 0)
       end
 
-      #
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about StandardScaler.
+      def marshal_dump
         { mean_vec: Utils.dump_nmatrix(@mean_vec),
           std_vec: Utils.dump_nmatrix(@std_vec) }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         @mean_vec = Utils.restore_nmatrix(obj[:mean_vec])
         @std_vec = Utils.restore_nmatrix(obj[:std_vec])
         nil
data/lib/svmkit/utils.rb
CHANGED
@@ -3,13 +3,9 @@ module SVMKit
   module Utils
     class << self
       # Dump an NMatrix object converted to a Ruby Hash.
-      # # call-seq:
-      #   dump_nmatrix(mat) -> Hash
       #
-      #
-      #
-      # * *Returns* :
-      #   - A Ruby Hash containing matrix information.
+      # @param mat [NMatrix] An NMatrix object converted to a Ruby Hash.
+      # @return [Hash] A Ruby Hash containing matrix information.
       def dump_nmatrix(mat)
         return nil if mat.class != NMatrix
         { shape: mat.shape, array: mat.to_flat_a, dtype: mat.dtype, stype: mat.stype }
@@ -17,13 +13,8 @@ module SVMKit
 
       # Return the results of converting the dumped data into an NMatrix object.
       #
-      #
-      #
-      #
-      # * *Arguments* :
-      #   - +dumpted_mat+ -- A Ruby Hash about NMatrix object created with SVMKit::Utils.dump_nmatrix method.
-      # * *Returns* :
-      #   - An NMatrix object restored from the given Hash.
+      # @param dmp [Hash] A Ruby Hash about NMatrix object created with SVMKit::Utils.dump_nmatrix method.
+      # @return [NMatrix] An NMatrix object restored from the given Hash.
       def restore_nmatrix(dmp = {})
         return nil unless dmp.class == Hash && %i[shape array dtype stype].all?(&dmp.method(:has_key?))
         NMatrix.new(dmp[:shape], dmp[:array], dtype: dmp[:dtype], stype: dmp[:stype])
data/lib/svmkit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-10-
+date: 2017-10-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
|