svmkit 0.1.1 → 0.1.2
- checksums.yaml +4 -4
- data/HISTORY.md +8 -1
- data/README.md +6 -0
- data/lib/svmkit/base/base_estimator.rb +2 -1
- data/lib/svmkit/kernel_approximation/rbf.rb +37 -41
- data/lib/svmkit/linear_model/logistic_regression.rb +68 -66
- data/lib/svmkit/linear_model/pegasos_svc.rb +60 -56
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +26 -43
- data/lib/svmkit/preprocessing/l2_normalizer.rb +12 -17
- data/lib/svmkit/preprocessing/min_max_scaler.rb +25 -32
- data/lib/svmkit/preprocessing/standard_scaler.rb +24 -29
- data/lib/svmkit/utils.rb +4 -13
- data/lib/svmkit/version.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c2538f07704c57e4dc90caa85411d0795ac23763
+  data.tar.gz: 3815cf50ee5978cca24d944c6e7f577216dae0e3
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e448dd5f8fddb4a2a805b1dcddb7ab9c53d7c3db3460760b3165940d0ab93ae82ba1b0fec089e7a1d6651154b5f0437f3d4400531cc11017fd16f9e2029e2611
+  data.tar.gz: 1416d8c3ea1f55abd1fb269bdaf86f80faaa31be298d0ed1349f6b708d05e64545bf0ad4c3865c7ced26057441ff0999dd82ca77eae54209190527b87ba4ec27
data/HISTORY.md
CHANGED
@@ -1,3 +1,11 @@
+# 0.1.2
+- Added the ability to learn a model with a bias term to the PegasosSVC and LogisticRegression classes.
+- Rewrote the documentation with YARD notation.
+
+# 0.1.1
+- Added a class for Logistic Regression with SGD optimization.
+- Fixed some mistakes in the documentation.
+
 # 0.1.0
 - Added basic classes.
 - Added a utility module.
@@ -5,4 +13,3 @@
 - Added a class for Support Vector Machine with the Pegasos algorithm.
 - Added a class that performs multiclass classification with the one-vs.-rest strategy.
 - Added classes for preprocessing such as min-max scaling, standardization, and L2 normalization.
-
data/README.md
CHANGED
@@ -1,5 +1,9 @@
 # SVMKit
 
+[![Build Status](https://travis-ci.org/yoshoku/SVMKit.svg?branch=master)](https://travis-ci.org/yoshoku/SVMKit)
+[![Gem Version](https://badge.fury.io/rb/svmkit.svg)](https://badge.fury.io/rb/svmkit)
+[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/SVMKit/blob/master/LICENSE.txt)
+
 SVMKit is a library for machine learning in Ruby.
 SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
 However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
@@ -23,6 +27,7 @@ Or install it yourself as:
 ## Usage
 
 Training phase:
+
 ```ruby
 require 'svmkit'
 require 'libsvmloader'
@@ -46,6 +51,7 @@ File.open('trained_classifier.dat', 'wb') { |f| f.write(Marshal.dump(classifier)
 ```
 
 Testing phase:
+
 ```ruby
 require 'svmkit'
 require 'libsvmloader'
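For orientation, the two README phases persist the trained model with Marshal, which works because every estimator in this release implements marshal_dump/marshal_load (see the class diffs below). A condensed sketch of the full cycle on dummy data (the README itself loads real data with libsvmloader; NMatrix.random is assumed available in the nmatrix gem for generating stand-in samples):

```ruby
require 'svmkit'

# Stand-in data; in the README these come from LibSVMLoader instead.
samples = NMatrix.random([100, 4], dtype: :float64)
labels  = NMatrix.new([1, 100], Array.new(100) { |n| n < 50 ? 1 : -1 })

# Training phase: fit and persist the classifier.
classifier = SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
classifier.fit(samples, labels)
File.open('trained_classifier.dat', 'wb') { |f| f.write(Marshal.dump(classifier)) }

# Testing phase: restore and evaluate.
classifier = Marshal.load(File.binread('trained_classifier.dat'))
puts classifier.score(samples, labels)
```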
data/lib/svmkit/kernel_approximation/rbf.rb
CHANGED
@@ -6,40 +6,43 @@ module SVMKit
   module KernelApproximation
     # Class for RBF kernel feature mapping.
     #
+    # @example
     #   transformer = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
     #   new_training_samples = transformer.fit_transform(training_samples)
     #   new_testing_samples = transformer.transform(testing_samples)
     #
-    # *
-    #
+    # *Reference*:
+    # 1. A. Rahimi and B. Recht, "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp. 1177--1184, 2007.
     class RBF
       include Base::BaseEstimator
       include Base::Transformer
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         gamma: 1.0,
         n_components: 128,
         random_seed: nil
       }.freeze
 
-      #
-
+      # Return the random matrix for transformation.
+      # @return [NMatrix] (shape: [n_features, n_components])
       attr_reader :random_mat
 
-      #
-
+      # Return the random vector for transformation.
+      # @return [NMatrix] (shape: [1, n_components])
       attr_reader :random_vec
 
-      #
-
+      # Return the random generator for transformation.
+      # @return [Random]
       attr_reader :rng
 
-      #
+      # Create a new transformer for mapping to RBF kernel feature space.
       #
-      #
-      #   new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
+      # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
       #
-      #
-      #
-      #
-      #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
+      # @param gamma [Float] (defaults to: 1.0) The parameter of the RBF kernel: exp(-gamma * x^2).
+      # @param n_components [Integer] (defaults to: 128) The number of dimensions of the RBF kernel feature space.
+      # @param random_seed [Integer] (defaults to: nil) The seed value used to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
@@ -50,13 +53,11 @@ module SVMKit
 
       # Fit the model with given training data.
       #
-      #
-      #   fit(x) -> RBF
+      # @overload fit(x) -> RBF
       #
-      #
-      #
-      #
-      #   - The learned transformer itself.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   This method uses only the number of features of the data.
+      # @return [RBF] The learned transformer itself.
       def fit(x, _y = nil)
         n_features = x.shape[1]
         params[:n_components] = 2 * n_features if params[:n_components] <= 0
@@ -65,48 +66,43 @@ module SVMKit
         @random_vec = NMatrix.zeros([1, params[:n_components] - n_half_components]).hconcat(
           NMatrix.ones([1, n_half_components]) * (0.5 * Math::PI)
         )
-        #@random_vec = rand_uniform([1, self.params[:n_components]]) * (2.0 * Math::PI)
         self
       end
 
       # Fit the model with training data, and then transform them with the learned model.
      #
-      #
-      #   fit_transform(x) -> NMatrix
+      # @overload fit_transform(x) -> NMatrix
       #
-      #
-      #
-      # * *Returns* :
-      #   - The transformed data (NMatrix, shape: [n_samples, n_components]).
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data.
      def fit_transform(x, _y = nil)
         fit(x).transform(x)
       end
 
       # Transform the given data with the learned model.
       #
-      #
-      #   transform(x) -> NMatrix
+      # @overload transform(x) -> NMatrix
       #
-      #
-      #
-      # * *Returns* :
-      #   - The transformed data (NMatrix, shape: [n_samples, n_components]).
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data.
       def transform(x)
         n_samples, = x.shape
         projection = x.dot(@random_mat) + @random_vec.repeat(n_samples, 0)
         projection.sin * ((2.0 / params[:n_components])**0.5)
       end
 
-      #
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about RBF.
+      def marshal_dump
         { params: params,
           random_mat: Utils.dump_nmatrix(@random_mat),
           random_vec: Utils.dump_nmatrix(@random_vec),
           rng: @rng }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         self.params = obj[:params]
         @random_mat = Utils.restore_nmatrix(obj[:random_mat])
         @random_vec = Utils.restore_nmatrix(obj[:random_vec])
@@ -117,13 +113,13 @@ module SVMKit
       protected
 
       # Generate the uniform random matrix with the given shape.
-      def rand_uniform(shape)
+      def rand_uniform(shape)
         rnd_vals = Array.new(NMatrix.size(shape)) { @rng.rand }
         NMatrix.new(shape, rnd_vals, dtype: :float64, stype: :dense)
       end
 
       # Generate the normal random matrix with the given shape, mean, and standard deviation.
-      def rand_normal(shape, mu = 0.0, sigma = 1.0)
+      def rand_normal(shape, mu = 0.0, sigma = 1.0)
         a = rand_uniform(shape)
         b = rand_uniform(shape)
         ((a.log * -2.0).sqrt * (b * 2.0 * Math::PI).sin) * sigma + mu
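For reference, `transform` above implements the random Fourier feature map from the cited Rahimi and Recht paper: samples are projected with the learned random matrix, phase-shifted by the random vector, and passed through sin, so that inner products of mapped vectors approximate the RBF kernel. A minimal usage sketch on dummy data (NMatrix.random from the nmatrix gem is assumed available):

```ruby
require 'svmkit'

# Dummy 100-sample, 4-feature data; any [n_samples, n_features] NMatrix works.
samples = NMatrix.random([100, 4], dtype: :float64)

transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 1024, random_seed: 1)
mapped = transformer.fit_transform(samples)

puts mapped.shape.inspect # => [100, 1024]; dot products here approximate the RBF kernel
```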
data/lib/svmkit/linear_model/logistic_regression.rb
CHANGED
@@ -4,69 +4,80 @@ require 'svmkit/base/classifier'
 module SVMKit
   # This module consists of the classes that implement generalized linear models.
   module LinearModel
-    # LogisticRegression is a class that implements Logistic Regression
-    #
+    # LogisticRegression is a class that implements Logistic Regression
+    # with stochastic gradient descent (SGD) optimization.
+    # Note that the class performs as a binary classifier.
     #
+    # @example
     #   estimator =
     #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_labels)
     #   results = estimator.predict(testing_samples)
     #
-    # *
-    #
-    #
+    # *Reference*
+    # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
     class LogisticRegression
       include Base::BaseEstimator
       include Base::Classifier
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         reg_param: 1.0,
+        fit_bias: false,
+        bias_scale: 1.0,
         max_iter: 100,
         batch_size: 50,
         random_seed: nil
       }.freeze
 
-      #
+      # Return the weight vector for Logistic Regression.
+      # @return [NMatrix] (shape: [1, n_features])
       attr_reader :weight_vec
 
-      #
+      # Return the bias term (a.k.a. intercept) for Logistic Regression.
+      # @return [Float]
+      attr_reader :bias_term
+
+      # Return the random generator for random sampling.
+      # @return [Random]
       attr_reader :rng
 
       # Create a new classifier with Logistic Regression by the SGD optimization.
       #
-      # :
-      #   new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisiticRegression
+      # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
       #
-      #
-      #
-      #
-      #
-      #
+      # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
+      # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
+      #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
+      # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
+      # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
+      # @param random_seed [Integer] (defaults to: nil) The seed value used to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
         @weight_vec = nil
+        @bias_term = 0.0
         @rng = Random.new(self.params[:random_seed])
       end
 
       # Fit the model with given training data.
       #
-      # :
-      #
-      #
-      #
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- The categorical variables (e.g. labels) to be used for fitting the model.
-      # * *Returns* :
-      #   - The learned classifier itself.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [NMatrix] (shape: [1, n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [LogisticRegression] The learned classifier itself.
       def fit(x, y)
-        # Generate binary labels
+        # Generate binary labels.
         negative_label = y.uniq.sort.shift
         bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : 0 }
+        # Expand feature vectors for bias term.
+        samples = x
+        samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
         # Initialize some variables.
-        n_samples, n_features =
+        n_samples, n_features = samples.shape
         rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
-
+        weight_vec = NMatrix.zeros([1, n_features])
         # Start optimization.
         params[:max_iter].times do |t|
           # random sampling
@@ -76,84 +87,75 @@ module SVMKit
           eta = 1.0 / (params[:reg_param] * (t + 1))
           mean_vec = NMatrix.zeros([1, n_features])
           subset_ids.each do |n|
-            z =
+            z = weight_vec.dot(samples.row(n).transpose)[0]
             coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
-            mean_vec +=
+            mean_vec += samples.row(n) * coef
           end
           mean_vec *= eta / params[:batch_size]
-
+          weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
           # scale the weight vector.
-          scaler = (1.0 / params[:reg_param]**0.5) /
-
+          scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
+          weight_vec *= [1.0, scaler].min
+        end
+        # Store the learned model.
+        if params[:fit_bias]
+          @weight_vec = weight_vec[0...n_features - 1]
+          @bias_term = weight_vec[n_features - 1]
+        else
+          @weight_vec = weight_vec[0...n_features]
+          @bias_term = 0.0
         end
         self
       end
 
       # Calculate confidence scores for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
-      # * *Returns* :
-      #   - Confidence score per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
       def decision_function(x)
-        w = (@weight_vec.dot(x.transpose) * -1.0).exp + 1.0
+        w = ((@weight_vec.dot(x.transpose) + @bias_term) * -1.0).exp + 1.0
         w.map { |v| 1.0 / v }
       end
 
       # Predict class labels for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
-      # * *Returns* :
-      #   - Predicted class label per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
       def predict(x)
         decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
       end
 
       # Predict probability for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the probailities.
-      # * *Returns* :
-      #   - Predicted probability per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+      # @return [NMatrix] (shape: [1, n_samples]) Predicted probability per sample.
       def predict_proba(x)
         decision_function(x)
       end
 
       # Calculate the mean accuracy of the given testing data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
-      # * *Returns* :
-      #   - Mean accuracy
+      # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+      # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
       def score(x, y)
         p = predict(x)
         n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         n_hits / y.size.to_f
       end
 
-      #
-
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about LogisticRegression.
+      def marshal_dump
+        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         self.params = obj[:params]
         @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+        @bias_term = obj[:bias_term]
         @rng = obj[:rng]
         nil
       end
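A minimal sketch of the fit_bias option introduced in this release, on dummy data (labels follow the {1, -1} convention that `predict` returns; NMatrix.random is assumed available). When fit_bias is true, `fit` appends bias_scale to every sample, learns an extended weight vector, and splits it back into `weight_vec` and `bias_term`:

```ruby
require 'svmkit'

samples = NMatrix.random([60, 3], dtype: :float64)
labels  = NMatrix.new([1, 60], Array.new(60) { |n| n < 30 ? 1 : -1 })

estimator = SVMKit::LinearModel::LogisticRegression.new(
  reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
  max_iter: 100, batch_size: 20, random_seed: 1
)
estimator.fit(samples, labels)

puts estimator.bias_term              # the learned intercept (0.0 when fit_bias is false)
puts estimator.score(samples, labels) # mean accuracy on the training set
```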
data/lib/svmkit/linear_model/pegasos_svc.rb
CHANGED
@@ -6,140 +6,144 @@ module SVMKit
   module LinearModel
     # PegasosSVC is a class that implements Support Vector Classifier with the Pegasos algorithm.
     #
+    # @example
     #   estimator =
     #     SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_labels)
     #   results = estimator.predict(testing_samples)
     #
-    # *
-    #
-    #
+    # *Reference*
+    # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
     class PegasosSVC
       include Base::BaseEstimator
       include Base::Classifier
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         reg_param: 1.0,
+        fit_bias: false,
+        bias_scale: 1.0,
         max_iter: 100,
         batch_size: 50,
         random_seed: nil
       }.freeze
 
-      #
+      # Return the weight vector for SVC.
+      # @return [NMatrix] (shape: [1, n_features])
       attr_reader :weight_vec
 
-      #
+      # Return the bias term (a.k.a. intercept) for SVC.
+      # @return [Float]
+      attr_reader :bias_term
+
+      # Return the random generator for performing random sampling in the Pegasos algorithm.
+      # @return [Random]
       attr_reader :rng
 
       # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
       #
-      # :
-      #   new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
+      # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
       #
-      #
-      #
-      #
-      #
-      #
+      # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
+      # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
+      # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
+      # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
+      # @param random_seed [Integer] (defaults to: nil) The seed value used to initialize the random generator.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         self.params[:random_seed] ||= srand
         @weight_vec = nil
+        @bias_term = 0.0
         @rng = Random.new(self.params[:random_seed])
       end
 
       # Fit the model with given training data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model.
-      # * *Returns* :
-      #   - The learned classifier itself.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [NMatrix] (shape: [1, n_samples]) The labels to be used for fitting the model.
+      # @return [PegasosSVC] The learned classifier itself.
       def fit(x, y)
         # Generate binary labels
         negative_label = y.uniq.sort.shift
         bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
+        # Expand feature vectors for bias term.
+        samples = x
+        samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
         # Initialize some variables.
-        n_samples, n_features =
+        n_samples, n_features = samples.shape
         rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
-
+        weight_vec = NMatrix.zeros([1, n_features])
         # Start optimization.
         params[:max_iter].times do |t|
           # random sampling
           subset_ids = rand_ids.shift(params[:batch_size])
           rand_ids.concat(subset_ids)
           target_ids = subset_ids.map do |n|
-            n if
+            n if weight_vec.dot(samples.row(n).transpose) * bin_y[n] < 1
           end
           n_subsamples = target_ids.size
           next if n_subsamples.zero?
           # update the weight vector.
           eta = 1.0 / (params[:reg_param] * (t + 1))
           mean_vec = NMatrix.zeros([1, n_features])
-          target_ids.each { |n| mean_vec +=
+          target_ids.each { |n| mean_vec += samples.row(n) * bin_y[n] }
           mean_vec *= eta / n_subsamples
-
+          weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
           # scale the weight vector.
-          scaler = (1.0 / params[:reg_param]**0.5) /
-
+          scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
+          weight_vec *= [1.0, scaler].min
+        end
+        # Store the learned model.
+        if params[:fit_bias]
+          @weight_vec = weight_vec[0...n_features - 1]
+          @bias_term = weight_vec[n_features - 1]
+        else
+          @weight_vec = weight_vec[0...n_features]
+          @bias_term = 0.0
         end
         self
       end
 
       # Calculate confidence scores for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
-      # * *Returns* :
-      #   - Confidence score per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
       def decision_function(x)
-        @weight_vec.dot(x.transpose)
+        @weight_vec.dot(x.transpose) + @bias_term
       end
 
       # Predict class labels for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
-      # * *Returns* :
-      #   - Predicted class label per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
       def predict(x)
         decision_function(x).map { |v| v >= 0 ? 1 : -1 }
       end
 
       # Calculate the mean accuracy of the given testing data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
-      # * *Returns* :
-      #   - Mean accuracy
+      # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+      # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
       def score(x, y)
         p = predict(x)
         n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         n_hits / y.size.to_f
       end
 
-      #
-
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about PegasosSVC.
+      def marshal_dump
+        { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         self.params = obj[:params]
         @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+        @bias_term = obj[:bias_term]
         @rng = obj[:rng]
         nil
       end
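The optimization loop in `fit` is the mini-batch Pegasos update from the cited paper: at step t it collects the margin violators (samples with y * w.x < 1), takes a subgradient step with learning rate eta = 1 / (reg_param * t) combined with weight shrinkage, and projects the weights onto the ball of radius 1 / sqrt(reg_param). An illustrative single step on plain Ruby arrays (exposition only, not the gem's API):

```ruby
# One mini-batch Pegasos update. w: weight vector, batch: array of [x, y]
# pairs with y in {-1, 1}, lambda_: regularization parameter, t: 1-based step.
def pegasos_step(w, batch, lambda_, t)
  dot = ->(a, b) { a.zip(b).map { |p, q| p * q }.inject(:+) }
  eta = 1.0 / (lambda_ * t)
  # Collect the margin violators: samples with y * (w . x) < 1.
  violators = batch.select { |x, y| y * dot.call(x, w) < 1.0 }
  return w if violators.empty?
  # Mean subgradient of the hinge loss over the violators.
  grad = Array.new(w.size, 0.0)
  violators.each { |x, y| x.each_with_index { |v, i| grad[i] += v * y } }
  grad.map! { |v| v / violators.size }
  # Gradient step with shrinkage, then projection onto the 1/sqrt(lambda) ball.
  w = w.each_index.map { |i| w[i] * (1.0 - eta * lambda_) + eta * grad[i] }
  scale = [1.0, (1.0 / Math.sqrt(lambda_)) / Math.sqrt(w.map { |v| v * v }.inject(:+))].min
  w.map { |v| v * scale }
end
```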
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb
CHANGED
@@ -6,33 +6,34 @@ module SVMKit
   module Multiclass
     # OneVsRestClassifier is a class that implements the One-vs-Rest (OvR) strategy for multiclass classification.
     #
+    # @example
     #   base_estimator =
     #     SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
     #   estimator = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
     #   estimator.fit(training_samples, training_labels)
     #   results = estimator.predict(testing_samples)
-    #
     class OneVsRestClassifier
       include Base::BaseEstimator
       include Base::Classifier
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         estimator: nil
       }.freeze
 
-      #
+      # Return the set of estimators.
+      # @return [Array<Classifier>]
       attr_reader :estimators
 
-      #
+      # Return the class labels.
+      # @return [NMatrix] (shape: [1, n_classes])
       attr_reader :classes
 
       # Create a new multiclass classifier with the one-vs-rest strategy.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +:estimator+ (Classifier) (defaults to: nil) -- The (binary) classifier for construction a multi-label classifier.
+      # @overload new(estimator: base_estimator) -> OneVsRestClassifier
+      # @param estimator [Classifier] (defaults to: nil)
+      #   The (binary) classifier for constructing a multiclass classifier.
       def initialize(params = {})
         self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         @estimators = nil
@@ -41,14 +42,9 @@ module SVMKit
 
       # Fit the model with given training data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model.
-      # * *Returns* :
-      #   - The learned classifier itself.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [NMatrix] (shape: [1, n_samples]) The labels to be used for fitting the model.
+      # @return [OneVsRestClassifier] The learned classifier itself.
       def fit(x, y)
         @classes = y.uniq.sort
         @estimators = @classes.map do |label|
@@ -60,13 +56,8 @@ module SVMKit
 
       # Calculate confidence scores for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
-      # * *Returns* :
-      #   - Confidence scores per sample for each class.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [NMatrix] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
       def decision_function(x)
         n_samples, = x.shape
         n_classes = @classes.size
@@ -78,13 +69,8 @@ module SVMKit
 
       # Predict class labels for samples.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
-      # * *Returns* :
-      #   - Predicted class label per sample.
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
       def predict(x)
         n_samples, = x.shape
         decision_values = decision_function(x)
@@ -94,29 +80,26 @@ module SVMKit
 
       # Calculate the mean accuracy of the given testing data.
       #
-      # :
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
-      #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
-      # * *Returns* :
-      #   - Mean accuracy
+      # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+      # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+      # @return [Float] Mean accuracy
       def score(x, y)
         p = predict(x)
         n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         n_hits / y.size.to_f
       end
 
-      #
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about OneVsRestClassifier.
+      def marshal_dump
         { params: params,
           classes: @classes,
           estimators: @estimators.map { |e| Marshal.dump(e) } }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         self.params = obj[:params]
         @classes = obj[:classes]
         @estimators = obj[:estimators].map { |e| Marshal.load(e) }
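A minimal sketch combining the two classes above on dummy three-class data (NMatrix.random assumed available; one binary classifier is fitted per class in `fit`):

```ruby
require 'svmkit'

samples = NMatrix.random([90, 4], dtype: :float64)
labels  = NMatrix.new([1, 90], Array.new(90) { |n| n / 30 }) # classes 0, 1, 2

base = SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 10, random_seed: 1)
ovr  = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base)
ovr.fit(samples, labels)

puts ovr.estimators.size        # => 3, one binary classifier per class
puts ovr.score(samples, labels) # mean accuracy on the training set
```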
data/lib/svmkit/preprocessing/l2_normalizer.rb
CHANGED
@@ -6,32 +6,30 @@ module SVMKit
   module Preprocessing
     # Normalize samples to unit L2-norm.
     #
+    # @example
     #   normalizer = SVMKit::Preprocessing::L2Normalizer.new
     #   new_samples = normalizer.fit_transform(samples)
     class L2Normalizer
       include Base::BaseEstimator
       include Base::Transformer
 
-      #
+      # Return the vector consisting of the L2-norm of each sample.
+      # @return [NMatrix] (shape: [1, n_samples])
       attr_reader :norm_vec # :nodoc:
 
       # Create a new normalizer for normalizing to unit L2-norm.
       #
-      #
-      #   new() -> L2Normalizer
+      # @overload new() -> L2Normalizer
       def initialize(_params = {})
         @norm_vec = nil
       end
 
-      # Calculate L2
+      # Calculate the L2-norm of each sample.
       #
-      #
-      #   fit(x) -> L2Normalizer
+      # @overload fit(x) -> L2Normalizer
       #
-      #
-      #
-      # * *Returns* :
-      #   - L2Normalizer
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
+      # @return [L2Normalizer]
       def fit(x, _y = nil)
         n_samples, = x.shape
         @norm_vec = NMatrix.new([1, n_samples],
@@ -39,15 +37,12 @@ module SVMKit
         self
       end
 
-      # Calculate L2
+      # Calculate the L2-norm of each sample, and then normalize samples to unit L2-norm.
       #
-      #
-      #   fit_transform(x) -> NMatrix
+      # @overload fit_transform(x) -> NMatrix
       #
-      #
-      #
-      # * *Returns* :
-      #   - The normalized samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
+      # @return [NMatrix] The normalized samples.
       def fit_transform(x, _y = nil)
         fit(x)
         x / @norm_vec.transpose.repeat(x.shape[1], 1)
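A minimal numeric sketch of the normalizer on dummy data: each row is divided by its own L2-norm, so every output row has unit length.

```ruby
require 'svmkit'

# Two samples with three features each (row-major fill).
samples = NMatrix.new([2, 3], [3.0, 4.0, 0.0, 1.0, 2.0, 2.0])
normalizer = SVMKit::Preprocessing::L2Normalizer.new
normalized = normalizer.fit_transform(samples)

# The first row [3, 4, 0] has norm 5, so it becomes [0.6, 0.8, 0.0].
puts normalized.row(0).to_flat_a.inspect
puts normalizer.norm_vec.to_flat_a.inspect # => [5.0, 3.0]
```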
data/lib/svmkit/preprocessing/min_max_scaler.rb
CHANGED
@@ -6,6 +6,7 @@ module SVMKit
   module Preprocessing
     # Normalize samples by scaling each feature to a given range.
     #
+    # @example
     #   normalizer = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
     #   new_training_samples = normalizer.fit_transform(training_samples)
     #   new_testing_samples = normalizer.transform(testing_samples)
@@ -13,23 +14,24 @@ module SVMKit
       include Base::BaseEstimator
       include Base::Transformer
 
-
+      # @!visibility private
+      DEFAULT_PARAMS = {
         feature_range: [0.0, 1.0]
       }.freeze
 
-      #
-
+      # Return the vector consisting of the minimum value of each feature.
+      # @return [NMatrix] (shape: [1, n_features])
+      attr_reader :min_vec
 
-      #
-
+      # Return the vector consisting of the maximum value of each feature.
+      # @return [NMatrix] (shape: [1, n_features])
+      attr_reader :max_vec
 
       # Creates a new normalizer for scaling each feature to a given range.
       #
-      #
-      #   new(feature_range: [0.0, 1.0]) -> MinMaxScaler
+      # @overload new(feature_range: [0.0, 1.0]) -> MinMaxScaler
       #
-      #
-      #   - +:feature_range+ (Array) (defaults to: [0.0, 1.0]) -- The desired range of samples.
+      # @param feature_range [Array] (defaults to: [0.0, 1.0]) The desired range of samples.
       def initialize(params = {})
         @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
         @min_vec = nil
@@ -38,13 +40,10 @@ module SVMKit
 
       # Calculate the minimum and maximum value of each feature for scaling.
       #
-      #
-      #   fit(x) -> MinMaxScaler
+      # @overload fit(x) -> MinMaxScaler
       #
-      #
-      #
-      # * *Returns* :
-      #   - MinMaxScaler
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+      # @return [MinMaxScaler]
       def fit(x, _y = nil)
         @min_vec = x.min(0)
         @max_vec = x.max(0)
@@ -53,26 +52,18 @@ module SVMKit
 
       # Calculate the minimum and maximum values, and then normalize samples to feature_range.
       #
-      #
-      #   fit_transform(x) -> NMatrix
+      # @overload fit_transform(x) -> NMatrix
       #
-      #
-      #
-      # * *Returns* :
-      #   - The scaled samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+      # @return [NMatrix] The scaled samples.
       def fit_transform(x, _y = nil)
         fit(x).transform(x)
       end
 
       # Perform scaling of the given samples according to feature_range.
       #
-      #
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled.
-      # * *Returns* :
-      #   - The scaled samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to be scaled.
+      # @return [NMatrix] The scaled samples.
       def transform(x)
         n_samples, = x.shape
         dif_vec = @max_vec - @min_vec
@@ -80,15 +71,17 @@ module SVMKit
         nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
       end
 
-      #
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about MinMaxScaler.
+      def marshal_dump
         { params: @params,
           min_vec: Utils.dump_nmatrix(@min_vec),
           max_vec: Utils.dump_nmatrix(@max_vec) }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         @params = obj[:params]
         @min_vec = Utils.restore_nmatrix(obj[:min_vec])
         @max_vec = Utils.restore_nmatrix(obj[:max_vec])
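A minimal numeric sketch on dummy data: `fit` records per-feature minima and maxima, and `transform` maps each feature through (x - min) / (max - min) before rescaling to feature_range, so the learned statistics can be reused on new data.

```ruby
require 'svmkit'

training = NMatrix.new([3, 2], [1.0, 10.0, 2.0, 20.0, 3.0, 30.0])
scaler = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
scaled = scaler.fit_transform(training)

puts scaled.to_flat_a.inspect # => [0.0, 0.0, 0.5, 0.5, 1.0, 1.0]

# The fitted minima/maxima are applied to unseen samples.
puts scaler.transform(NMatrix.new([1, 2], [2.5, 15.0])).to_flat_a.inspect # => [0.75, 0.25]
```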
data/lib/svmkit/preprocessing/standard_scaler.rb
CHANGED
@@ -6,6 +6,7 @@ module SVMKit
   module Preprocessing
     # Normalize samples by centering and scaling to unit variance.
     #
+    # @example
     #   normalizer = SVMKit::Preprocessing::StandardScaler.new
     #   new_training_samples = normalizer.fit_transform(training_samples)
     #   new_testing_samples = normalizer.transform(testing_samples)
@@ -13,16 +14,17 @@ module SVMKit
       include Base::BaseEstimator
       include Base::Transformer
 
-      #
-
+      # Return the vector consisting of the mean value of each feature.
+      # @return [NMatrix] (shape: [1, n_features])
+      attr_reader :mean_vec
 
-      #
-
+      # Return the vector consisting of the standard deviation of each feature.
+      # @return [NMatrix] (shape: [1, n_features])
+      attr_reader :std_vec
 
       # Create a new normalizer for centering and scaling to unit variance.
       #
-      #
-      #   new() -> StandardScaler
+      # @overload new() -> StandardScaler
       def initialize(_params = {})
         @mean_vec = nil
         @std_vec = nil
@@ -30,13 +32,11 @@ module SVMKit
 
       # Calculate the mean value and standard deviation of each feature for scaling.
       #
-      #
-      #   fit(x) -> StandardScaler
+      # @overload fit(x) -> StandardScaler
       #
-      #
-      #
-      #
-      #   - StandardScaler
+      # @param x [NMatrix] (shape: [n_samples, n_features])
+      #   The samples to calculate the mean values and standard deviations.
+      # @return [StandardScaler]
       def fit(x, _y = nil)
         @mean_vec = x.mean(0)
         @std_vec = x.std(0)
@@ -45,39 +45,34 @@ module SVMKit
 
       # Calculate the mean values and standard deviations, and then normalize samples using them.
       #
-      #
-      #   fit_transform(x) -> NMatrix
+      # @overload fit_transform(x) -> NMatrix
       #
-      #
-      #
-      #
-      #   - The scaled samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features])
+      #   The samples to calculate the mean values and standard deviations.
+      # @return [NMatrix] The scaled samples.
       def fit_transform(x, _y = nil)
         fit(x).transform(x)
       end
 
       # Perform standardization of the given samples.
       #
-      #
-      #
-      #
-      # * *Arguments* :
-      #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled.
-      # * *Returns* :
-      #   - The scaled samples (NMatrix)
+      # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to be scaled.
+      # @return [NMatrix] The scaled samples.
       def transform(x)
         n_samples, = x.shape
         (x - @mean_vec.repeat(n_samples, 0)) / @std_vec.repeat(n_samples, 0)
       end
 
-      #
-
+      # Dump marshal data.
+      # @return [Hash] The marshal data about StandardScaler.
+      def marshal_dump
         { mean_vec: Utils.dump_nmatrix(@mean_vec),
           std_vec: Utils.dump_nmatrix(@std_vec) }
       end
 
-      #
-
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
         @mean_vec = Utils.restore_nmatrix(obj[:mean_vec])
         @std_vec = Utils.restore_nmatrix(obj[:std_vec])
         nil
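A minimal numeric sketch on dummy data: after fitting, `transform` centers each feature with the learned mean and divides by the learned standard deviation (the exact output values depend on which standard-deviation convention NMatrix#std uses).

```ruby
require 'svmkit'

training = NMatrix.new([4, 1], [2.0, 4.0, 6.0, 8.0])
scaler = SVMKit::Preprocessing::StandardScaler.new
standardized = scaler.fit_transform(training)

puts scaler.mean_vec.to_flat_a.inspect # => [5.0]
# Each output column now has (approximately) zero mean and unit variance.
puts standardized.to_flat_a.inspect
```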
data/lib/svmkit/utils.rb
CHANGED
@@ -3,13 +3,9 @@ module SVMKit
   module Utils
     class << self
       # Dump an NMatrix object as a Ruby Hash.
-      # # call-seq:
-      #   dump_nmatrix(mat) -> Hash
       #
-      #
-      #
-      # * *Returns* :
-      #   - A Ruby Hash containing matrix information.
+      # @param mat [NMatrix] An NMatrix object to be converted to a Ruby Hash.
+      # @return [Hash] A Ruby Hash containing matrix information.
       def dump_nmatrix(mat)
         return nil if mat.class != NMatrix
         { shape: mat.shape, array: mat.to_flat_a, dtype: mat.dtype, stype: mat.stype }
@@ -17,13 +13,8 @@ module SVMKit
 
       # Return the results of converting the dumped data into an NMatrix object.
       #
-      #
-      #
-      #
-      # * *Arguments* :
-      #   - +dumpted_mat+ -- A Ruby Hash about NMatrix object created with SVMKit::Utils.dump_nmatrix method.
-      # * *Returns* :
-      #   - An NMatrix object restored from the given Hash.
+      # @param dmp [Hash] A Ruby Hash about an NMatrix object created with the SVMKit::Utils.dump_nmatrix method.
+      # @return [NMatrix] An NMatrix object restored from the given Hash.
       def restore_nmatrix(dmp = {})
         return nil unless dmp.class == Hash && %i[shape array dtype stype].all?(&dmp.method(:has_key?))
         NMatrix.new(dmp[:shape], dmp[:array], dtype: dmp[:dtype], stype: dmp[:stype])
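The estimators above route their NMatrix members through these helpers inside marshal_dump/marshal_load, which is what makes the Marshal-based persistence in the README work. A minimal round-trip sketch on a dummy matrix:

```ruby
require 'svmkit'

mat = NMatrix.new([2, 2], [1.0, 2.0, 3.0, 4.0], dtype: :float64, stype: :dense)

dumped = SVMKit::Utils.dump_nmatrix(mat)
# => { shape: [2, 2], array: [1.0, 2.0, 3.0, 4.0], dtype: :float64, stype: :dense }

restored = SVMKit::Utils.restore_nmatrix(dumped)
puts restored.to_flat_a.inspect # => [1.0, 2.0, 3.0, 4.0]
```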
data/lib/svmkit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-10-
+date: 2017-10-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
|