svmkit 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -0
- data/README.md +3 -5
- data/lib/svmkit.rb +4 -6
- data/lib/svmkit/dataset.rb +90 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +28 -35
- data/lib/svmkit/kernel_machine/kernel_svc.rb +27 -34
- data/lib/svmkit/linear_model/logistic_regression.rb +43 -35
- data/lib/svmkit/linear_model/{pegasos_svc.rb → svc.rb} +45 -39
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +20 -31
- data/lib/svmkit/pairwise_metric.rb +20 -20
- data/lib/svmkit/preprocessing/l2_normalizer.rb +9 -12
- data/lib/svmkit/preprocessing/min_max_scaler.rb +17 -24
- data/lib/svmkit/preprocessing/standard_scaler.rb +16 -17
- data/lib/svmkit/version.rb +1 -1
- data/svmkit.gemspec +15 -3
- metadata +43 -9
- data/lib/svmkit/utils.rb +0 -24
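The common thread through the library diffs below is a backend migration: matrix types and helpers move from NMatrix to Numo::NArray (`x.row(n)` becomes `x[n, true]`, `NMatrix.new(...)` becomes `Numo::DFloat.zeros(...)`, and elementwise math goes through `Numo::NMath`). As a rough illustration of the data types callers now pass — a sketch assuming `numo-narray` is installed; the arrays are made-up toy values:

```ruby
require 'numo/narray'

# svmkit 0.2.0 conventions per the @param tags added below:
# samples are Numo::DFloat, labels are Numo::Int32.
samples = Numo::DFloat[[1.0, 2.0], [2.0, 1.0], [-1.0, -2.0], [-2.0, -1.0]]
labels  = Numo::Int32[1, 1, -1, -1]

samples[0, true]                       # first row (replaces NMatrix#row)
Numo::DFloat.zeros(2)                  # zero weight vector for initialization
Numo::NMath.sqrt((samples**2).sum(1))  # row-wise L2 norms
```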
data/lib/svmkit/linear_model/{pegasos_svc.rb → svc.rb}
(removed lines that the diff viewer truncated mid-line are marked with …)

@@ -4,17 +4,17 @@ require 'svmkit/base/classifier'
 module SVMKit
   # This module consists of the classes that implement generalized linear models.
   module LinearModel
-    # …
+    # SVC is a class that implements Support Vector Classifier with the Pegasos algorithm.
     #
     # @example
     #   estimator =
-    #     SVMKit::LinearModel::…
+    #     SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, traininig_labels)
     #   results = estimator.predict(testing_samples)
     #
     # *Reference*
     # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
-    class PegasosSVC
+    class SVC
       include Base::BaseEstimator
       include Base::Classifier
 
@@ -29,7 +29,7 @@ module SVMKit
       }.freeze
 
       # Return the weight vector for SVC.
-      # @return [ …
+      # @return [Numo::DFloat] (shape: [n_features])
       attr_reader :weight_vec
 
       # Return the bias term (a.k.a. intercept) for SVC.
@@ -42,17 +42,20 @@ module SVMKit
 
       # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
      #
-      # @ …
-      # …
-      # @param …
-      # @ …
-      # @ …
-      # @ …
-      …
-      …
-      …
-      …
-        self.params = …
+      # @param reg_param [Float] The regularization parameter.
+      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+      # @param bias_scale [Float] The scale of the bias term.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
+        self.params = {}
+        self.params[:reg_param] = reg_param
+        self.params[:fit_bias] = fit_bias
+        self.params[:bias_scale] = bias_scale
+        self.params[:max_iter] = max_iter
+        self.params[:batch_size] = batch_size
+        self.params[:random_seed] = random_seed
         self.params[:random_seed] ||= srand
         @weight_vec = nil
         @bias_term = 0.0
@@ -61,38 +64,41 @@ module SVMKit
 
       # Fit the model with given training data.
       #
-      # @param x [ …
-      # @param y [ …
-      # @return [ …
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+      # @return [SVC] The learned classifier itself.
       def fit(x, y)
         # Generate binary labels
-        negative_label = y.uniq.sort.shift
-        bin_y = y. …
+        negative_label = y.to_a.uniq.sort.shift
+        bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
         # Expand feature vectors for bias term.
         samples = x
-        …
+        if params[:fit_bias]
+          samples = Numo::NArray.hstack(
+            [samples, Numo::DFloat.ones([x.shape[0], 1]) * params[:bias_scale]]
+          )
+        end
         # Initialize some variables.
         n_samples, n_features = samples.shape
         rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
-        weight_vec = …
+        weight_vec = Numo::DFloat.zeros(n_features)
         # Start optimization.
         params[:max_iter].times do |t|
           # random sampling
           subset_ids = rand_ids.shift(params[:batch_size])
           rand_ids.concat(subset_ids)
-          target_ids = subset_ids.map …
-            n if weight_vec.dot(samples.row(n).transpose) * bin_y[n] < 1
-          end
+          target_ids = subset_ids.map { |n| n if weight_vec.dot(samples[n, true]) * bin_y[n] < 1 }.compact
           n_subsamples = target_ids.size
           next if n_subsamples.zero?
           # update the weight vector.
           eta = 1.0 / (params[:reg_param] * (t + 1))
-          mean_vec = …
-          target_ids.each { |n| mean_vec += samples …
+          mean_vec = Numo::DFloat.zeros(n_features)
+          target_ids.each { |n| mean_vec += samples[n, true] * bin_y[n] }
           mean_vec *= eta / n_subsamples
           weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
           # scale the weight vector.
-          …
+          norm = Math.sqrt(weight_vec.dot(weight_vec))
+          scaler = (1.0 / params[:reg_param]**0.5) / (norm + 1.0e-12)
           weight_vec *= [1.0, scaler].min
         end
         # Store the learned model.
@@ -108,42 +114,42 @@ module SVMKit
 
       # Calculate confidence scores for samples.
       #
-      # @param x [ …
-      # @return [ …
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
       def decision_function(x)
         @weight_vec.dot(x.transpose) + @bias_term
       end
 
       # Predict class labels for samples.
       #
-      # @param x [ …
-      # @return [ …
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
-        decision_function(x).map { |v| v >= 0 ? 1 : -1 }
+        Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
       end
 
       # Claculate the mean accuracy of the given testing data.
       #
-      # @param x [ …
-      # @param y [ …
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
+      # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
       # @return [Float] Mean accuracy
       def score(x, y)
         p = predict(x)
-        n_hits = (y. …
+        n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         n_hits / y.size.to_f
       end
 
       # Dump marshal data.
-      # @return [Hash] The marshal data about …
+      # @return [Hash] The marshal data about SVC.
       def marshal_dump
-        { params: params, weight_vec: …
+        { params: params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
       end
 
       # Load marshal data.
       # @return [nil]
       def marshal_load(obj)
         self.params = obj[:params]
-        @weight_vec = …
+        @weight_vec = obj[:weight_vec]
         @bias_term = obj[:bias_term]
         @rng = obj[:rng]
         nil
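The optimizer itself is unchanged Pegasos: step size `eta = 1/(reg_param * (t + 1))`, a sub-gradient step over the margin-violating mini-batch samples, then projection onto the ball of radius `1/sqrt(reg_param)` (the new `norm`/`scaler` lines). A minimal usage sketch of the renamed class, built from the `@example` block and the `fit`/`predict`/`score` signatures in the hunks above; the four-point dataset is hypothetical:

```ruby
require 'svmkit'
require 'numo/narray'

# Hypothetical linearly separable toy data.
samples = Numo::DFloat[[-2.0, -1.0], [-1.0, -2.0], [1.0, 2.0], [2.0, 1.0]]
labels  = Numo::Int32[-1, -1, 1, 1]

estimator = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 100,
                                         batch_size: 2, random_seed: 1)
estimator.fit(samples, labels)
estimator.predict(samples)       # => Numo::Int32, one label per sample
estimator.score(samples, labels) # => mean accuracy as a Float
```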
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb

@@ -16,40 +16,34 @@ module SVMKit
       include Base::BaseEstimator
       include Base::Classifier
 
-      # @!visibility private
-      DEFAULT_PARAMS = {
-        estimator: nil
-      }.freeze
-
       # Return the set of estimators.
       # @return [Array<Classifier>]
       attr_reader :estimators
 
       # Return the class labels.
-      # @return [ …
+      # @return [Numo::Int32] (shape: [n_classes])
       attr_reader :classes
 
       # Create a new multi-label classifier with the one-vs-rest startegy.
       #
-      # @ …
-      …
-      …
-      …
-      def initialize(params = {})
-        self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
+      # @param estimator [Classifier] The (binary) classifier for construction a multi-label classifier.
+      def initialize(estimator: nil)
+        self.params = {}
+        self.params[:estimator] = estimator
         @estimators = nil
         @classes = nil
       end
 
       # Fit the model with given training data.
       #
-      # @param x [ …
-      # @param y [ …
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [OneVsRestClassifier] The learned classifier itself.
       def fit(x, y)
-        …
-        @ …
-        …
+        y_arr = y.to_a
+        @classes = Numo::Int32.asarray(y_arr.uniq.sort)
+        @estimators = @classes.to_a.map do |label|
+          bin_y = Numo::Int32.asarray(y_arr.map { |l| l == label ? 1 : -1 })
           params[:estimator].dup.fit(x, bin_y)
         end
         self
@@ -57,36 +51,31 @@ module SVMKit
 
       # Calculate confidence scores for samples.
       #
-      # @param x [ …
-      # @return [ …
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
       def decision_function(x)
-        n_samples, = x.shape
         n_classes = @classes.size
-        …
-          [n_classes, n_samples],
-          Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }.flatten
-        ).transpose
+        Numo::DFloat.asarray(Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }).transpose
       end
 
       # Predict class labels for samples.
       #
-      # @param x [ …
-      # @return [ …
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
       def predict(x)
         n_samples, = x.shape
         decision_values = decision_function(x)
-        …
-          decision_values.each_row.map { |vals| @classes[vals.to_a.index(vals.to_a.max)] })
+        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
       end
 
       # Claculate the mean accuracy of the given testing data.
       #
-      # @param x [ …
-      # @param y [ …
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
+      # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
       # @return [Float] Mean accuracy
       def score(x, y)
         p = predict(x)
-        n_hits = (y. …
+        n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         n_hits / y.size.to_f
       end
 
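With `initialize` now taking an `estimator:` keyword instead of a params hash, construction reads as below. A sketch assuming the class lives at `SVMKit::Multiclass::OneVsRestClassifier` (inferred from the file path in the summary above); the three-class data is made up:

```ruby
require 'svmkit'
require 'numo/narray'

samples = Numo::DFloat[[-2.0, -2.0], [-1.0, -2.0], [0.0, 0.0],
                       [1.0, 0.0], [2.0, 2.0], [2.0, 1.0]]
labels  = Numo::Int32[0, 0, 1, 1, 2, 2]

base = SVMKit::LinearModel::SVC.new(max_iter: 50, batch_size: 2, random_seed: 1)
ovr  = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base)
ovr.fit(samples, labels)   # trains one duplicated binary SVC per class
ovr.classes                # => Numo::Int32[0, 1, 2]
ovr.predict(samples)       # argmax over the per-class decision values
```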
data/lib/svmkit/pairwise_metric.rb

@@ -4,38 +4,38 @@ module SVMKit
     class << self
       # Calculate the pairwise euclidean distances between x and y.
       #
-      # @param x [ …
-      # @param y [ …
-      # @return [ …
+      # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
+      # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
+      # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
       def euclidean_distance(x, y = nil)
         y = x if y.nil?
         sum_x_vec = (x**2).sum(1)
         sum_y_vec = (y**2).sum(1)
         dot_xy_mat = x.dot(y.transpose)
         distance_matrix = dot_xy_mat * -2.0 +
-                          sum_x_vec. …
-                          sum_y_vec. …
-        distance_matrix.abs …
+                          sum_x_vec.tile(y.shape[0], 1).transpose +
+                          sum_y_vec.tile(x.shape[0], 1)
+        Numo::NMath.sqrt(distance_matrix.abs)
       end
 
       # Calculate the rbf kernel between x and y.
       #
-      # @param x [ …
-      # @param y [ …
+      # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
+      # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
       # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
-      # @return [ …
+      # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
       def rbf_kernel(x, y = nil, gamma = nil)
         y = x if y.nil?
         gamma ||= 1.0 / x.shape[1]
         distance_matrix = euclidean_distance(x, y)
-        ((distance_matrix**2) * -gamma) …
+        Numo::NMath.exp((distance_matrix**2) * -gamma)
       end
 
       # Calculate the linear kernel between x and y.
       #
-      # @param x [ …
-      # @param y [ …
-      # @return [ …
+      # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
+      # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
+      # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
       def linear_kernel(x, y = nil)
         y = x if y.nil?
         x.dot(y.transpose)
@@ -43,12 +43,12 @@ module SVMKit
 
       # Calculate the polynomial kernel between x and y.
       #
-      # @param x [ …
-      # @param y [ …
+      # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
+      # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
       # @param degree [Integer] The parameter of polynomial kernel.
       # @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
       # @param coef [Integer] The parameter of polynomial kernel.
-      # @return [ …
+      # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
       def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
         y = x if y.nil?
         gamma ||= 1.0 / x.shape[1]
@@ -57,15 +57,15 @@ module SVMKit
 
       # Calculate the sigmoid kernel between x and y.
       #
-      # @param x [ …
-      # @param y [ …
+      # @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
+      # @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
       # @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
       # @param coef [Integer] The parameter of polynomial kernel.
-      # @return [ …
+      # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
       def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
         y = x if y.nil?
         gamma ||= 1.0 / x.shape[1]
-        (x.dot(y.transpose) * gamma + coef) …
+        Numo::NMath.tanh(x.dot(y.transpose) * gamma + coef)
       end
     end
   end
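The kernel helpers now return Numo arrays and route the elementwise pieces through `Numo::NMath`. A quick numeric check of `euclidean_distance` and `rbf_kernel` as defined above (module path `SVMKit::PairwiseMetric` assumed from the file name; values chosen so the answer is a 3-4-5 triangle):

```ruby
require 'svmkit'
require 'numo/narray'

x = Numo::DFloat[[0.0, 0.0], [3.0, 4.0]]

SVMKit::PairwiseMetric.euclidean_distance(x)
# => 2x2 Numo::DFloat with 5.0 off the diagonal

SVMKit::PairwiseMetric.rbf_kernel(x, nil, 0.1)
# => Numo::NMath.exp(-0.1 * squared distances); off-diagonal ≈ exp(-2.5)
```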
data/lib/svmkit/preprocessing/l2_normalizer.rb

@@ -14,13 +14,12 @@ module SVMKit
       include Base::Transformer
 
       # Return the vector consists of L2-norm for each sample.
-      # @return [ …
+      # @return [Numo::DFloat] (shape: [n_samples])
       attr_reader :norm_vec # :nodoc:
 
       # Create a new normalizer for normaliing to unit L2-norm.
-      …
-      …
-      def initialize(_params = {})
+      def initialize()
+        self.params = {}
         @norm_vec = nil
       end
 
@@ -28,24 +27,22 @@ module SVMKit
       #
       # @overload fit(x) -> L2Normalizer
       #
-      #   @param x [ …
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
      #   @return [L2Normalizer]
       def fit(x, _y = nil)
-        …
-        @norm_vec = NMatrix.new([1, n_samples], …
-                                Array.new(n_samples) { |n| x.row(n).norm2 })
+        @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
         self
       end
 
       # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
       #
-      # @overload fit_transform(x) -> …
+      # @overload fit_transform(x) -> Numo::DFloat
       #
-      #   @param x [ …
-      #   @return [ …
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
+      #   @return [Numo::DFloat] The normalized samples.
       def fit_transform(x, _y = nil)
         fit(x)
-        x / @norm_vec. …
+        x / @norm_vec.tile(x.shape[1], 1).transpose
       end
     end
   end
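The rewritten `fit` computes all row norms in one vectorized expression instead of an NMatrix row loop. A usage sketch with hand-checkable values:

```ruby
require 'svmkit'
require 'numo/narray'

x = Numo::DFloat[[3.0, 4.0], [1.0, 0.0]]

normalizer = SVMKit::Preprocessing::L2Normalizer.new
normalizer.fit_transform(x)  # => [[0.6, 0.8], [1.0, 0.0]], unit-norm rows
normalizer.norm_vec          # => Numo::DFloat[5.0, 1.0]
```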
data/lib/svmkit/preprocessing/min_max_scaler.rb

@@ -14,27 +14,20 @@ module SVMKit
       include Base::BaseEstimator
       include Base::Transformer
 
-      # @!visibility private
-      DEFAULT_PARAMS = {
-        feature_range: [0.0, 1.0]
-      }.freeze
-
       # Return the vector consists of the minimum value for each feature.
-      # @return [ …
+      # @return [Numo::DFloat] (shape: [n_features])
       attr_reader :min_vec
 
       # Return the vector consists of the maximum value for each feature.
-      # @return [ …
+      # @return [Numo::DFloat] (shape: [n_features])
       attr_reader :max_vec
 
       # Creates a new normalizer for scaling each feature to a given range.
       #
-      # @ …
-      …
-      …
-      …
-      def initialize(params = {})
-        @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
+      # @param feature_range [Array<Float>] The desired range of samples.
+      def initialize(feature_range: [0.0, 1.0])
+        self.params = {}
+        self.params[:feature_range] = feature_range
         @min_vec = nil
         @max_vec = nil
       end
@@ -43,7 +36,7 @@ module SVMKit
       #
       # @overload fit(x) -> MinMaxScaler
       #
-      #   @param x [ …
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
       #   @return [MinMaxScaler]
       def fit(x, _y = nil)
         @min_vec = x.min(0)
@@ -53,22 +46,22 @@ module SVMKit
 
       # Calculate the minimum and maximum values, and then normalize samples to feature_range.
       #
-      # @overload fit_transform(x) -> …
+      # @overload fit_transform(x) -> Numo::DFloat
       #
-      #   @param x [ …
-      #   @return [ …
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+      #   @return [Numo::DFloat] The scaled samples.
       def fit_transform(x, _y = nil)
         fit(x).transform(x)
       end
 
       # Perform scaling the given samples according to feature_range.
       #
-      # @param x [ …
-      # @return [ …
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
+      # @return [Numo::DFloat] The scaled samples.
       def transform(x)
         n_samples, = x.shape
         dif_vec = @max_vec - @min_vec
-        nx = (x - @min_vec. …
+        nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
         nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
       end
 
@@ -76,16 +69,16 @@ module SVMKit
       # @return [Hash] The marshal data about MinMaxScaler.
       def marshal_dump
         { params: @params,
-          min_vec: …
-          max_vec: …
+          min_vec: @min_vec,
+          max_vec: @max_vec }
       end
 
       # Load marshal data.
       # @return [nil]
       def marshal_load(obj)
         @params = obj[:params]
-        @min_vec = …
-        @max_vec = …
+        @min_vec = obj[:min_vec]
+        @max_vec = obj[:max_vec]
         nil
       end
     end
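And the keyword-argument constructor on `MinMaxScaler`, exercised end to end. A sketch with toy values chosen so the per-column rescaling is easy to verify:

```ruby
require 'svmkit'
require 'numo/narray'

x = Numo::DFloat[[1.0, 10.0], [2.0, 30.0], [3.0, 20.0]]

scaler = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
scaler.fit_transform(x)
# columns rescaled to [0, 1]: [[0.0, 0.0], [0.5, 1.0], [1.0, 0.5]]

scaler.transform(Numo::DFloat[[2.0, 20.0]])
# => [[0.5, 0.5]], reusing the fitted @min_vec/@max_vec
```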