svmkit 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -1
- data/HISTORY.md +4 -0
- data/README.md +33 -0
- data/lib/svmkit.rb +1 -0
- data/lib/svmkit/ensemble/ada_boost_regressor.rb +1 -1
- data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +1 -1
- data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb +1 -1
- data/lib/svmkit/pipeline/pipeline.rb +187 -0
- data/lib/svmkit/preprocessing/l2_normalizer.rb +9 -0
- data/lib/svmkit/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f20192e678f6f066eb1d40c066f0e9a0efefd3a0
|
4
|
+
data.tar.gz: 1be802cdbbfb2ee7a641fb78d1409c2ee49b8450
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 43471c5a4ef290781d5d2270732313fbcffba60a4351805d6c7bb8abec7537bcd8ac50260600fbfb1ff52c947c45c3f6f19b9ccecd47e6015e6ac45da5c855a6
|
7
|
+
data.tar.gz: 908f675396a2da835b82da8cf117a4a17d6d90d489618cf110e993de6c03d6ec8e6651115df333033314b0f54c1e931f68da8ff541a1b5e22886741f48496259
|
data/.travis.yml
CHANGED
@@ -3,10 +3,13 @@ os: linux
|
|
3
3
|
dist: trusty
|
4
4
|
language: ruby
|
5
5
|
rvm:
|
6
|
+
- 2.1
|
6
7
|
- 2.2
|
7
8
|
- 2.3
|
8
9
|
- 2.4
|
9
10
|
- 2.5
|
10
11
|
- 2.6
|
11
12
|
before_install:
|
12
|
-
- gem
|
13
|
+
- travis_retry gem update --system || travis_retry gem update --system 2.7.8
|
14
|
+
- travis_retry gem install bundler --no-document || travis_retry gem install bundler --no-document -v 1.17.3
|
15
|
+
|
data/HISTORY.md
CHANGED
data/README.md
CHANGED
@@ -124,6 +124,39 @@ mean_logloss = report[:test_score].inject(:+) / kf.n_splits
|
|
124
124
|
puts("5-CV mean log-loss: %.3f" % mean_logloss)
|
125
125
|
```
|
126
126
|
|
127
|
+
### Example 3. Pipeline
|
128
|
+
|
129
|
+
```ruby
|
130
|
+
require 'svmkit'
|
131
|
+
|
132
|
+
# Load dataset.
|
133
|
+
samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')
|
134
|
+
samples = Numo::DFloat.cast(samples)
|
135
|
+
|
136
|
+
# Construct pipeline with kernel approximation and SVC.
|
137
|
+
rbf = SVMKit::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
|
138
|
+
svc = SVMKit::LinearModel::SVC.new(reg_param: 0.0001, max_iter: 1000, random_seed: 1)
|
139
|
+
pipeline = SVMKit::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
|
140
|
+
|
141
|
+
# Define the splitting strategy and cross validation.
|
142
|
+
kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
|
143
|
+
cv = SVMKit::ModelSelection::CrossValidation.new(estimator: pipeline, splitter: kf)
|
144
|
+
|
145
|
+
# Perform 5-fold cross validation.
|
146
|
+
report = cv.perform(samples, labels)
|
147
|
+
|
148
|
+
# Output result.
|
149
|
+
mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
|
150
|
+
puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
|
151
|
+
```
|
152
|
+
|
153
|
+
Executing the above script produces the following output.
|
154
|
+
|
155
|
+
```bash
|
156
|
+
$ ruby pipeline.rb
|
157
|
+
5-CV mean accuracy: 99.2 %
|
158
|
+
```
|
159
|
+
|
127
160
|
## Development
|
128
161
|
|
129
162
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/svmkit.rb
CHANGED
@@ -20,6 +20,7 @@ require 'svmkit/optimizer/sgd'
|
|
20
20
|
require 'svmkit/optimizer/rmsprop'
|
21
21
|
require 'svmkit/optimizer/nadam'
|
22
22
|
require 'svmkit/optimizer/yellow_fin'
|
23
|
+
require 'svmkit/pipeline/pipeline'
|
23
24
|
require 'svmkit/kernel_approximation/rbf'
|
24
25
|
require 'svmkit/linear_model/sgd_linear_estimator'
|
25
26
|
require 'svmkit/linear_model/svc'
|
@@ -8,7 +8,7 @@ require 'svmkit/tree/decision_tree_regressor'
|
|
8
8
|
|
9
9
|
module SVMKit
|
10
10
|
module Ensemble
|
11
|
-
# AdaBoostRegressor is a class that implements random forest for regression
|
11
|
+
# AdaBoostRegressor is a class that implements AdaBoost for regression.
|
12
12
|
# This class uses decision tree for a weak learner.
|
13
13
|
#
|
14
14
|
# @example
|
@@ -12,7 +12,7 @@ module SVMKit
|
|
12
12
|
#
|
13
13
|
# @example
|
14
14
|
# estimator =
|
15
|
-
# SVMKit::
|
15
|
+
# SVMKit::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 5)
|
16
16
|
# estimator.fit(training_samples, training_labels)
|
17
17
|
# results = estimator.predict(testing_samples)
|
18
18
|
#
|
@@ -11,7 +11,7 @@ module SVMKit
|
|
11
11
|
#
|
12
12
|
# @example
|
13
13
|
# estimator =
|
14
|
-
# SVMKit::
|
14
|
+
# SVMKit::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 5)
|
15
15
|
# estimator.fit(training_samples, training_target_values)
|
16
16
|
# results = estimator.predict(testing_samples)
|
17
17
|
#
|
@@ -0,0 +1,187 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'svmkit/validation'
|
4
|
+
require 'svmkit/base/base_estimator'
|
5
|
+
|
6
|
+
module SVMKit
|
7
|
+
# Module implementing utilities for pipelines, each of which consists of a chain of transformers and estimators.
|
8
|
+
module Pipeline
|
9
|
+
# Pipeline is a class that implements the function to perform the transformers and estimators sequentially.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# rbf = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
|
13
|
+
# svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0, fit_bias: true, max_iter: 5000, random_seed: 1)
|
14
|
+
# pipeline = SVMKit::Pipeline::Pipeline.new(steps: { trs: rbf, est: svc })
|
15
|
+
# pipeline.fit(training_samples, training_labels)
|
16
|
+
# results = pipeline.predict(testing_samples)
|
17
|
+
#
|
18
|
+
class Pipeline
|
19
|
+
include Base::BaseEstimator
|
20
|
+
include Validation
|
21
|
+
|
22
|
+
# Return the steps.
|
23
|
+
# @return [Hash]
|
24
|
+
attr_reader :steps
|
25
|
+
|
26
|
+
# Create a new pipeline.
|
27
|
+
#
|
28
|
+
# @param steps [Hash] List of transformers and estimators. The order of transforms follows the insertion order of hash keys.
|
29
|
+
# The last entry is considered an estimator.
|
30
|
+
def initialize(steps:)
|
31
|
+
check_params_type(Hash, steps: steps)
|
32
|
+
validate_steps(steps)
|
33
|
+
@params = {}
|
34
|
+
@steps = steps
|
35
|
+
end
|
36
|
+
|
37
|
+
# Fit the model with given training data.
|
38
|
+
#
|
39
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
|
40
|
+
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
|
41
|
+
# @return [Pipeline] The learned pipeline itself.
|
42
|
+
def fit(x, y)
|
43
|
+
trans_x = apply_transforms(x, y, fit: true)
|
44
|
+
last_estimator.fit(trans_x, y) unless last_estimator.nil?
|
45
|
+
self
|
46
|
+
end
|
47
|
+
|
48
|
+
# Call the fit_predict method of last estimator after applying all transforms.
|
49
|
+
#
|
50
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
|
51
|
+
# @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
|
52
|
+
# @return [Numo::NArray] The predicted results by last estimator.
|
53
|
+
def fit_predict(x, y = nil)
|
54
|
+
trans_x = apply_transforms(x, y, fit: true)
|
55
|
+
last_estimator.fit_predict(trans_x)
|
56
|
+
end
|
57
|
+
|
58
|
+
# Call the fit_transform method of last estimator after applying all transforms.
|
59
|
+
#
|
60
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
|
61
|
+
# @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
|
62
|
+
# @return [Numo::NArray] The transformed samples returned by the fit_transform method of last estimator.
|
63
|
+
def fit_transform(x, y = nil)
|
64
|
+
trans_x = apply_transforms(x, y, fit: true)
|
65
|
+
last_estimator.fit_transform(trans_x, y)
|
66
|
+
end
|
67
|
+
|
68
|
+
# Call the decision_function method of last estimator after applying all transforms.
|
69
|
+
#
|
70
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
71
|
+
# @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
|
72
|
+
def decision_function(x)
|
73
|
+
trans_x = apply_transforms(x)
|
74
|
+
last_estimator.decision_function(trans_x)
|
75
|
+
end
|
76
|
+
|
77
|
+
# Call the predict method of last estimator after applying all transforms.
|
78
|
+
#
|
79
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
|
80
|
+
# @return [Numo::NArray] The predicted results by last estimator.
|
81
|
+
def predict(x)
|
82
|
+
trans_x = apply_transforms(x)
|
83
|
+
last_estimator.predict(trans_x)
|
84
|
+
end
|
85
|
+
|
86
|
+
# Call the predict_log_proba method of last estimator after applying all transforms.
|
87
|
+
#
|
88
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
|
89
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
|
90
|
+
def predict_log_proba(x)
|
91
|
+
trans_x = apply_transforms(x)
|
92
|
+
last_estimator.predict_log_proba(trans_x)
|
93
|
+
end
|
94
|
+
|
95
|
+
# Call the predict_proba method of last estimator after applying all transforms.
|
96
|
+
#
|
97
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
98
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
99
|
+
def predict_proba(x)
|
100
|
+
trans_x = apply_transforms(x)
|
101
|
+
last_estimator.predict_proba(trans_x)
|
102
|
+
end
|
103
|
+
|
104
|
+
# Call the transform method of last estimator after applying all transforms.
|
105
|
+
#
|
106
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
|
107
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
|
108
|
+
def transform(x)
|
109
|
+
trans_x = apply_transforms(x)
|
110
|
+
last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
|
111
|
+
end
|
112
|
+
|
113
|
+
# Call the inverse_transform method in reverse order.
|
114
|
+
#
|
115
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
|
116
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
|
117
|
+
def inverse_transform(z)
|
118
|
+
itrans_z = z
|
119
|
+
@steps.keys.reverse.each do |name|
|
120
|
+
transformer = @steps[name]
|
121
|
+
next if transformer.nil?
|
122
|
+
itrans_z = transformer.inverse_transform(itrans_z)
|
123
|
+
end
|
124
|
+
itrans_z
|
125
|
+
end
|
126
|
+
|
127
|
+
# Call the score method of last estimator after applying all transforms.
|
128
|
+
#
|
129
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
|
130
|
+
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
|
131
|
+
# @return [Float] The score of last estimator
|
132
|
+
def score(x, y)
|
133
|
+
trans_x = apply_transforms(x)
|
134
|
+
last_estimator.score(trans_x, y)
|
135
|
+
end
|
136
|
+
|
137
|
+
# Dump marshal data.
|
138
|
+
# @return [Hash] The marshal data about Pipeline.
|
139
|
+
def marshal_dump
|
140
|
+
{ params: @params,
|
141
|
+
steps: @steps }
|
142
|
+
end
|
143
|
+
|
144
|
+
# Load marshal data.
|
145
|
+
# @return [nil]
|
146
|
+
def marshal_load(obj)
|
147
|
+
@params = obj[:params]
|
148
|
+
@steps = obj[:steps]
|
149
|
+
nil
|
150
|
+
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
def validate_steps(steps)
|
155
|
+
steps.keys[0...-1].each do |name|
|
156
|
+
transformer = steps[name]
|
157
|
+
next if transformer.nil? || %i[fit transform].all? { |m| transformer.class.method_defined?(m) }
|
158
|
+
raise TypeError,
|
159
|
+
'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
|
160
|
+
"#{name} => #{transformer.class}"
|
161
|
+
end
|
162
|
+
|
163
|
+
estimator = steps[steps.keys.last]
|
164
|
+
unless estimator.nil? || estimator.class.method_defined?(:fit)
|
165
|
+
raise TypeError,
|
166
|
+
'Class of last step in pipeline should be implemented fit method: ' \
|
167
|
+
"#{steps.keys.last} => #{estimator.class}"
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def apply_transforms(x, y = nil, fit: false)
|
172
|
+
trans_x = x
|
173
|
+
@steps.keys[0...-1].each do |name|
|
174
|
+
transformer = @steps[name]
|
175
|
+
next if transformer.nil?
|
176
|
+
transformer.fit(trans_x, y) if fit
|
177
|
+
trans_x = transformer.transform(trans_x)
|
178
|
+
end
|
179
|
+
trans_x
|
180
|
+
end
|
181
|
+
|
182
|
+
def last_estimator
|
183
|
+
@steps[@steps.keys.last]
|
184
|
+
end
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
@@ -49,6 +49,15 @@ module SVMKit
|
|
49
49
|
fit(x)
|
50
50
|
x / @norm_vec.tile(x.shape[1], 1).transpose
|
51
51
|
end
|
52
|
+
|
53
|
+
# Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
|
54
|
+
# This method calls the fit_transform method. This method exists for the Pipeline class.
|
55
|
+
#
|
56
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
57
|
+
# @return [Numo::DFloat] The normalized samples.
|
58
|
+
def transform(x)
|
59
|
+
fit_transform(x)
|
60
|
+
end
|
52
61
|
end
|
53
62
|
end
|
54
63
|
end
|
data/lib/svmkit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -157,6 +157,7 @@ files:
|
|
157
157
|
- lib/svmkit/optimizer/sgd.rb
|
158
158
|
- lib/svmkit/optimizer/yellow_fin.rb
|
159
159
|
- lib/svmkit/pairwise_metric.rb
|
160
|
+
- lib/svmkit/pipeline/pipeline.rb
|
160
161
|
- lib/svmkit/polynomial_model/factorization_machine_classifier.rb
|
161
162
|
- lib/svmkit/polynomial_model/factorization_machine_regressor.rb
|
162
163
|
- lib/svmkit/preprocessing/l2_normalizer.rb
|