svmkit 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f708500e8ba450849736cd02b152b30e302c1bb
4
- data.tar.gz: 210256fcd91375e96b4d68015fae2bf2a5b0d4be
3
+ metadata.gz: f20192e678f6f066eb1d40c066f0e9a0efefd3a0
4
+ data.tar.gz: 1be802cdbbfb2ee7a641fb78d1409c2ee49b8450
5
5
  SHA512:
6
- metadata.gz: 4a03201b32dc7a5c0db43bfdfe742cc5b369449c2ce0dc63f4905e3e0b9186276f5b382c1b6bff45acdfb91f61712befc58538c1dfa74874722127bec5e2bf03
7
- data.tar.gz: deea7c688685935ebe2574448973903115e727cc479b12ea42f382ea697f7218b0de27cccf978c7ea0f4aa0f0fa0bfd5a97ccbbb88e2ea0b87cd1a5ca00cc5f3
6
+ metadata.gz: 43471c5a4ef290781d5d2270732313fbcffba60a4351805d6c7bb8abec7537bcd8ac50260600fbfb1ff52c947c45c3f6f19b9ccecd47e6015e6ac45da5c855a6
7
+ data.tar.gz: 908f675396a2da835b82da8cf117a4a17d6d90d489618cf110e993de6c03d6ec8e6651115df333033314b0f54c1e931f68da8ff541a1b5e22886741f48496259
data/.travis.yml CHANGED
@@ -3,10 +3,13 @@ os: linux
3
3
  dist: trusty
4
4
  language: ruby
5
5
  rvm:
6
+ - 2.1
6
7
  - 2.2
7
8
  - 2.3
8
9
  - 2.4
9
10
  - 2.5
10
11
  - 2.6
11
12
  before_install:
12
- - gem install --no-document bundler -v '>= 1.17'
13
+ - travis_retry gem update --system || travis_retry gem update --system 2.7.8
14
+ - travis_retry gem install bundler --no-document || travis_retry gem install bundler --no-document -v 1.17.3
15
+
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.7.2
2
+ - Add class for Pipeline that constructs a chain of transformers and estimators.
3
+ - Fix some typos on document.
4
+
1
5
  # 0.7.1
2
6
  - Fix to use CSV class in parsing libsvm format file.
3
7
  - Refactor ensemble estimators.
data/README.md CHANGED
@@ -124,6 +124,39 @@ mean_logloss = report[:test_score].inject(:+) / kf.n_splits
124
124
  puts("5-CV mean log-loss: %.3f" % mean_logloss)
125
125
  ```
126
126
 
127
+ ### Example 3. Pipeline
128
+
129
+ ```ruby
130
+ require 'svmkit'
131
+
132
+ # Load dataset.
133
+ samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')
134
+ samples = Numo::DFloat.cast(samples)
135
+
136
+ # Construct pipeline with kernel approximation and SVC.
137
+ rbf = SVMKit::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
138
+ svc = SVMKit::LinearModel::SVC.new(reg_param: 0.0001, max_iter: 1000, random_seed: 1)
139
+ pipeline = SVMKit::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
140
+
141
+ # Define the splitting strategy and cross validation.
142
+ kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
143
+ cv = SVMKit::ModelSelection::CrossValidation.new(estimator: pipeline, splitter: kf)
144
+
145
+ # Perform 5-cross validation.
146
+ report = cv.perform(samples, labels)
147
+
148
+ # Output result.
149
+ mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
150
+ puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
151
+ ```
152
+
153
+ Execution of the above script results in the following.
154
+
155
+ ```bash
156
+ $ ruby pipeline.rb
157
+ 5-CV mean accuracy: 99.2 %
158
+ ```
159
+
127
160
  ## Development
128
161
 
129
162
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/lib/svmkit.rb CHANGED
@@ -20,6 +20,7 @@ require 'svmkit/optimizer/sgd'
20
20
  require 'svmkit/optimizer/rmsprop'
21
21
  require 'svmkit/optimizer/nadam'
22
22
  require 'svmkit/optimizer/yellow_fin'
23
+ require 'svmkit/pipeline/pipeline'
23
24
  require 'svmkit/kernel_approximation/rbf'
24
25
  require 'svmkit/linear_model/sgd_linear_estimator'
25
26
  require 'svmkit/linear_model/svc'
@@ -8,7 +8,7 @@ require 'svmkit/tree/decision_tree_regressor'
8
8
 
9
9
  module SVMKit
10
10
  module Ensemble
11
- # AdaBoostRegressor is a class that implements random forest for regression
11
+ # AdaBoostRegressor is a class that implements random forest for regression.
12
12
  # This class uses decision tree for a weak learner.
13
13
  #
14
14
  # @example
@@ -12,7 +12,7 @@ module SVMKit
12
12
  #
13
13
  # @example
14
14
  # estimator =
15
- # SVMKit::NearestNeighbor::KNeighborsClassifier.new(n_neighbors = 5)
15
+ # SVMKit::NearestNeighbors::KNeighborsClassifier.new(n_neighbors = 5)
16
16
  # estimator.fit(training_samples, training_labels)
17
17
  # results = estimator.predict(testing_samples)
18
18
  #
@@ -11,7 +11,7 @@ module SVMKit
11
11
  #
12
12
  # @example
13
13
  # estimator =
14
- # SVMKit::NearestNeighbor::KNeighborsRegressor.new(n_neighbors = 5)
14
+ # SVMKit::NearestNeighbors::KNeighborsRegressor.new(n_neighbors = 5)
15
15
  # estimator.fit(training_samples, training_target_values)
16
16
  # results = estimator.predict(testing_samples)
17
17
  #
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+ require 'svmkit/base/base_estimator'
5
+
6
+ module SVMKit
7
+ # Module implements utilities of pipeline that consists of a chain of transformers and estimators.
8
+ module Pipeline
9
+ # Pipeline is a class that implements the function to perform the transformers and estimators sequentially.
10
+ #
11
+ # @example
12
+ # rbf = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
13
+ # svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0, fit_bias: true, max_iter: 5000, random_seed: 1)
14
+ # pipeline = SVMKit::Pipeline::Pipeline.new(steps: { trs: rbf, est: svc })
15
+ # pipeline.fit(training_samples, training_labels)
16
+ # results = pipeline.predict(testing_samples)
17
+ #
18
+ class Pipeline
19
+ include Base::BaseEstimator
20
+ include Validation
21
+
22
+ # Return the steps.
23
+ # @return [Hash]
24
+ attr_reader :steps
25
+
26
+ # Create a new pipeline.
27
+ #
28
+ # @param steps [Hash] List of transformers and estimators. The order of transforms follows the insertion order of hash keys.
29
+ # The last entry is considered an estimator.
30
+ def initialize(steps:)
31
+ check_params_type(Hash, steps: steps)
32
+ validate_steps(steps)
33
+ @params = {}
34
+ @steps = steps
35
+ end
36
+
37
+ # Fit the model with given training data.
38
+ #
39
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
40
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
41
+ # @return [Pipeline] The learned pipeline itself.
42
+ def fit(x, y)
43
+ trans_x = apply_transforms(x, y, fit: true)
44
+ last_estimator.fit(trans_x, y) unless last_estimator.nil?
45
+ self
46
+ end
47
+
48
+ # Call the fit_predict method of last estimator after applying all transforms.
49
+ #
50
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
51
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
52
+ # @return [Numo::NArray] The predicted results by last estimator.
53
+ def fit_predict(x, y = nil)
54
+ trans_x = apply_transforms(x, y, fit: true)
55
+ last_estimator.fit_predict(trans_x)
56
+ end
57
+
58
+ # Call the fit_transform method of last estimator after applying all transforms.
59
+ #
60
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
61
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
62
+ # @return [Numo::NArray] The predicted results by last estimator.
63
+ def fit_transform(x, y = nil)
64
+ trans_x = apply_transforms(x, y, fit: true)
65
+ last_estimator.fit_transform(trans_x, y)
66
+ end
67
+
68
+ # Call the decision_function method of last estimator after applying all transforms.
69
+ #
70
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
71
+ # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
72
+ def decision_function(x)
73
+ trans_x = apply_transforms(x)
74
+ last_estimator.decision_function(trans_x)
75
+ end
76
+
77
+ # Call the predict method of last estimator after applying all transforms.
78
+ #
79
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
80
+ # @return [Numo::NArray] The predicted results by last estimator.
81
+ def predict(x)
82
+ trans_x = apply_transforms(x)
83
+ last_estimator.predict(trans_x)
84
+ end
85
+
86
+ # Call the predict_log_proba method of last estimator after applying all transforms.
87
+ #
88
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
89
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
90
+ def predict_log_proba(x)
91
+ trans_x = apply_transforms(x)
92
+ last_estimator.predict_log_proba(trans_x)
93
+ end
94
+
95
+ # Call the predict_proba method of last estimator after applying all transforms.
96
+ #
97
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
98
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
99
+ def predict_proba(x)
100
+ trans_x = apply_transforms(x)
101
+ last_estimator.predict_proba(trans_x)
102
+ end
103
+
104
+ # Call the transform method of last estimator after applying all transforms.
105
+ #
106
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
107
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
108
+ def transform(x)
109
+ trans_x = apply_transforms(x)
110
+ last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
111
+ end
112
+
113
+ # Call the inverse_transform method in reverse order.
114
+ #
115
+ # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
116
+ # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
117
+ def inverse_transform(z)
118
+ itrans_z = z
119
+ @steps.keys.reverse.each do |name|
120
+ transformer = @steps[name]
121
+ next if transformer.nil?
122
+ itrans_z = transformer.inverse_transform(itrans_z)
123
+ end
124
+ itrans_z
125
+ end
126
+
127
+ # Call the score method of last estimator after applying all transforms.
128
+ #
129
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
130
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
131
+ # @return [Float] The score of last estimator
132
+ def score(x, y)
133
+ trans_x = apply_transforms(x)
134
+ last_estimator.score(trans_x, y)
135
+ end
136
+
137
+ # Dump marshal data.
138
+ # @return [Hash] The marshal data about Pipeline.
139
+ def marshal_dump
140
+ { params: @params,
141
+ steps: @steps }
142
+ end
143
+
144
+ # Load marshal data.
145
+ # @return [nil]
146
+ def marshal_load(obj)
147
+ @params = obj[:params]
148
+ @steps = obj[:steps]
149
+ nil
150
+ end
151
+
152
+ private
153
+
154
+ def validate_steps(steps)
155
+ steps.keys[0...-1].each do |name|
156
+ transformer = steps[name]
157
+ next if transformer.nil? || %i[fit transform].all? { |m| transformer.class.method_defined?(m) }
158
+ raise TypeError,
159
+ 'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
160
+ "#{name} => #{transformer.class}"
161
+ end
162
+
163
+ estimator = steps[steps.keys.last]
164
+ unless estimator.nil? || estimator.class.method_defined?(:fit)
165
+ raise TypeError,
166
+ 'Class of last step in pipeline should be implemented fit method: ' \
167
+ "#{steps.keys.last} => #{estimator.class}"
168
+ end
169
+ end
170
+
171
+ def apply_transforms(x, y = nil, fit: false)
172
+ trans_x = x
173
+ @steps.keys[0...-1].each do |name|
174
+ transformer = @steps[name]
175
+ next if transformer.nil?
176
+ transformer.fit(trans_x, y) if fit
177
+ trans_x = transformer.transform(trans_x)
178
+ end
179
+ trans_x
180
+ end
181
+
182
+ def last_estimator
183
+ @steps[@steps.keys.last]
184
+ end
185
+ end
186
+ end
187
+ end
@@ -49,6 +49,15 @@ module SVMKit
49
49
  fit(x)
50
50
  x / @norm_vec.tile(x.shape[1], 1).transpose
51
51
  end
52
+
53
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
54
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
55
+ #
56
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
57
+ # @return [Numo::DFloat] The normalized samples.
58
+ def transform(x)
59
+ fit_transform(x)
60
+ end
52
61
  end
53
62
  end
54
63
  end
@@ -3,5 +3,5 @@
3
3
  # SVMKit is a machine learning library in Ruby.
4
4
  module SVMKit
5
5
  # @!visibility private
6
- VERSION = '0.7.1'.freeze
6
+ VERSION = '0.7.2'.freeze
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-17 00:00:00.000000000 Z
11
+ date: 2019-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -157,6 +157,7 @@ files:
157
157
  - lib/svmkit/optimizer/sgd.rb
158
158
  - lib/svmkit/optimizer/yellow_fin.rb
159
159
  - lib/svmkit/pairwise_metric.rb
160
+ - lib/svmkit/pipeline/pipeline.rb
160
161
  - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
161
162
  - lib/svmkit/polynomial_model/factorization_machine_regressor.rb
162
163
  - lib/svmkit/preprocessing/l2_normalizer.rb