svmkit 0.7.1 → 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f708500e8ba450849736cd02b152b30e302c1bb
4
- data.tar.gz: 210256fcd91375e96b4d68015fae2bf2a5b0d4be
3
+ metadata.gz: f20192e678f6f066eb1d40c066f0e9a0efefd3a0
4
+ data.tar.gz: 1be802cdbbfb2ee7a641fb78d1409c2ee49b8450
5
5
  SHA512:
6
- metadata.gz: 4a03201b32dc7a5c0db43bfdfe742cc5b369449c2ce0dc63f4905e3e0b9186276f5b382c1b6bff45acdfb91f61712befc58538c1dfa74874722127bec5e2bf03
7
- data.tar.gz: deea7c688685935ebe2574448973903115e727cc479b12ea42f382ea697f7218b0de27cccf978c7ea0f4aa0f0fa0bfd5a97ccbbb88e2ea0b87cd1a5ca00cc5f3
6
+ metadata.gz: 43471c5a4ef290781d5d2270732313fbcffba60a4351805d6c7bb8abec7537bcd8ac50260600fbfb1ff52c947c45c3f6f19b9ccecd47e6015e6ac45da5c855a6
7
+ data.tar.gz: 908f675396a2da835b82da8cf117a4a17d6d90d489618cf110e993de6c03d6ec8e6651115df333033314b0f54c1e931f68da8ff541a1b5e22886741f48496259
data/.travis.yml CHANGED
@@ -3,10 +3,13 @@ os: linux
3
3
  dist: trusty
4
4
  language: ruby
5
5
  rvm:
6
+ - 2.1
6
7
  - 2.2
7
8
  - 2.3
8
9
  - 2.4
9
10
  - 2.5
10
11
  - 2.6
11
12
  before_install:
12
- - gem install --no-document bundler -v '>= 1.17'
13
+ - travis_retry gem update --system || travis_retry gem update --system 2.7.8
14
+ - travis_retry gem install bundler --no-document || travis_retry gem install bundler --no-document -v 1.17.3
15
+
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.7.2
2
+ - Add class for Pipeline that constructs chain of transformers and estimators.
3
+ - Fix some typos on document.
4
+
1
5
  # 0.7.1
2
6
  - Fix to use CSV class in parsing libsvm format file.
3
7
  - Refactor ensemble estimators.
data/README.md CHANGED
@@ -124,6 +124,39 @@ mean_logloss = report[:test_score].inject(:+) / kf.n_splits
124
124
  puts("5-CV mean log-loss: %.3f" % mean_logloss)
125
125
  ```
126
126
 
127
+ ### Example 3. Pipeline
128
+
129
+ ```ruby
130
+ require 'svmkit'
131
+
132
+ # Load dataset.
133
+ samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')
134
+ samples = Numo::DFloat.cast(samples)
135
+
136
+ # Construct pipeline with kernel approximation and SVC.
137
+ rbf = SVMKit::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
138
+ svc = SVMKit::LinearModel::SVC.new(reg_param: 0.0001, max_iter: 1000, random_seed: 1)
139
+ pipeline = SVMKit::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
140
+
141
+ # Define the splitting strategy and cross validation.
142
+ kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
143
+ cv = SVMKit::ModelSelection::CrossValidation.new(estimator: pipeline, splitter: kf)
144
+
145
+ # Perform 5-cross validation.
146
+ report = cv.perform(samples, labels)
147
+
148
+ # Output result.
149
+ mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
150
+ puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
151
+ ```
152
+
153
+ Execution of the above script results in the following.
154
+
155
+ ```bash
156
+ $ ruby pipeline.rb
157
+ 5-CV mean accuracy: 99.2 %
158
+ ```
159
+
127
160
  ## Development
128
161
 
129
162
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/lib/svmkit.rb CHANGED
@@ -20,6 +20,7 @@ require 'svmkit/optimizer/sgd'
20
20
  require 'svmkit/optimizer/rmsprop'
21
21
  require 'svmkit/optimizer/nadam'
22
22
  require 'svmkit/optimizer/yellow_fin'
23
+ require 'svmkit/pipeline/pipeline'
23
24
  require 'svmkit/kernel_approximation/rbf'
24
25
  require 'svmkit/linear_model/sgd_linear_estimator'
25
26
  require 'svmkit/linear_model/svc'
@@ -8,7 +8,7 @@ require 'svmkit/tree/decision_tree_regressor'
8
8
 
9
9
  module SVMKit
10
10
  module Ensemble
11
- # AdaBoostRegressor is a class that implements random forest for regression
11
+ # AdaBoostRegressor is a class that implements random forest for regression.
12
12
  # This class uses decision tree for a weak learner.
13
13
  #
14
14
  # @example
@@ -12,7 +12,7 @@ module SVMKit
12
12
  #
13
13
  # @example
14
14
  # estimator =
15
- # SVMKit::NearestNeighbor::KNeighborsClassifier.new(n_neighbors = 5)
15
+ # SVMKit::NearestNeighbors::KNeighborsClassifier.new(n_neighbors = 5)
16
16
  # estimator.fit(training_samples, traininig_labels)
17
17
  # results = estimator.predict(testing_samples)
18
18
  #
@@ -11,7 +11,7 @@ module SVMKit
11
11
  #
12
12
  # @example
13
13
  # estimator =
14
- # SVMKit::NearestNeighbor::KNeighborsRegressor.new(n_neighbors = 5)
14
+ # SVMKit::NearestNeighbors::KNeighborsRegressor.new(n_neighbors = 5)
15
15
  # estimator.fit(training_samples, traininig_target_values)
16
16
  # results = estimator.predict(testing_samples)
17
17
  #
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+ require 'svmkit/base/base_estimator'
5
+
6
+ module SVMKit
7
+ # Module implements utilities of pipeline that consists of a chain of transformers and estimators.
8
+ module Pipeline
9
+ # Pipeline is a class that implements the function to perform the transformers and estimators sequentially.
10
+ #
11
+ # @example
12
+ # rbf = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
13
+ # svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0, fit_bias: true, max_iter: 5000, random_seed: 1)
14
+ # pipeline = SVMKit::Pipeline::Pipeline.new(steps: { trs: rbf, est: svc })
15
+ # pipeline.fit(training_samples, training_labels)
16
+ # results = pipeline.predict(testing_samples)
17
+ #
18
+ class Pipeline
19
+ include Base::BaseEstimator
20
+ include Validation
21
+
22
+ # Return the steps.
23
+ # @return [Hash]
24
+ attr_reader :steps
25
+
26
+ # Create a new pipeline.
27
+ #
28
+ # @param steps [Hash] List of transformers and estimators. The order of transforms follows the insertion order of hash keys.
29
+ # The last entry is considered an estimator.
30
+ def initialize(steps:)
31
+ check_params_type(Hash, steps: steps)
32
+ validate_steps(steps)
33
+ @params = {}
34
+ @steps = steps
35
+ end
36
+
37
+ # Fit the model with given training data.
38
+ #
39
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
40
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
41
+ # @return [Pipeline] The learned pipeline itself.
42
+ def fit(x, y)
43
+ trans_x = apply_transforms(x, y, fit: true)
44
+ last_estimator.fit(trans_x, y) unless last_estimator.nil?
45
+ self
46
+ end
47
+
48
+ # Call the fit_predict method of last estimator after applying all transforms.
49
+ #
50
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
51
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
52
+ # @return [Numo::NArray] The predicted results by last estimator.
53
+ def fit_predict(x, y = nil)
54
+ trans_x = apply_transforms(x, y, fit: true)
55
+ last_estimator.fit_predict(trans_x)
56
+ end
57
+
58
+ # Call the fit_transform method of last estimator after applying all transforms.
59
+ #
60
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
61
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
62
+ # @return [Numo::NArray] The predicted results by last estimator.
63
+ def fit_transform(x, y = nil)
64
+ trans_x = apply_transforms(x, y, fit: true)
65
+ last_estimator.fit_transform(trans_x, y)
66
+ end
67
+
68
+ # Call the decision_function method of last estimator after applying all transforms.
69
+ #
70
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
71
+ # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
72
+ def decision_function(x)
73
+ trans_x = apply_transforms(x)
74
+ last_estimator.decision_function(trans_x)
75
+ end
76
+
77
+ # Call the predict method of last estimator after applying all transforms.
78
+ #
79
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
80
+ # @return [Numo::NArray] The predicted results by last estimator.
81
+ def predict(x)
82
+ trans_x = apply_transforms(x)
83
+ last_estimator.predict(trans_x)
84
+ end
85
+
86
+ # Call the predict_log_proba method of last estimator after applying all transforms.
87
+ #
88
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
89
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
90
+ def predict_log_proba(x)
91
+ trans_x = apply_transforms(x)
92
+ last_estimator.predict_log_proba(trans_x)
93
+ end
94
+
95
+ # Call the predict_proba method of last estimator after applying all transforms.
96
+ #
97
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
98
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
99
+ def predict_proba(x)
100
+ trans_x = apply_transforms(x)
101
+ last_estimator.predict_proba(trans_x)
102
+ end
103
+
104
+ # Call the transform method of last estimator after applying all transforms.
105
+ #
106
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
107
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
108
+ def transform(x)
109
+ trans_x = apply_transforms(x)
110
+ last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
111
+ end
112
+
113
+ # Call the inverse_transform method in reverse order.
114
+ #
115
+ # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
116
+ # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
117
+ def inverse_transform(z)
118
+ itrans_z = z
119
+ @steps.keys.reverse.each do |name|
120
+ transformer = @steps[name]
121
+ next if transformer.nil?
122
+ itrans_z = transformer.inverse_transform(itrans_z)
123
+ end
124
+ itrans_z
125
+ end
126
+
127
+ # Call the score method of last estimator after applying all transforms.
128
+ #
129
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
130
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
131
+ # @return [Float] The score of last estimator
132
+ def score(x, y)
133
+ trans_x = apply_transforms(x)
134
+ last_estimator.score(trans_x, y)
135
+ end
136
+
137
+ # Dump marshal data.
138
+ # @return [Hash] The marshal data about Pipeline.
139
+ def marshal_dump
140
+ { params: @params,
141
+ steps: @steps }
142
+ end
143
+
144
+ # Load marshal data.
145
+ # @return [nil]
146
+ def marshal_load(obj)
147
+ @params = obj[:params]
148
+ @steps = obj[:steps]
149
+ nil
150
+ end
151
+
152
+ private
153
+
154
+ def validate_steps(steps)
155
+ steps.keys[0...-1].each do |name|
156
+ transformer = steps[name]
157
+ next if transformer.nil? || %i[fit transform].all? { |m| transformer.class.method_defined?(m) }
158
+ raise TypeError,
159
+ 'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
160
+ "#{name} => #{transformer.class}"
161
+ end
162
+
163
+ estimator = steps[steps.keys.last]
164
+ unless estimator.nil? || estimator.class.method_defined?(:fit)
165
+ raise TypeError,
166
+ 'Class of last step in pipeline should be implemented fit method: ' \
167
+ "#{steps.keys.last} => #{estimator.class}"
168
+ end
169
+ end
170
+
171
+ def apply_transforms(x, y = nil, fit: false)
172
+ trans_x = x
173
+ @steps.keys[0...-1].each do |name|
174
+ transformer = @steps[name]
175
+ next if transformer.nil?
176
+ transformer.fit(trans_x, y) if fit
177
+ trans_x = transformer.transform(trans_x)
178
+ end
179
+ trans_x
180
+ end
181
+
182
+ def last_estimator
183
+ @steps[@steps.keys.last]
184
+ end
185
+ end
186
+ end
187
+ end
@@ -49,6 +49,15 @@ module SVMKit
49
49
  fit(x)
50
50
  x / @norm_vec.tile(x.shape[1], 1).transpose
51
51
  end
52
+
53
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
54
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
55
+ #
56
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
57
+ # @return [Numo::DFloat] The normalized samples.
58
+ def transform(x)
59
+ fit_transform(x)
60
+ end
52
61
  end
53
62
  end
54
63
  end
@@ -3,5 +3,5 @@
3
3
  # SVMKit is a machine learning library in Ruby.
4
4
  module SVMKit
5
5
  # @!visibility private
6
- VERSION = '0.7.1'.freeze
6
+ VERSION = '0.7.2'.freeze
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-17 00:00:00.000000000 Z
11
+ date: 2019-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -157,6 +157,7 @@ files:
157
157
  - lib/svmkit/optimizer/sgd.rb
158
158
  - lib/svmkit/optimizer/yellow_fin.rb
159
159
  - lib/svmkit/pairwise_metric.rb
160
+ - lib/svmkit/pipeline/pipeline.rb
160
161
  - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
161
162
  - lib/svmkit/polynomial_model/factorization_machine_regressor.rb
162
163
  - lib/svmkit/preprocessing/l2_normalizer.rb