svmkit 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f708500e8ba450849736cd02b152b30e302c1bb
4
- data.tar.gz: 210256fcd91375e96b4d68015fae2bf2a5b0d4be
3
+ metadata.gz: f20192e678f6f066eb1d40c066f0e9a0efefd3a0
4
+ data.tar.gz: 1be802cdbbfb2ee7a641fb78d1409c2ee49b8450
5
5
  SHA512:
6
- metadata.gz: 4a03201b32dc7a5c0db43bfdfe742cc5b369449c2ce0dc63f4905e3e0b9186276f5b382c1b6bff45acdfb91f61712befc58538c1dfa74874722127bec5e2bf03
7
- data.tar.gz: deea7c688685935ebe2574448973903115e727cc479b12ea42f382ea697f7218b0de27cccf978c7ea0f4aa0f0fa0bfd5a97ccbbb88e2ea0b87cd1a5ca00cc5f3
6
+ metadata.gz: 43471c5a4ef290781d5d2270732313fbcffba60a4351805d6c7bb8abec7537bcd8ac50260600fbfb1ff52c947c45c3f6f19b9ccecd47e6015e6ac45da5c855a6
7
+ data.tar.gz: 908f675396a2da835b82da8cf117a4a17d6d90d489618cf110e993de6c03d6ec8e6651115df333033314b0f54c1e931f68da8ff541a1b5e22886741f48496259
data/.travis.yml CHANGED
@@ -3,10 +3,13 @@ os: linux
3
3
  dist: trusty
4
4
  language: ruby
5
5
  rvm:
6
+ - 2.1
6
7
  - 2.2
7
8
  - 2.3
8
9
  - 2.4
9
10
  - 2.5
10
11
  - 2.6
11
12
  before_install:
12
- - gem install --no-document bundler -v '>= 1.17'
13
+ - travis_retry gem update --system || travis_retry gem update --system 2.7.8
14
+ - travis_retry gem install bundler --no-document || travis_retry gem install bundler --no-document -v 1.17.3
15
+
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.7.2
2
+ - Add class for Pipeline that constructs a chain of transformers and estimators.
3
+ - Fix some typos on document.
4
+
1
5
  # 0.7.1
2
6
  - Fix to use CSV class in parsing libsvm format file.
3
7
  - Refactor ensemble estimators.
data/README.md CHANGED
@@ -124,6 +124,39 @@ mean_logloss = report[:test_score].inject(:+) / kf.n_splits
124
124
  puts("5-CV mean log-loss: %.3f" % mean_logloss)
125
125
  ```
126
126
 
127
+ ### Example 3. Pipeline
128
+
129
+ ```ruby
130
+ require 'svmkit'
131
+
132
+ # Load dataset.
133
+ samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')
134
+ samples = Numo::DFloat.cast(samples)
135
+
136
+ # Construct pipeline with kernel approximation and SVC.
137
+ rbf = SVMKit::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
138
+ svc = SVMKit::LinearModel::SVC.new(reg_param: 0.0001, max_iter: 1000, random_seed: 1)
139
+ pipeline = SVMKit::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
140
+
141
+ # Define the splitting strategy and cross validation.
142
+ kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
143
+ cv = SVMKit::ModelSelection::CrossValidation.new(estimator: pipeline, splitter: kf)
144
+
145
+ # Perform 5-cross validation.
146
+ report = cv.perform(samples, labels)
147
+
148
+ # Output result.
149
+ mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
150
+ puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
151
+ ```
152
+
153
+ Execution of the above script results in the following.
154
+
155
+ ```bash
156
+ $ ruby pipeline.rb
157
+ 5-CV mean accuracy: 99.2 %
158
+ ```
159
+
127
160
  ## Development
128
161
 
129
162
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/lib/svmkit.rb CHANGED
@@ -20,6 +20,7 @@ require 'svmkit/optimizer/sgd'
20
20
  require 'svmkit/optimizer/rmsprop'
21
21
  require 'svmkit/optimizer/nadam'
22
22
  require 'svmkit/optimizer/yellow_fin'
23
+ require 'svmkit/pipeline/pipeline'
23
24
  require 'svmkit/kernel_approximation/rbf'
24
25
  require 'svmkit/linear_model/sgd_linear_estimator'
25
26
  require 'svmkit/linear_model/svc'
@@ -8,7 +8,7 @@ require 'svmkit/tree/decision_tree_regressor'
8
8
 
9
9
  module SVMKit
10
10
  module Ensemble
11
- # AdaBoostRegressor is a class that implements random forest for regression
11
+ # AdaBoostRegressor is a class that implements random forest for regression.
12
12
  # This class uses decision tree for a weak learner.
13
13
  #
14
14
  # @example
@@ -12,7 +12,7 @@ module SVMKit
12
12
  #
13
13
  # @example
14
14
  # estimator =
15
- # SVMKit::NearestNeighbor::KNeighborsClassifier.new(n_neighbors = 5)
15
+ # SVMKit::NearestNeighbors::KNeighborsClassifier.new(n_neighbors = 5)
16
16
  # estimator.fit(training_samples, training_labels)
17
17
  # results = estimator.predict(testing_samples)
18
18
  #
@@ -11,7 +11,7 @@ module SVMKit
11
11
  #
12
12
  # @example
13
13
  # estimator =
14
- # SVMKit::NearestNeighbor::KNeighborsRegressor.new(n_neighbors = 5)
14
+ # SVMKit::NearestNeighbors::KNeighborsRegressor.new(n_neighbors = 5)
15
15
  # estimator.fit(training_samples, training_target_values)
16
16
  # results = estimator.predict(testing_samples)
17
17
  #
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+ require 'svmkit/base/base_estimator'
5
+
6
+ module SVMKit
7
+ # Module implements utilities of pipeline that consists of a chain of transformers and estimators.
8
+ module Pipeline
9
+ # Pipeline is a class that implements the function to perform the transformers and estimators sequentially.
10
+ #
11
+ # @example
12
+ # rbf = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
13
+ # svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0, fit_bias: true, max_iter: 5000, random_seed: 1)
14
+ # pipeline = SVMKit::Pipeline::Pipeline.new(steps: { trs: rbf, est: svc })
15
+ # pipeline.fit(training_samples, training_labels)
16
+ # results = pipeline.predict(testing_samples)
17
+ #
18
+ class Pipeline
19
+ include Base::BaseEstimator
20
+ include Validation
21
+
22
+ # Return the steps.
23
+ # @return [Hash]
24
+ attr_reader :steps
25
+
26
+ # Create a new pipeline.
27
+ #
28
+ # @param steps [Hash] List of transformers and estimators. The order of transforms follows the insertion order of hash keys.
29
+ # The last entry is considered an estimator.
30
+ def initialize(steps:)
31
+ check_params_type(Hash, steps: steps)
32
+ validate_steps(steps)
33
+ @params = {}
34
+ @steps = steps
35
+ end
36
+
37
+ # Fit the model with given training data.
38
+ #
39
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
40
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
41
+ # @return [Pipeline] The learned pipeline itself.
42
+ def fit(x, y)
43
+ trans_x = apply_transforms(x, y, fit: true)
44
+ last_estimator.fit(trans_x, y) unless last_estimator.nil?
45
+ self
46
+ end
47
+
48
+ # Call the fit_predict method of last estimator after applying all transforms.
49
+ #
50
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
51
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
52
+ # @return [Numo::NArray] The predicted results by last estimator.
53
+ def fit_predict(x, y = nil)
54
+ trans_x = apply_transforms(x, y, fit: true)
55
+ last_estimator.fit_predict(trans_x)
56
+ end
57
+
58
+ # Call the fit_transform method of last estimator after applying all transforms.
59
+ #
60
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be transformed and used for fitting the model.
61
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
62
+ # @return [Numo::NArray] The predicted results by last estimator.
63
+ def fit_transform(x, y = nil)
64
+ trans_x = apply_transforms(x, y, fit: true)
65
+ last_estimator.fit_transform(trans_x, y)
66
+ end
67
+
68
+ # Call the decision_function method of last estimator after applying all transforms.
69
+ #
70
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
71
+ # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
72
+ def decision_function(x)
73
+ trans_x = apply_transforms(x)
74
+ last_estimator.decision_function(trans_x)
75
+ end
76
+
77
+ # Call the predict method of last estimator after applying all transforms.
78
+ #
79
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
80
+ # @return [Numo::NArray] The predicted results by last estimator.
81
+ def predict(x)
82
+ trans_x = apply_transforms(x)
83
+ last_estimator.predict(trans_x)
84
+ end
85
+
86
+ # Call the predict_log_proba method of last estimator after applying all transforms.
87
+ #
88
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
89
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
90
+ def predict_log_proba(x)
91
+ trans_x = apply_transforms(x)
92
+ last_estimator.predict_log_proba(trans_x)
93
+ end
94
+
95
+ # Call the predict_proba method of last estimator after applying all transforms.
96
+ #
97
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
98
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
99
+ def predict_proba(x)
100
+ trans_x = apply_transforms(x)
101
+ last_estimator.predict_proba(trans_x)
102
+ end
103
+
104
+ # Call the transform method of last estimator after applying all transforms.
105
+ #
106
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
107
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
108
+ def transform(x)
109
+ trans_x = apply_transforms(x)
110
+ last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
111
+ end
112
+
113
+ # Call the inverse_transform method in reverse order.
114
+ #
115
+ # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
116
+ # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
117
+ def inverse_transform(z)
118
+ itrans_z = z
119
+ @steps.keys.reverse.each do |name|
120
+ transformer = @steps[name]
121
+ next if transformer.nil?
122
+ itrans_z = transformer.inverse_transform(itrans_z)
123
+ end
124
+ itrans_z
125
+ end
126
+
127
+ # Call the score method of last estimator after applying all transforms.
128
+ #
129
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
130
+ # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
131
+ # @return [Float] The score of last estimator
132
+ def score(x, y)
133
+ trans_x = apply_transforms(x)
134
+ last_estimator.score(trans_x, y)
135
+ end
136
+
137
+ # Dump marshal data.
138
+ # @return [Hash] The marshal data about Pipeline.
139
+ def marshal_dump
140
+ { params: @params,
141
+ steps: @steps }
142
+ end
143
+
144
+ # Load marshal data.
145
+ # @return [nil]
146
+ def marshal_load(obj)
147
+ @params = obj[:params]
148
+ @steps = obj[:steps]
149
+ nil
150
+ end
151
+
152
+ private
153
+
154
+ def validate_steps(steps)
155
+ steps.keys[0...-1].each do |name|
156
+ transformer = steps[name]
157
+ next if transformer.nil? || %i[fit transform].all? { |m| transformer.class.method_defined?(m) }
158
+ raise TypeError,
159
+ 'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
160
+ "#{name} => #{transformer.class}"
161
+ end
162
+
163
+ estimator = steps[steps.keys.last]
164
+ unless estimator.nil? || estimator.class.method_defined?(:fit)
165
+ raise TypeError,
166
+ 'Class of last step in pipeline should be implemented fit method: ' \
167
+ "#{steps.keys.last} => #{estimator.class}"
168
+ end
169
+ end
170
+
171
+ def apply_transforms(x, y = nil, fit: false)
172
+ trans_x = x
173
+ @steps.keys[0...-1].each do |name|
174
+ transformer = @steps[name]
175
+ next if transformer.nil?
176
+ transformer.fit(trans_x, y) if fit
177
+ trans_x = transformer.transform(trans_x)
178
+ end
179
+ trans_x
180
+ end
181
+
182
+ def last_estimator
183
+ @steps[@steps.keys.last]
184
+ end
185
+ end
186
+ end
187
+ end
@@ -49,6 +49,15 @@ module SVMKit
49
49
  fit(x)
50
50
  x / @norm_vec.tile(x.shape[1], 1).transpose
51
51
  end
52
+
53
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
54
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
55
+ #
56
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
57
+ # @return [Numo::DFloat] The normalized samples.
58
+ def transform(x)
59
+ fit_transform(x)
60
+ end
52
61
  end
53
62
  end
54
63
  end
@@ -3,5 +3,5 @@
3
3
  # SVMKit is a machine learning library in Ruby.
4
4
  module SVMKit
5
5
  # @!visibility private
6
- VERSION = '0.7.1'.freeze
6
+ VERSION = '0.7.2'.freeze
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-12-17 00:00:00.000000000 Z
11
+ date: 2019-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -157,6 +157,7 @@ files:
157
157
  - lib/svmkit/optimizer/sgd.rb
158
158
  - lib/svmkit/optimizer/yellow_fin.rb
159
159
  - lib/svmkit/pairwise_metric.rb
160
+ - lib/svmkit/pipeline/pipeline.rb
160
161
  - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
161
162
  - lib/svmkit/polynomial_model/factorization_machine_regressor.rb
162
163
  - lib/svmkit/preprocessing/l2_normalizer.rb