RubyGems - rumale - Versions diffs - 0.17.1 → 0.17.2 - Mend

rumale 0.17.1 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/README.md +4 -5
data/lib/rumale.rb +1 -0
data/lib/rumale/kernel_approximation/nystroem.rb +106 -0
data/lib/rumale/pipeline/pipeline.rb +0 -25
data/lib/rumale/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 54719fad09a9036dbdc1430323272af7f3c3e746aecac9b6199eef70bfe07856
-  data.tar.gz: b9df7b5ff0cc4feebb053955aea4d022e72263be1288578906b67f2100b97160
+  metadata.gz: '09585216da740231269c5675c48a8fa2ee3a8aba3f5c5b7d671429d113aaa4ba'
+  data.tar.gz: 66eb473c718f6a03938db19df06694373c781e098d68e2f2c20162865dc4f9f8
 SHA512:
-  metadata.gz: 42aedd744761fb61e6fcfa32643ee56b267ea163a2376f3eb308b181cd5981ceef4270e88ed1fe322e5f198d94b3b34af277f246e99f0439cfc3699c3838d76e
-  data.tar.gz: '082dda62d97b2655413ef599185b38c88cceacdec3ceecb99aab67b222306b5478963dfa7e216daa56838c6c3a9ac65ced53e51d513ae23ddf0954fb2c5982e5'
+  metadata.gz: d68eaa297116c4b834cbf2745355d07e4e4b0e50b170c631c417f623d7a0e75a515389903b6e2cb6f22355f9cf5eded4968eac5c7f216f5a98698bb5283c2e00
+  data.tar.gz: ddb3be08dc88cc99f5c7086e8bea83496f2644316d382f052a954ce6d1056bc2aabfbd2664ff572a55fdb1017a9bbb0d790d4d83b3788b781d133a97d26bd92e

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,7 @@
+# 0.17.2
+- Add transformer class for kernel approximation with [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html) method.
+- Delete array validation on [Pipeline](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/Pipeline.html) class considering that array of hash is given to HashVectorizer.
 # 0.17.1
 - Add transformer class for [PolynomialFeatures](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/PolynomialFeatures.html)
 - Add verbose and tol parameter to [FactorizationMachineClassifier](https://yoshoku.github.io/rumale/doc/Rumale/PolynomialModel/FactorizationMachineClassifier.html)

data/README.md CHANGED Viewed

@@ -50,22 +50,21 @@ First, let's classify simple xor data.
 require 'rumale'
 # Prepare XOR data.
-features = [[0, 0], [0, 1], [1, 0], [1, 1]]
+samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
 labels = [0, 1, 1, 0]
 # Train classifier with nearest neighbor rule.
 estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
-estimator.fit(x, y)
+estimator.fit(samples, labels)
 # Predict labels.
-p y
-p estimator.predict(x)
+p labels
+p estimator.predict(samples)
 ```
 Execution of the above script results in the following.
 ```ruby
-Numo::Int32#shape=[4]
 [0, 1, 1, 0]
 Numo::Int32#shape=[4]
 [0, 1, 1, 0]

data/lib/rumale.rb CHANGED Viewed

@@ -26,6 +26,7 @@ require 'rumale/optimizer/nadam'
 require 'rumale/optimizer/yellow_fin'
 require 'rumale/pipeline/pipeline'
 require 'rumale/kernel_approximation/rbf'
+require 'rumale/kernel_approximation/nystroem'
 require 'rumale/linear_model/base_linear_model'
 require 'rumale/linear_model/base_sgd'
 require 'rumale/linear_model/svc'

data/lib/rumale/kernel_approximation/nystroem.rb ADDED Viewed

@@ -0,0 +1,106 @@
+# frozen_string_literal: true
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+require 'rumale/pairwise_metric'
+module Rumale
+  module KernelApproximation
+    # Nystroem is a class that implements feature mapping with Nystroem method.
+    #
+    # @example
+    #   require 'numo/linalg/autoloader'
+    #
+    #   transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
+    #   new_training_samples = transformer.fit_transform(training_samples)
+    #   new_testing_samples = transformer.transform(testing_samples)
+    #
+    # *Reference*
+    # 1. T. Yang, Y. Li, M. Mahdavi, R. Jin, and Z-H. Zhou, "Nystrom Method vs Random Fourier Features: A Theoretical and Empirical Comparison," Advances in NIPS'12, Vol. 1, pp. 476--484, 2012.
+    class Nystroem
+      include Base::BaseEstimator
+      include Base::Transformer
+      # Returns the randomly sampled training data for feature mapping.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+      # Returns the indices of the sampled training data.
+      # @return [Numo::Int32] (shape: [n_components])
+      attr_reader :component_indices
+      # Returns the normalizing factors.
+      # @return [Numo::DFloat] (shape: [n_components, n_components])
+      attr_reader :normalizer
+      # Returns the random generator used for sampling the components.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new transformer for mapping to kernel feature space with Nystroem method.
+      #
+      # @param kernel [String] The type of kernel. This parameter is ignored in the current implementation.
+      # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
+      # @param n_components [Integer] The number of dimensions of the RBF kernel feature space (clamped to the range [1, n_samples] when fitting).
+      # @param random_seed [Integer] The seed value used to initialize the random generator.
+      def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
+        check_params_numeric(gamma: gamma, n_components: n_components)
+        check_params_numeric_or_nil(random_seed: random_seed)
+        @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
+        @params[:random_seed] ||= srand
+        @rng = Random.new(@params[:random_seed])
+        @component_indices = nil
+        @components = nil
+        @normalizer = nil
+      end
+      # Fit the model with given training data. Raises a RuntimeError when enable_linalg? is false (Numo::Linalg is required).
+      #
+      # @overload fit(x) -> Nystroem
+      #   @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @return [Nystroem] The learned transformer itself.
+      def fit(x, _y = nil)
+        x = check_convert_sample_array(x)
+        raise 'Nystroem#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+        # initialize some variables; the generator is duplicated so that @rng itself is not advanced, and n_components is clamped to [1, n_samples].
+        sub_rng = @rng.dup
+        n_samples = x.shape[0]
+        n_components = [1, [@params[:n_components], n_samples].min].max
+        # random sampling: take the first n_components entries of a shuffled list of sample indices.
+        @component_indices = Numo::Int32.cast([*0...n_samples].shuffle(random: sub_rng)[0...n_components])
+        @components = x[@component_indices, true]
+        # calculate normalizing factor: u * diag(1 / sqrt(la)); eigenvalues and eigenvector columns are both reversed so they stay paired, and eigenvalues are floored at 1e-12 (presumably to avoid dividing by zero).
+        kernel_mat = Rumale::PairwiseMetric.rbf_kernel(@components, nil, @params[:gamma])
+        eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
+        la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
+        u = eig_vecs.reverse(1)
+        @normalizer = u.dot((1.0 / Numo::NMath.sqrt(la)).diag)
+        self
+      end
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def fit_transform(x, _y = nil)
+        x = check_convert_sample_array(x)
+        fit(x).transform(x)
+      end
+      # Transform the given data with the learned model: the RBF kernel between x and the sampled components, multiplied by the normalizer.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = check_convert_sample_array(x)
+        z = Rumale::PairwiseMetric.rbf_kernel(x, @components, @params[:gamma])
+        z.dot(@normalizer)
+      end
+    end
+  end
+end

data/lib/rumale/pipeline/pipeline.rb CHANGED Viewed

@@ -40,7 +40,6 @@ module Rumale
       # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
       # @return [Pipeline] The learned pipeline itself.
       def fit(x, y)
-        x = check_convert_sample_array(x)
         trans_x = apply_transforms(x, y, fit: true)
         last_estimator&.fit(trans_x, y)
         self
@@ -52,7 +51,6 @@ module Rumale
       # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
       # @return [Numo::NArray] The predicted results by last estimator.
       def fit_predict(x, y = nil)
-        x = check_convert_sample_array(x)
         trans_x = apply_transforms(x, y, fit: true)
         last_estimator.fit_predict(trans_x)
       end
@@ -63,7 +61,6 @@ module Rumale
       # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
       # @return [Numo::NArray] The transformed results by last estimator.
       def fit_transform(x, y = nil)
-        x = check_convert_sample_array(x)
         trans_x = apply_transforms(x, y, fit: true)
         last_estimator.fit_transform(trans_x, y)
       end
@@ -73,7 +70,6 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
       def decision_function(x)
-        x = check_convert_sample_array(x)
         trans_x = apply_transforms(x)
         last_estimator.decision_function(trans_x)
       end
@@ -83,7 +79,6 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
       # @return [Numo::NArray] The predicted results by last estimator.
       def predict(x)
-        x = check_convert_sample_array(x)
         trans_x = apply_transforms(x)
         last_estimator.predict(trans_x)
       end
@@ -93,7 +88,6 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
       def predict_log_proba(x)
-        x = check_convert_sample_array(x)
         trans_x = apply_transforms(x)
         last_estimator.predict_log_proba(trans_x)
       end
@@ -103,7 +97,6 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
       def predict_proba(x)
-        x = check_convert_sample_array(x)
         trans_x = apply_transforms(x)
         last_estimator.predict_proba(trans_x)
       end
@@ -113,7 +106,6 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
       # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
       def transform(x)
-        x = check_convert_sample_array(x)
         trans_x = apply_transforms(x)
         last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
       end
@@ -123,7 +115,6 @@ module Rumale
       # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
       # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
       def inverse_transform(z)
-        z = check_convert_sample_array(z)
         itrans_z = z
         @steps.keys.reverse_each do |name|
           transformer = @steps[name]
@@ -139,26 +130,10 @@ module Rumale
       # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
       # @return [Float] The score of last estimator
       def score(x, y)
-        x = check_convert_sample_array(x)
         trans_x = apply_transforms(x)
         last_estimator.score(trans_x, y)
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data about Pipeline.
-      def marshal_dump
-        { params: @params,
-          steps: @steps }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @steps = obj[:steps]
-        nil
-      end
       private
       def validate_steps(steps)

data/lib/rumale/version.rb CHANGED Viewed

@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.17.1'
+  VERSION = '0.17.2'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.17.1
+  version: 0.17.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-01-26 00:00:00.000000000 Z
+date: 2020-02-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -209,6 +209,7 @@ files:
 - lib/rumale/evaluation_measure/silhouette_score.rb
 - lib/rumale/feature_extraction/feature_hasher.rb
 - lib/rumale/feature_extraction/hash_vectorizer.rb
+- lib/rumale/kernel_approximation/nystroem.rb
 - lib/rumale/kernel_approximation/rbf.rb
 - lib/rumale/kernel_machine/kernel_pca.rb
 - lib/rumale/kernel_machine/kernel_ridge.rb