RubyGems - rumale - Versions diffs - 0.13.5 → 0.13.6 - Mend

rumale 0.13.5 → 0.13.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/.rubocop.yml +20 -3
data/CHANGELOG.md +4 -0
data/README.md +1 -1
data/lib/rumale.rb +1 -0
data/lib/rumale/decomposition/fast_ica.rb +212 -0
data/lib/rumale/manifold/tsne.rb +3 -1
data/lib/rumale/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b9c82fecc8a90ec9d4379249b749e9257b2738c3
-  data.tar.gz: 1b4ca375174ee9d1c16a35d50e68ee9b5bd471c5
+  metadata.gz: e78d2a2eeb35fc8409dac683e2f8a3b90e5c396d
+  data.tar.gz: 8ae3c1396efeac327288a5fe534661ea65d0d766
 SHA512:
-  metadata.gz: 3d9bc3c21b951f5738cbf8a2e947d94d6fd735ebbcdc41fdb450d8c4f4f28a9788100c595b1b29bbe81e7124f0ca2b5703c4d6107730fab6f9ab6fe67db4fa1d
-  data.tar.gz: 2f93f78169dd8f694ca65634f3e4619ad83df72a59d0e2eacd8c686b2fdc82011f53b6b49e7c18e1c9a9d86904c77709d9743689e39e8db697f469a77a336fe7
+  metadata.gz: f25dee4375b9d9707374341b7cebe19973ea66f8e8b42af92806aa50b2b41323fcd0ba1470b9a87266859e1c30eea8a1a583ffa620519998df6da91bca8e1b23
+  data.tar.gz: 2e9a3cd0d87aae35e180e74c8335a93b1126f57fd9d4f5c727e60decf5b81982b96f0bdcd2e99c4104169341a1dc4db9a6d631c1fe5cf8eb709ccc5ae9f377e2

data/.rubocop.yml CHANGED

@@ -1,4 +1,6 @@
-require: rubocop-performance
+require:
+  - rubocop-performance
+  - rubocop-rspec
 AllCops:
   TargetRubyVersion: 2.3
@@ -10,7 +12,7 @@ AllCops:
     - 'Rakefile'
     - 'Gemfile'
-Documentation:
+Style/Documentation:
   Enabled: false
 Metrics/LineLength:
@@ -40,7 +42,7 @@ Metrics/BlockLength:
   Exclude:
     - 'spec/**/*'
-ParameterLists:
+Metrics/ParameterLists:
   Max: 10
 Security/MarshalLoad:
@@ -49,6 +51,9 @@ Security/MarshalLoad:
 Naming/UncommunicativeMethodParamName:
   Enabled: false
+Naming/ConstantName:
+  Enabled: false
 Style/FormatStringToken:
   Enabled: false
@@ -57,3 +62,15 @@ Style/NumericLiterals:
 Layout/EmptyLineAfterGuardClause:
   Enabled: false
+RSpec/MultipleExpectations:
+  Enabled: false
+RSpec/ExampleLength:
+  Max: 40
+RSpec/InstanceVariable:
+  Enabled: false
+RSpec/LeakyConstantDeclaration:
+  Enabled: false

data/CHANGELOG.md CHANGED

@@ -1,3 +1,7 @@
+# 0.13.6
+- Add transformer class for [FastICA](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FastICA.html).
+- Fix a typo on README ([#13](https://github.com/yoshoku/rumale/pull/13)).
 # 0.13.5
 - Add transformer class for [Factor Analysis](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FactorAnalysis.html).
 - Add covariance_type parameter to [Rumale::Clustering::GaussianMixture](https://yoshoku.github.io/rumale/doc/Rumale/Clustering/GaussianMixture.html).

data/README.md CHANGED

@@ -13,7 +13,7 @@ Rumale provides machine learning algorithms with interfaces similar to Scikit-Le
 Rumale supports Linear / Kernel Support Vector Machine,
 Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
 Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
-K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
+K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
 Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA and Non-negative Matrix Factorization.
 This project was formerly known as "SVMKit".

data/lib/rumale.rb CHANGED

@@ -70,6 +70,7 @@ require 'rumale/clustering/single_linkage'
 require 'rumale/decomposition/pca'
 require 'rumale/decomposition/nmf'
 require 'rumale/decomposition/factor_analysis'
+require 'rumale/decomposition/fast_ica'
 require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
 require 'rumale/preprocessing/l2_normalizer'

data/lib/rumale/decomposition/fast_ica.rb ADDED

@@ -0,0 +1,212 @@
+# frozen_string_literal: true
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+module Rumale
+  module Decomposition
+    # FastICA is a class that implements Fast Independent Component Analysis.
+    #
+    # @example
+    #   require 'numo/linalg/autoloader'
+    #
+    #   transformer = Rumale::Decomposition::FastICA.new(n_components: 2, random_seed: 1)
+    #   source_data = transformer.fit_transform(observed_data)
+    #
+    # *Reference*
+    # - A. Hyvarinen, "Fast and Robust Fixed-Point Algorithms for Independent Component Analysis," IEEE Trans. Neural Networks, Vol. 10 (3), pp. 626--634, 1999.
+    # - A. Hyvarinen and E. Oja, "Independent Component Analysis: Algorithms and Applications," Neural Networks, Vol. 13 (4-5), pp. 411--430, 2000.
+    class FastICA
+      include Base::BaseEstimator
+      include Base::Transformer
+      # Returns the unmixing matrix.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+      # Returns the mixing matrix.
+      # @return [Numo::DFloat] (shape: [n_features, n_components])
+      attr_reader :mixing
+      # Returns the number of iterations when converged.
+      # @return [Integer]
+      attr_reader :n_iter
+      # Returns the random generator.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new transformer with FastICA.
+      #
+      # @param n_components [Integer] The number of independent components.
+      # @param whiten [Boolean] The flag indicating whether to perform whitening.
+      # @param fun [String] The type of contrast function ('logcosh', 'exp', or 'cube').
+      # @param alpha [Float] The parameter of contrast function for 'logcosh' and 'exp'.
+      #   If fun = 'cube', this parameter is ignored.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of termination criterion.
+      # @param random_seed [Integer] The seed value used to initialize the random generator.
+      def initialize(n_components: 2, whiten: true, fun: 'logcosh', alpha: 1.0, max_iter: 200, tol: 1e-4, random_seed: nil)
+        check_params_integer(n_components: n_components, max_iter: max_iter)
+        check_params_boolean(whiten: whiten)
+        check_params_string(fun: fun)
+        check_params_float(alpha: alpha, tol: tol)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol)
+        @params = {}
+        @params[:n_components] = n_components
+        @params[:whiten] = whiten
+        @params[:fun] = fun
+        @params[:alpha] = alpha
+        @params[:max_iter] = max_iter
+        @params[:tol] = tol
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @components = nil
+        @mixing = nil
+        @n_iter = nil
+        @mean = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+      # Fit the model with given training data.
+      #
+      # @overload fit(x) -> FastICA
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @return [FastICA] The learned transformer itself.
+      def fit(x, _y = nil)
+        check_sample_array(x)
+        raise 'FastICA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+        @mean, whiten_mat = whitening(x, @params[:n_components]) if @params[:whiten]
+        wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
+        unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
+        @components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
+        @mixing = Numo::Linalg.pinv(@components)
+        if @params[:n_components] == 1
+          @components = @components.flatten.dup
+          @mixing = @mixing.flatten.dup
+        end
+        self
+      end
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
+      def fit_transform(x, _y = nil)
+        check_sample_array(x)
+        raise 'FastICA#fit_transform requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+        fit(x).transform(x)
+      end
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        check_sample_array(x)
+        cx = @params[:whiten] ? (x - @mean) : x
+        cx.dot(@components.transpose)
+      end
+      # Inverse transform the given transformed data with the learned model.
+      #
+      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The source data to be reconstructed into the mixed data.
+      # @return [Numo::DFloat] (shape: [n_samples, n_features]) The mixed data.
+      def inverse_transform(z)
+        check_sample_array(z)
+        m = @mixing.shape[1].nil? ? @mixing.expand_dims(0).transpose : @mixing
+        x = z.dot(m.transpose)
+        x += @mean if @params[:whiten]
+        x
+      end
+      # Dump marshal data.
+      # @return [Hash] The marshal data.
+      def marshal_dump
+        { params: @params,
+          components: @components,
+          mixing: @mixing,
+          n_iter: @n_iter,
+          mean: @mean,
+          rng: @rng }
+      end
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @components = obj[:components]
+        @mixing = obj[:mixing]
+        @n_iter = obj[:n_iter]
+        @mean = obj[:mean]
+        @rng = obj[:rng]
+        nil
+      end
+      private
+      def whitening(x, n_components)
+        n_samples, n_features = x.shape
+        mean_vec = x.mean(0)
+        centered_x = x - mean_vec
+        covar_mat = centered_x.transpose.dot(centered_x) / n_samples
+        eig_vals, eig_vecs = Numo::Linalg.eigh(covar_mat, vals_range: (n_features - n_components)...n_features)
+        [mean_vec, (eig_vecs.reverse(1).dup * (1 / Numo::NMath.sqrt(eig_vals.reverse.dup))).transpose.dup]
+      end
+      def ica(x, fun, max_iter, tol, sub_rng)
+        n_samples, n_components = x.shape
+        w = decorrelation(Rumale::Utils.rand_normal([n_components, n_components], sub_rng))
+        n_iters = 0
+        max_iter.times do |t|
+          n_iters = t + 1
+          gx, ggx = gradient(x.dot(w.transpose), fun)
+          new_w = decorrelation(gx.transpose.dot(x) / n_samples - w * ggx / n_samples)
+          err = (new_w - w).abs.max
+          w = new_w
+          break if err <= tol
+        end
+        [w, n_iters]
+      end
+      def decorrelation(w)
+        eig_vals, eig_vecs = Numo::Linalg.eigh(w.dot(w.transpose))
+        decorr_mat = (eig_vecs * (1 / Numo::NMath.sqrt(eig_vals))).dot(eig_vecs.transpose)
+        decorr_mat.dot(w)
+      end
+      def gradient(x, func)
+        case func
+        when 'exp'
+          grad_exp(x, @params[:alpha])
+        when 'cube'
+          grad_cube(x)
+        else
+          grad_logcosh(x, @params[:alpha])
+        end
+      end
+      def grad_logcosh(x, alpha)
+        gx = Numo::NMath.tanh(alpha * x)
+        ggx = (alpha * (1 - gx**2)).sum(0)
+        [gx, ggx]
+      end
+      def grad_exp(x, alpha)
+        squared_x = x**2
+        exp_x = Numo::NMath.exp(-0.5 * alpha * squared_x)
+        gx = exp_x * x
+        ggx = (exp_x * (1 - alpha * squared_x)).sum(0)
+        [gx, ggx]
+      end
+      def grad_cube(x)
+        [x**3, (3 * x**2).sum(0)]
+      end
+    end
+  end
+end

data/lib/rumale/manifold/tsne.rb CHANGED

@@ -106,7 +106,9 @@ module Rumale
           y = (b.dot(one_vec) * y + (a - b).dot(y)) / a.dot(one_vec)
           lo_prob_mat = t_distributed_probability_matrix(y)
           @n_iter = t + 1
-          puts "[t-SNE] KL divergence after #{@n_iter} iterations: #{cost(hi_prob_mat, lo_prob_mat)}" if @params[:verbose] && (@n_iter % 100).zero?
+          if @params[:verbose] && (@n_iter % 100).zero?
+            puts "[t-SNE] KL divergence after #{@n_iter} iterations: #{cost(hi_prob_mat, lo_prob_mat)}"
+          end
         end
         # store results.
         @embedding = y

data/lib/rumale/version.rb CHANGED

@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.13.5'
+  VERSION = '0.13.6'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.13.5
+  version: 0.13.6
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-10-03 00:00:00.000000000 Z
+date: 2019-10-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -172,6 +172,7 @@ files:
 - lib/rumale/clustering/spectral_clustering.rb
 - lib/rumale/dataset.rb
 - lib/rumale/decomposition/factor_analysis.rb
+- lib/rumale/decomposition/fast_ica.rb
 - lib/rumale/decomposition/nmf.rb
 - lib/rumale/decomposition/pca.rb
 - lib/rumale/ensemble/ada_boost_classifier.rb