RubyGems - rumale-decomposition - Versions diffs - 0.24.0 - Mend

rumale-decomposition 0.24.0

Files changed (10) hide show

checksums.yaml +7 -0
data/LICENSE.txt +27 -0
data/README.md +34 -0
data/lib/rumale/decomposition/factor_analysis.rb +146 -0
data/lib/rumale/decomposition/fast_ica.rb +184 -0
data/lib/rumale/decomposition/nmf.rb +125 -0
data/lib/rumale/decomposition/pca.rb +150 -0
data/lib/rumale/decomposition/version.rb +10 -0
data/lib/rumale/decomposition.rb +9 -0
metadata +88 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 4a2420f4a6ada0919ed266e0cbf84eb2b52b5d17a86a3d0273646e8ee1253435
+  data.tar.gz: 32194d34d4165898dd8478ef8fd56b6556cee1832140ae028daf707c0d1d4f93
+SHA512:
+  metadata.gz: a94a284b3692806962acab85c22f7c96595a00b2dd27e4fd638cbb9cd15b08e087651ec5e7c933a239df0b5ebdb4da86394c646bbc8daa517c5d880ab7afac0b
+  data.tar.gz: 4b9105d8e34bf11be4ed2e563431ee2755c949684530b9a0020a4b05acdb504d75acdb8f2b39559a9ba3e90e07b99e153b9724c2c3f3f5de452849e7eb207431

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,27 @@
+Copyright (c) 2022 Atsushi Tatsuma
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

data/README.md ADDED Viewed

@@ -0,0 +1,34 @@
+# Rumale::Decomposition
+[![Gem Version](https://badge.fury.io/rb/rumale-decomposition.svg)](https://badge.fury.io/rb/rumale-decomposition)
+[![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-decomposition/LICENSE.txt)
+[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
+Rumale is a machine learning library in Ruby.
+Rumale::Decomposition provides matrix decomposition algorithms,
+such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
+with Rumale interface.
+## Installation
+Add this line to your application's Gemfile:
+```ruby
+gem 'rumale-decomposition'
+```
+And then execute:
+    $ bundle install
+Or install it yourself as:
+    $ gem install rumale-decomposition
+## Documentation
+- [Rumale API Documentation - Decomposition](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
+## License
+The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).

data/lib/rumale/decomposition/factor_analysis.rb ADDED Viewed

@@ -0,0 +1,146 @@
+# frozen_string_literal: true
+require 'rumale/base/estimator'
+require 'rumale/base/transformer'
+require 'rumale/validation'
+module Rumale
+  module Decomposition
+    # FactorAnalysis is a class that implements factor analysis with EM algorithm.
+    #
+    # @example
+    #   require 'numo/linalg/autoloader'
+    #   require 'rumale/decomposition/factor_analysis'
+    #
+    #   decomposer = Rumale::Decomposition::FactorAnalysis.new(n_components: 2)
+    #   representation = decomposer.fit_transform(samples)
+    #
+    # *Reference*
+    # - Barber, D., "Bayesian Reasoning and Machine Learning," Cambridge University Press, 2012.
+    class FactorAnalysis < ::Rumale::Base::Estimator
+      include ::Rumale::Base::Transformer
+      # Returns the mean vector.
+      # @return [Numo::DFloat] (shape: [n_features])
+      attr_reader :mean
+      # Returns the estimated noise variance for each feature.
+      # @return [Numo::DFloat] (shape: [n_features])
+      attr_reader :noise_variance
+      # Returns the components with maximum variance.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+      # Returns the log likelihood at each iteration.
+      # @return [Numo::DFloat] (shape: [n_iter])
+      attr_reader :loglike
+      # Return the number of iterations run for optimization
+      # @return [Integer]
+      attr_reader :n_iter
+      # Create a new transformer with factor analysis.
+      #
+      # @param n_components [Integer] The number of components (dimensionality of latent space).
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float/Nil] The tolerance of termination criterion for EM algorithm.
+      #   If nil is given, iterate EM steps up to the maximum number of iterations.
+      def initialize(n_components: 2, max_iter: 100, tol: 1e-8)
+        super()
+        @params = {
+          n_components: n_components,
+          max_iter: max_iter,
+          tol: tol
+        }
+      end
+      # Fit the model with given training data.
+      #
+      # The mean vector, the per-feature noise variance and the components are re-estimated on every call.
+      # The log likelihood history is only recorded when the tol parameter is not nil.
+      #
+      # @overload fit(x) -> FactorAnalysis
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   @return [FactorAnalysis] The learned transformer itself.
+      def fit(x, _y = nil)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        raise 'FactorAnalysis#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
+        # initialize some variables.
+        n_samples, n_features = x.shape
+        @mean = x.mean(0)
+        centered_x = x - @mean
+        # covariance of the centered data (normalized by n_samples); only used for the log likelihood.
+        cov_mat = centered_x.transpose.dot(centered_x) / n_samples
+        sample_vars = x.var(0)
+        sqrt_n_samples = Math.sqrt(n_samples)
+        # start from unit noise variance for every feature.
+        @noise_variance = Numo::DFloat.ones(n_features)
+        # run optimization.
+        old_loglike = 0.0
+        @n_iter = 0
+        # the log likelihood history is only tracked when a tolerance is given.
+        @loglike = [] unless @params[:tol].nil?
+        @params[:max_iter].times do |t|
+          @n_iter = t + 1
+          sqrt_noise_variance = Numo::NMath.sqrt(@noise_variance)
+          # scale each feature by its noise standard deviation; 1e-12 guards against division by zero.
+          scaled_x = centered_x / (sqrt_noise_variance * sqrt_n_samples + 1e-12)
+          # leading n_components singular values and right singular vectors of the scaled data.
+          s, u = truncate_svd(scaled_x, @params[:n_components])
+          # clip at zero so the square root stays real.
+          scaler = Numo::NMath.sqrt(Numo::DFloat.maximum(s**2 - 1.0, 0.0))
+          @components = (sqrt_noise_variance.diag.dot(u) * scaler).transpose.dup
+          # keep the noise variance at or above 1e-12.
+          @noise_variance = Numo::DFloat.maximum(sample_vars - @components.transpose.dot(@components).diagonal, 1e-12)
+          # without a tolerance there is no convergence check, so all max_iter iterations are run.
+          next if @params[:tol].nil?
+          new_loglike = log_likelihood(cov_mat, @components, @noise_variance)
+          @loglike.push(new_loglike)
+          break if (old_loglike - new_loglike).abs <= @params[:tol]
+          old_loglike = new_loglike
+        end
+        @loglike = Numo::DFloat.cast(@loglike) unless @params[:tol].nil?
+        # a single component is exposed as a vector rather than a one-row matrix.
+        @components = @components[0, true].dup if @params[:n_components] == 1
+        self
+      end
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
+      def fit_transform(x, _y = nil)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        raise 'FactorAnalysis#fit_transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
+        fit(x).transform(x)
+      end
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        raise 'FactorAnalysis#transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
+        factors = @params[:n_components] == 1 ? @components.expand_dims(0) : @components
+        centered_x = x - @mean
+        beta = Numo::Linalg.inv(Numo::DFloat.eye(factors.shape[0]) + (factors / @noise_variance).dot(factors.transpose))
+        z = centered_x.dot((beta.dot(factors) / @noise_variance).transpose)
+        @params[:n_components] == 1 ? z[true, 0].dup : z
+      end
+      private
+      def log_likelihood(cov_mat, factors, noise_vars)
+        n_samples = noise_vars.size
+        fact_cov_mat = factors.transpose.dot(factors) + noise_vars.diag
+        n_samples.fdiv(2) * Math.log(Numo::Linalg.det(fact_cov_mat)) + Numo::Linalg.inv(fact_cov_mat).dot(cov_mat).trace
+      end
+      def truncate_svd(x, k)
+        m = x.shape[1]
+        eig_vals, eig_vecs = Numo::Linalg.eigh(x.transpose.dot(x), vals_range: (m - k)...m)
+        s = Numo::NMath.sqrt(eig_vals.reverse.dup)
+        u = eig_vecs.reverse(1).dup
+        [s, u]
+      end
+    end
+  end
+end

data/lib/rumale/decomposition/fast_ica.rb ADDED Viewed

@@ -0,0 +1,184 @@
+# frozen_string_literal: true
+require 'rumale/base/estimator'
+require 'rumale/base/transformer'
+require 'rumale/utils'
+require 'rumale/validation'
+module Rumale
+  module Decomposition
+    # FastICA is a class that implements Fast Independent Component Analysis.
+    #
+    # @example
+    #   require 'numo/linalg/autoloader'
+    #   require 'rumale/decomposition/fast_ica'
+    #
+    #   transformer = Rumale::Decomposition::FastICA.new(n_components: 2, random_seed: 1)
+    #   source_data = transformer.fit_transform(observed_data)
+    #
+    # *Reference*
+    # - Hyvarinen, A., "Fast and Robust Fixed-Point Algorithms for Independent Component Analysis," IEEE Trans. Neural Networks, Vol. 10 (3), pp. 626--634, 1999.
+    # - Hyvarinen, A., and Oja, E., "Independent Component Analysis: Algorithms and Applications," Neural Networks, Vol. 13 (4-5), pp. 411--430, 2000.
+    class FastICA < ::Rumale::Base::Estimator
+      include ::Rumale::Base::Transformer
+      # Returns the unmixing matrix.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+      # Returns the mixing matrix.
+      # @return [Numo::DFloat] (shape: [n_features, n_components])
+      attr_reader :mixing
+      # Returns the number of iterations when converged.
+      # @return [Integer]
+      attr_reader :n_iter
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new transformer with FastICA.
+      #
+      # @param n_components [Integer] The number of independent components.
+      # @param whiten [Boolean] The flag indicating whether to perform whitening.
+      # @param fun [String] The type of contrast function ('logcosh', 'exp', or 'cube').
+      # @param alpha [Float] The parameter of contrast function for 'logcosh' and 'exp'.
+      #   If fun = 'cube', this parameter is ignored.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of termination criterion.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_components: 2, whiten: true, fun: 'logcosh', alpha: 1.0, max_iter: 200, tol: 1e-4, random_seed: nil)
+        super()
+        @params = {
+          n_components: n_components,
+          whiten: whiten,
+          fun: fun,
+          alpha: alpha,
+          max_iter: max_iter,
+          tol: tol,
+          random_seed: (random_seed || srand)
+        }
+        @rng = Random.new(@params[:random_seed])
+      end
+      # Fit the model with given training data.
+      #
+      # @overload fit(x) -> FastICA
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @return [FastICA] The learned transformer itself.
+      def fit(x, _y = nil)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        raise 'FastICA#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
+        @mean, whiten_mat = whitening(x, @params[:n_components]) if @params[:whiten]
+        wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
+        unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
+        @components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
+        @mixing = Numo::Linalg.pinv(@components).dup
+        if @params[:n_components] == 1
+          @components = @components.flatten.dup
+          @mixing = @mixing.flatten.dup
+        end
+        self
+      end
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
+      def fit_transform(x, _y = nil)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        raise 'FastICA#fit_transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
+        fit(x).transform(x)
+      end
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        cx = @params[:whiten] ? (x - @mean) : x
+        cx.dot(@components.transpose)
+      end
+      # Inverse transform the given transformed data with the learned model.
+      #
+      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The source data reconstructed to the mixed data.
+      # @return [Numo::DFloat] (shape: [n_samples, n_featuress]) The mixed data.
+      def inverse_transform(z)
+        z = ::Rumale::Validation.check_convert_sample_array(z)
+        m = @mixing.shape[1].nil? ? @mixing.expand_dims(0).transpose : @mixing
+        x = z.dot(m.transpose)
+        x += @mean if @params[:whiten]
+        x
+      end
+      private
+      def whitening(x, n_components)
+        n_samples, n_features = x.shape
+        mean_vec = x.mean(0)
+        centered_x = x - mean_vec
+        covar_mat = centered_x.transpose.dot(centered_x) / n_samples
+        eig_vals, eig_vecs = Numo::Linalg.eigh(covar_mat, vals_range: (n_features - n_components)...n_features)
+        [mean_vec, (eig_vecs.reverse(1).dup * (1 / Numo::NMath.sqrt(eig_vals.reverse.dup))).transpose.dup]
+      end
+      # Run the fixed-point iteration on x and return [unmixing matrix, number of iterations run].
+      # x is used as-is; #fit passes the whitened data when whitening is enabled.
+      def ica(x, fun, max_iter, tol, sub_rng)
+        n_samples, n_components = x.shape
+        # start from a decorrelated random square matrix.
+        w = decorrelation(::Rumale::Utils.rand_normal([n_components, n_components], sub_rng))
+        n_iters = 0
+        max_iter.times do |t|
+          n_iters = t + 1
+          # gx: contrast function derivative per sample; ggx: its second-order term summed over samples.
+          gx, ggx = gradient(x.dot(w.transpose), fun)
+          # NOTE(review): ggx holds one value per component (column of x.dot(w.transpose)); `w * ggx`
+          # presumably broadcasts over the last axis of w — confirm this is the intended per-component scaling.
+          new_w = decorrelation(gx.transpose.dot(x) / n_samples - w * ggx / n_samples)
+          # largest absolute change of any matrix entry.
+          err = (new_w - w).abs.max
+          w = new_w
+          break if err <= tol
+        end
+        [w, n_iters]
+      end
+      def decorrelation(w)
+        eig_vals, eig_vecs = Numo::Linalg.eigh(w.dot(w.transpose))
+        decorr_mat = (eig_vecs * (1 / Numo::NMath.sqrt(eig_vals))).dot(eig_vecs.transpose)
+        decorr_mat.dot(w)
+      end
+      def gradient(x, func)
+        case func
+        when 'exp'
+          grad_exp(x, @params[:alpha])
+        when 'cube'
+          grad_cube(x)
+        else
+          grad_logcosh(x, @params[:alpha])
+        end
+      end
+      def grad_logcosh(x, alpha)
+        gx = Numo::NMath.tanh(alpha * x)
+        ggx = (alpha * (1 - gx**2)).sum(axis: 0)
+        [gx, ggx]
+      end
+      def grad_exp(x, alpha)
+        squared_x = x**2
+        exp_x = Numo::NMath.exp(-0.5 * alpha * squared_x)
+        gx = exp_x * x
+        ggx = (exp_x * (1 - alpha * squared_x)).sum(axis: 0)
+        [gx, ggx]
+      end
+      def grad_cube(x)
+        [x**3, (3 * x**2).sum(axis: 0)]
+      end
+    end
+  end
+end

data/lib/rumale/decomposition/nmf.rb ADDED Viewed

@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+require 'rumale/base/estimator'
+require 'rumale/base/transformer'
+require 'rumale/utils'
+require 'rumale/validation'
+module Rumale
+  module Decomposition
+    # NMF is a class that implements Non-negative Matrix Factorization.
+    #
+    # @example
+    #   require 'rumale/decomposition/nmf'
+    #
+    #   decomposer = Rumale::Decomposition::NMF.new(n_components: 2)
+    #   representation = decomposer.fit_transform(samples)
+    #
+    # *Reference*
+    # - Xu, W., Liu, X., and Gong, Y., "Document Clustering Based On Non-negative Matrix Factorization," Proc. SIGIR' 03 , pp. 267--273, 2003.
+    class NMF < ::Rumale::Base::Estimator
+      include ::Rumale::Base::Transformer
+      # Returns the factorization matrix.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new transformer with NMF.
+      #
+      # @param n_components [Integer] The number of components.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of termination criterion.
+      # @param eps [Float] A small value close to zero to avoid zero division error.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_components: 2, max_iter: 500, tol: 1.0e-4, eps: 1.0e-16, random_seed: nil)
+        super()
+        @params = {
+          n_components: n_components,
+          max_iter: max_iter,
+          tol: tol,
+          eps: eps,
+          random_seed: (random_seed || srand)
+        }
+        @rng = Random.new(@params[:random_seed])
+      end
+      # Fit the model with given training data.
+      #
+      # @overload fit(x) -> NMF
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   @return [NMF] The learned transformer itself.
+      def fit(x, _y = nil)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        partial_fit(x)
+        self
+      end
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
+      def fit_transform(x, _y = nil)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        partial_fit(x)
+      end
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        partial_fit(x, update_comps: false)
+      end
+      # Inverse transform the given transformed data with the learned model.
+      #
+      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_featuress]) The restored data.
+      def inverse_transform(z)
+        z = ::Rumale::Validation.check_convert_sample_array(z)
+        z.dot(@components)
+      end
+      private
+      # Run the multiplicative update iterations on x and return the coefficient matrix.
+      # With update_comps: true the components are re-initialized and learned together with
+      # the coefficients; with update_comps: false the existing @components are left untouched
+      # and only the coefficients are estimated.
+      def partial_fit(x, update_comps: true)
+        # initialize some variables.
+        n_samples, n_features = x.shape
+        # magnitude of the random initial values, derived from the mean of x.
+        scale = Math.sqrt(x.mean / @params[:n_components])
+        # draw from a copy of the generator so that @rng itself is not advanced.
+        sub_rng = @rng.dup
+        @components = ::Rumale::Utils.rand_uniform([@params[:n_components], n_features], sub_rng) * scale if update_comps
+        coefficients = ::Rumale::Utils.rand_uniform([n_samples, @params[:n_components]], sub_rng) * scale
+        # optimization.
+        @params[:max_iter].times do
+          # update
+          if update_comps
+            nume = coefficients.transpose.dot(x)
+            # eps keeps the denominator away from zero.
+            deno = coefficients.transpose.dot(coefficients).dot(@components) + @params[:eps]
+            @components *= (nume / deno)
+          end
+          nume = x.dot(@components.transpose)
+          deno = coefficients.dot(@components).dot(@components.transpose) + @params[:eps]
+          coefficients *= (nume / deno)
+          # normalize
+          # norm of each component row; the components are rescaled only when they are being learned,
+          # while the coefficients are always multiplied by the norm.
+          norm = Numo::NMath.sqrt((@components**2).sum(axis: 1)) + @params[:eps]
+          @components /= norm.expand_dims(1) if update_comps
+          coefficients *= norm
+          # check convergence
+          # mean over samples of the squared reconstruction error.
+          err = ((x - coefficients.dot(@components))**2).sum(axis: 1).mean
+          break if err < @params[:tol]
+        end
+        coefficients
+      end
+    end
+  end
+end

data/lib/rumale/decomposition/pca.rb ADDED Viewed

@@ -0,0 +1,150 @@
+# frozen_string_literal: true
+require 'rumale/base/estimator'
+require 'rumale/base/transformer'
+require 'rumale/utils'
+require 'rumale/validation'
+module Rumale
+  # Module for matrix decomposition algorithms.
+  module Decomposition
+    # PCA is a class that implements Principal Component Analysis.
+    #
+    # @example
+    #   require 'rumale/decomposition/pca'
+    #
+    #   decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'fpt')
+    #   representation = decomposer.fit_transform(samples)
+    #
+    #   # If Numo::Linalg is installed, you can specify 'evd' for the solver option.
+    #   require 'numo/linalg/autoloader'
+    #   require 'rumale/decomposition/pca'
+    #
+    #   decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'evd')
+    #   representation = decomposer.fit_transform(samples)
+    #
+    #   # If Numo::Linalg is loaded and the solver option is not given,
+    #   # the 'evd' solver is chosen automatically.
+    #   decomposer = Rumale::Decomposition::PCA.new(n_components: 2)
+    #   representation = decomposer.fit_transform(samples)
+    #
+    # *Reference*
+    # - Sharma, A., and Paliwal, K K., "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
+    class PCA < ::Rumale::Base::Estimator
+      include ::Rumale::Base::Transformer
+      # Returns the principal components.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+      # Returns the mean vector.
+      # @return [Numo::DFloat] (shape: [n_features])
+      attr_reader :mean
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new transformer with PCA.
+      #
+      # @param n_components [Integer] The number of principal components.
+      # @param solver [String] The algorithm for the optimization ('auto', 'fpt' or 'evd').
+      #   'auto' chooses the 'evd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'fpt' solver.
+      #   'fpt' uses the fixed-point algorithm.
+      #   'evd' performs eigen value decomposition of the covariance matrix of samples.
+      # @param max_iter [Integer] The maximum number of iterations. If solver = 'evd', this parameter is ignored.
+      # @param tol [Float] The tolerance of termination criterion. If solver = 'evd', this parameter is ignored.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_components: 2, solver: 'auto', max_iter: 100, tol: 1.0e-4, random_seed: nil)
+        super()
+        @params = {
+          n_components: n_components,
+          solver: 'fpt',
+          max_iter: max_iter,
+          tol: tol,
+          random_seed: (random_seed || srand)
+        }
+        @params[:solver] = 'evd' if (solver == 'auto' && enable_linalg?(warning: false)) || solver == 'evd'
+        @rng = Random.new(@params[:random_seed])
+      end
+      # Fit the model with given training data.
+      #
+      # The mean vector and the principal components are re-estimated on every call.
+      # The 'evd' solver is only used when Numo::Linalg is available; otherwise the
+      # fixed-point iteration is run, one component at a time.
+      #
+      # @overload fit(x) -> PCA
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   @return [PCA] The learned transformer itself.
+      def fit(x, _y = nil)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        # initialize some variables.
+        @components = nil
+        n_samples, n_features = x.shape
+        # draw from a copy of the generator so that @rng itself is not advanced.
+        sub_rng = @rng.dup
+        # centering.
+        @mean = x.mean(0)
+        centered_x = x - @mean
+        # optimization.
+        # covariance matrix normalized by (n_samples - 1).
+        covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)
+        if @params[:solver] == 'evd' && enable_linalg?
+          # request only the n_components largest eigenpairs, then reverse them to put the largest first.
+          _, evecs = Numo::Linalg.eigh(covariance_mat, vals_range: (n_features - @params[:n_components])...n_features)
+          comps = evecs.reverse(1).transpose
+          # a single component is stored as a vector rather than a one-row matrix.
+          @components = @params[:n_components] == 1 ? comps[0, true].dup : comps.dup
+        else
+          @params[:n_components].times do
+            comp_vec = ::Rumale::Utils.rand_uniform(n_features, sub_rng)
+            @params[:max_iter].times do
+              # orthogonalize removes the projection onto the components found so far and rescales the vector.
+              updated = orthogonalize(covariance_mat.dot(comp_vec))
+              # stop once the inner product of successive vectors is within tol of 1.
+              break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
+              comp_vec = updated
+            end
+            # stack the new component below the ones found so far.
+            @components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
+          end
+        end
+        self
+      end
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
+      def fit_transform(x, _y = nil)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        fit(x).transform(x)
+      end
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = ::Rumale::Validation.check_convert_sample_array(x)
+        (x - @mean).dot(@components.transpose)
+      end
+      # Inverse transform the given transformed data with the learned model.
+      #
+      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_featuress]) The restored data.
+      def inverse_transform(z)
+        z = ::Rumale::Validation.check_convert_sample_array(z)
+        c = @components.shape[1].nil? ? @components.expand_dims(0) : @components
+        z.dot(c) + @mean
+      end
+      private
+      def orthogonalize(pcvec)
+        unless @components.nil?
+          delta = @components.dot(pcvec) * @components.transpose
+          delta = delta.sum(axis: 1) unless delta.shape[1].nil?
+          pcvec -= delta
+        end
+        pcvec / Math.sqrt((pcvec**2).sum.abs) + 1.0e-12
+      end
+    end
+  end
+end

data/lib/rumale/decomposition/version.rb ADDED Viewed

@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+# Rumale is a machine learning library in Ruby.
+module Rumale
+  # Module for matrix decomposition algorithms.
+  module Decomposition
+    # @!visibility private
+    VERSION = '0.24.0'
+  end
+end

data/lib/rumale/decomposition.rb ADDED Viewed

@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+require 'numo/narray'
+require_relative 'decomposition/factor_analysis'
+require_relative 'decomposition/fast_ica'
+require_relative 'decomposition/nmf'
+require_relative 'decomposition/pca'
+require_relative 'decomposition/version'

metadata ADDED Viewed

@@ -0,0 +1,88 @@
+--- !ruby/object:Gem::Specification
+name: rumale-decomposition
+version: !ruby/object:Gem::Version
+  version: 0.24.0
+platform: ruby
+authors:
+- yoshoku
+autorequire:
+bindir: exe
+cert_chain: []
+date: 2022-12-31 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: numo-narray
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.9.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.9.1
+- !ruby/object:Gem::Dependency
+  name: rumale-core
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.24.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.24.0
+description: |
+  Rumale::Decomposition provides matrix decomposition algorithms,
+  such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
+  with Rumale interface.
+email:
+- yoshoku@outlook.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- LICENSE.txt
+- README.md
+- lib/rumale/decomposition.rb
+- lib/rumale/decomposition/factor_analysis.rb
+- lib/rumale/decomposition/fast_ica.rb
+- lib/rumale/decomposition/nmf.rb
+- lib/rumale/decomposition/pca.rb
+- lib/rumale/decomposition/version.rb
+homepage: https://github.com/yoshoku/rumale
+licenses:
+- BSD-3-Clause
+metadata:
+  homepage_uri: https://github.com/yoshoku/rumale
+  source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-decomposition
+  changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
+  documentation_uri: https://yoshoku.github.io/rumale/doc/
+  rubygems_mfa_required: 'true'
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.3.26
+signing_key:
+specification_version: 4
+summary: Rumale::Decomposition provides matrix decomposition algorithms with Rumale
+  interface
+test_files: []