rumale-decomposition 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4a2420f4a6ada0919ed266e0cbf84eb2b52b5d17a86a3d0273646e8ee1253435
4
+ data.tar.gz: 32194d34d4165898dd8478ef8fd56b6556cee1832140ae028daf707c0d1d4f93
5
+ SHA512:
6
+ metadata.gz: a94a284b3692806962acab85c22f7c96595a00b2dd27e4fd638cbb9cd15b08e087651ec5e7c933a239df0b5ebdb4da86394c646bbc8daa517c5d880ab7afac0b
7
+ data.tar.gz: 4b9105d8e34bf11be4ed2e563431ee2755c949684530b9a0020a4b05acdb504d75acdb8f2b39559a9ba3e90e07b99e153b9724c2c3f3f5de452849e7eb207431
data/LICENSE.txt ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2022 Atsushi Tatsuma
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of the copyright holder nor the names of its
15
+ contributors may be used to endorse or promote products derived from
16
+ this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # Rumale::Decomposition
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rumale-decomposition.svg)](https://badge.fury.io/rb/rumale-decomposition)
4
+ [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-decomposition/LICENSE.txt)
5
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
6
+
7
+ Rumale is a machine learning library in Ruby.
8
+ Rumale::Decomposition provides matrix decomposition algorithms,
9
+ such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
10
+ with Rumale interface.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'rumale-decomposition'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle install
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install rumale-decomposition
27
+
28
+ ## Documentation
29
+
30
+ - [Rumale API Documentation - Decomposition](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
31
+
32
+ ## License
33
+
34
+ The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
+ module Rumale
8
+ module Decomposition
9
+ # FactorAnalysis is a class that implements factor analysis with the EM algorithm.
10
+ #
11
+ # @example
12
+ # require 'numo/linalg/autoloader'
13
+ # require 'rumale/decomposition/factor_analysis'
14
+ #
15
+ # decomposer = Rumale::Decomposition::FactorAnalysis.new(n_components: 2)
16
+ # representation = decomposer.fit_transform(samples)
17
+ #
18
+ # *Reference*
19
+ # - Barber, D., "Bayesian Reasoning and Machine Learning," Cambridge University Press, 2012.
20
+ class FactorAnalysis < ::Rumale::Base::Estimator
21
+ include ::Rumale::Base::Transformer
22
+
23
+ # Returns the mean vector.
24
+ # @return [Numo::DFloat] (shape: [n_features])
25
+ attr_reader :mean
26
+
27
+ # Returns the estimated noise variance for each feature.
28
+ # @return [Numo::DFloat] (shape: [n_features])
29
+ attr_reader :noise_variance
30
+
31
+ # Returns the components with maximum variance.
32
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
33
+ attr_reader :components
34
+
35
+ # Returns the log likelihood at each iteration.
36
+ # @return [Numo::DFloat] (shape: [n_iter])
37
+ attr_reader :loglike
38
+
39
+ # Return the number of iterations run for optimization
40
+ # @return [Integer]
41
+ attr_reader :n_iter
42
+
43
+ # Create a new transformer with factor analysis.
44
+ #
45
+ # @param n_components [Integer] The number of components (dimensionality of latent space).
46
+ # @param max_iter [Integer] The maximum number of iterations.
47
+ # @param tol [Float/Nil] The tolerance of termination criterion for EM algorithm.
48
+ # If nil is given, iterate EM steps up to the maximum number of iterations.
49
def initialize(n_components: 2, max_iter: 100, tol: 1e-8)
  super()
  # Hyper-parameters are kept in a single hash; fit and transform read them back from @params.
  @params = { n_components: n_components, max_iter: max_iter, tol: tol }
end
57
+
58
+ # Fit the model with given training data.
59
+ #
60
+ # @overload fit(x) -> FactorAnalysis
61
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
62
+ # @return [FactorAnalysis] The learned transformer itself.
63
def fit(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  raise 'FactorAnalysis#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

  n_components = @params[:n_components]
  tol = @params[:tol]
  # With tol = nil the loop always runs max_iter times and no log likelihood is recorded.
  track_loglike = !tol.nil?

  # Center the data and precompute the statistics reused by every iteration.
  n_samples, n_features = x.shape
  @mean = x.mean(0)
  centered_x = x - @mean
  cov_mat = centered_x.transpose.dot(centered_x) / n_samples
  sample_vars = x.var(0)
  sqrt_n_samples = Math.sqrt(n_samples)
  @noise_variance = Numo::DFloat.ones(n_features)

  # Reset the history on every call so that a refit with tol = nil does not
  # keep exposing the log likelihoods recorded by an earlier fit.
  @loglike = track_loglike ? [] : nil
  @n_iter = 0
  old_loglike = 0.0
  @params[:max_iter].times do |t|
    @n_iter = t + 1
    # Scale each feature by its current noise standard deviation (1e-12 guards the division).
    sqrt_noise_variance = Numo::NMath.sqrt(@noise_variance)
    scaled_x = centered_x / (sqrt_noise_variance * sqrt_n_samples + 1e-12)
    s, u = truncate_svd(scaled_x, n_components)
    # Clamp at zero so the square root stays real when s**2 < 1.
    scaler = Numo::NMath.sqrt(Numo::DFloat.maximum(s**2 - 1.0, 0.0))
    @components = (sqrt_noise_variance.diag.dot(u) * scaler).transpose.dup
    # Floor the noise variance at 1e-12 to keep it strictly positive.
    @noise_variance = Numo::DFloat.maximum(sample_vars - @components.transpose.dot(@components).diagonal, 1e-12)
    next unless track_loglike

    new_loglike = log_likelihood(cov_mat, @components, @noise_variance)
    @loglike.push(new_loglike)
    break if (old_loglike - new_loglike).abs <= tol

    old_loglike = new_loglike
  end

  @loglike = Numo::DFloat.cast(@loglike) if track_loglike
  # A single component is exposed as a vector rather than a 1-row matrix.
  @components = @components[0, true].dup if n_components == 1
  self
end
101
+
102
+ # Fit the model with training data, and then transform them with the learned model.
103
+ #
104
+ # @overload fit_transform(x) -> Numo::DFloat
105
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
106
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
107
def fit_transform(x, _y = nil)
  samples = ::Rumale::Validation.check_convert_sample_array(x)
  unless enable_linalg?(warning: false)
    raise 'FactorAnalysis#fit_transform requires Numo::Linalg but that is not loaded'
  end

  # fit returns the estimator itself, so the same samples are projected right after learning.
  fit(samples).transform(samples)
end
113
+
114
+ # Transform the given data with the learned model.
115
+ #
116
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
117
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
118
def transform(x)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  raise 'FactorAnalysis#transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

  single = @params[:n_components] == 1
  # fit stores a single component as a vector; restore the 1-row matrix form for the algebra below.
  loadings = single ? @components.expand_dims(0) : @components
  weighted = loadings / @noise_variance
  gain = Numo::Linalg.inv(Numo::DFloat.eye(loadings.shape[0]) + weighted.dot(loadings.transpose))
  projection = (gain.dot(loadings) / @noise_variance).transpose
  latent = (x - @mean).dot(projection)
  # Mirror fit: a single component yields a vector of length n_samples.
  single ? latent[true, 0].dup : latent
end
128
+
129
+ private
130
+
131
# Computes the scalar that fit compares between iterations to decide convergence:
# (noise_vars.size / 2) * log(det(C)) + trace(C^-1 * cov_mat),
# where C = factors^T * factors + diag(noise_vars).
def log_likelihood(cov_mat, factors, noise_vars)
  model_cov = factors.transpose.dot(factors) + noise_vars.diag
  log_det = Math.log(Numo::Linalg.det(model_cov))
  trace_term = Numo::Linalg.inv(model_cov).dot(cov_mat).trace
  # The weight is half the length of noise_vars.
  noise_vars.size.fdiv(2) * log_det + trace_term
end
136
+
137
# Returns [s, u] for the k leading components of x, derived from the
# eigendecomposition of x^T x: s holds the square roots of the selected
# eigenvalues and u the matching eigenvectors as columns, both reversed
# relative to the order eigh returns them in.
def truncate_svd(x, k)
  n_cols = x.shape[1]
  gram = x.transpose.dot(x)
  # Select only the last k eigenpairs (eigh is documented to sort eigenvalues ascending).
  vals, vecs = Numo::Linalg.eigh(gram, vals_range: (n_cols - k)...n_cols)
  [Numo::NMath.sqrt(vals.reverse.dup), vecs.reverse(1).dup]
end
144
+ end
145
+ end
146
+ end
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/utils'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ module Decomposition
10
+ # FastICA is a class that implements Fast Independent Component Analysis.
11
+ #
12
+ # @example
13
+ # require 'numo/linalg/autoloader'
14
+ # require 'rumale/decomposition/fast_ica'
15
+ #
16
+ # transformer = Rumale::Decomposition::FastICA.new(n_components: 2, random_seed: 1)
17
+ # source_data = transformer.fit_transform(observed_data)
18
+ #
19
+ # *Reference*
20
+ # - Hyvarinen, A., "Fast and Robust Fixed-Point Algorithms for Independent Component Analysis," IEEE Trans. Neural Networks, Vol. 10 (3), pp. 626--634, 1999.
21
+ # - Hyvarinen, A., and Oja, E., "Independent Component Analysis: Algorithms and Applications," Neural Networks, Vol. 13 (4-5), pp. 411--430, 2000.
22
+ class FastICA < ::Rumale::Base::Estimator
23
+ include ::Rumale::Base::Transformer
24
+
25
+ # Returns the unmixing matrix.
26
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
27
+ attr_reader :components
28
+
29
+ # Returns the mixing matrix.
30
+ # @return [Numo::DFloat] (shape: [n_features, n_components])
31
+ attr_reader :mixing
32
+
33
+ # Returns the number of iterations when converged.
34
+ # @return [Integer]
35
+ attr_reader :n_iter
36
+
37
+ # Return the random generator.
38
+ # @return [Random]
39
+ attr_reader :rng
40
+
41
+ # Create a new transformer with FastICA.
42
+ #
43
+ # @param n_components [Integer] The number of independent components.
44
+ # @param whiten [Boolean] The flag indicating whether to perform whitening.
45
+ # @param fun [String] The type of contrast function ('logcosh', 'exp', or 'cube').
46
+ # @param alpha [Float] The parameter of contrast function for 'logcosh' and 'exp'.
47
+ # If fun = 'cube', this parameter is ignored.
48
+ # @param max_iter [Integer] The maximum number of iterations.
49
+ # @param tol [Float] The tolerance of termination criterion.
50
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
51
def initialize(n_components: 2, whiten: true, fun: 'logcosh', alpha: 1.0, max_iter: 200, tol: 1e-4, random_seed: nil)
  super()
  # Without an explicit seed, the value returned by Kernel#srand is recorded in @params.
  seed = random_seed || srand
  @params = {
    n_components: n_components,
    whiten: whiten,
    fun: fun,
    alpha: alpha,
    max_iter: max_iter,
    tol: tol,
    random_seed: seed
  }
  @rng = Random.new(seed)
end
64
+
65
+ # Fit the model with given training data.
66
+ #
67
+ # @overload fit(x) -> FastICA
68
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
69
+ # @return [FastICA] The learned transformer itself.
70
def fit(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  raise 'FastICA#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

  fun = @params[:fun]
  max_iter = @params[:max_iter]
  tol = @params[:tol]
  if @params[:whiten]
    # Estimate the unmixing matrix in the whitened space, then fold the
    # whitening matrix into it so @components applies to centered raw data.
    @mean, whiten_mat = whitening(x, @params[:n_components])
    whitened = (x - @mean).dot(whiten_mat.transpose)
    unmixing, @n_iter = ica(whitened, fun, max_iter, tol, @rng.dup)
    @components = unmixing.dot(whiten_mat)
  else
    unmixing, @n_iter = ica(x, fun, max_iter, tol, @rng.dup)
    @components = unmixing
  end
  @mixing = Numo::Linalg.pinv(@components).dup

  # With a single component both matrices are exposed as flat vectors.
  if @params[:n_components] == 1
    @components = @components.flatten.dup
    @mixing = @mixing.flatten.dup
  end
  self
end
85
+
86
+ # Fit the model with training data, and then transform them with the learned model.
87
+ #
88
+ # @overload fit_transform(x) -> Numo::DFloat
89
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
90
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
91
def fit_transform(x, _y = nil)
  samples = ::Rumale::Validation.check_convert_sample_array(x)
  unless enable_linalg?(warning: false)
    raise 'FastICA#fit_transform requires Numo::Linalg but that is not loaded'
  end

  # fit returns the estimator itself, so the same samples are unmixed right after learning.
  fit(samples).transform(samples)
end
97
+
98
+ # Transform the given data with the learned model.
99
+ #
100
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
101
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
102
def transform(x)
  samples = ::Rumale::Validation.check_convert_sample_array(x)

  # The mean is only removed when the model was configured with whitening.
  samples -= @mean if @params[:whiten]
  samples.dot(@components.transpose)
end
108
+
109
+ # Inverse transform the given transformed data with the learned model.
110
+ #
111
+ # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The source data reconstructed to the mixed data.
112
+ # @return [Numo::DFloat] (shape: [n_samples, n_features]) The mixed data.
113
def inverse_transform(z)
  z = ::Rumale::Validation.check_convert_sample_array(z)

  mixing_mat = @mixing
  # fit flattens the mixing matrix for a single component; rebuild it as a column matrix.
  mixing_mat = mixing_mat.expand_dims(0).transpose if mixing_mat.shape.size < 2
  restored = z.dot(mixing_mat.transpose)
  # Undo the centering that transform applied when whitening is enabled.
  @params[:whiten] ? restored + @mean : restored
end
121
+
122
+ private
123
+
124
# Returns [mean_vec, whiten_mat]: the per-feature mean of x and a matrix whose
# rows are n_components eigenvectors of the covariance of x, each divided by
# the square root of its eigenvalue.
def whitening(x, n_components)
  n_samples, n_features = x.shape
  mean_vec = x.mean(0)
  centered = x - mean_vec
  covariance = centered.transpose.dot(centered) / n_samples
  # Request only the last n_components eigenpairs, then reverse their order.
  vals, vecs = Numo::Linalg.eigh(covariance, vals_range: (n_features - n_components)...n_features)
  inv_std = 1 / Numo::NMath.sqrt(vals.reverse.dup)
  whiten_mat = (vecs.reverse(1).dup * inv_std).transpose.dup
  [mean_vec, whiten_mat]
end
132
+
133
# Runs the parallel fixed-point iteration on x (one sample per row).
#
# Starts from a decorrelated random matrix drawn with sub_rng and stops after
# max_iter iterations, or earlier once the largest absolute change between two
# successive unmixing matrices is at most tol.
#
# Returns [w, n_iters]: the unmixing matrix (one unmixing vector per row) and
# the number of iterations performed.
def ica(x, fun, max_iter, tol, sub_rng)
  n_samples, n_components = x.shape
  w = decorrelation(::Rumale::Utils.rand_normal([n_components, n_components], sub_rng))
  n_iters = 0
  max_iter.times do |t|
    n_iters = t + 1
    # Column i of x.dot(w.transpose) is the projection onto row i of w, so
    # ggx[i] is the derivative sum that belongs to the i-th unmixing vector.
    gx, ggx = gradient(x.dot(w.transpose), fun)
    # Scale row i of w by ggx[i]. A bare `w * ggx` would broadcast ggx over the
    # columns instead and mix the derivative terms of different components.
    new_w = decorrelation(gx.transpose.dot(x) / n_samples - w * ggx.expand_dims(1) / n_samples)
    err = (new_w - w).abs.max
    w = new_w
    break if err <= tol
  end
  [w, n_iters]
end
147
+
148
# Symmetric decorrelation: returns (w w^T)^(-1/2) w, with the inverse square
# root assembled from the eigendecomposition of w w^T.
def decorrelation(w)
  vals, vecs = Numo::Linalg.eigh(w.dot(w.transpose))
  inv_sqrt_vals = 1 / Numo::NMath.sqrt(vals)
  (vecs * inv_sqrt_vals).dot(vecs.transpose).dot(w)
end
153
+
154
# Dispatches to the contrast-function helper selected by func.
# Any value other than 'exp' or 'cube' falls back to the logcosh variant.
def gradient(x, func)
  case func
  when 'exp' then grad_exp(x, @params[:alpha])
  when 'cube' then grad_cube(x)
  else grad_logcosh(x, @params[:alpha])
  end
end
164
+
165
# logcosh contrast: returns [tanh(alpha * x), column sums of alpha * (1 - tanh(alpha * x)**2)].
def grad_logcosh(x, alpha)
  activation = Numo::NMath.tanh(alpha * x)
  derivative_sums = (alpha * (1 - activation**2)).sum(axis: 0)
  [activation, derivative_sums]
end
170
+
171
# exp contrast: returns [x * exp(-alpha * x**2 / 2), column sums of (1 - alpha * x**2) * exp(-alpha * x**2 / 2)].
def grad_exp(x, alpha)
  x_sq = x**2
  envelope = Numo::NMath.exp(-0.5 * alpha * x_sq)
  activation = envelope * x
  derivative_sums = (envelope * (1 - alpha * x_sq)).sum(axis: 0)
  [activation, derivative_sums]
end
178
+
179
# cube contrast: returns [x**3, column sums of 3 * x**2].
def grad_cube(x)
  activation = x**3
  derivative_sums = (3 * x**2).sum(axis: 0)
  [activation, derivative_sums]
end
182
+ end
183
+ end
184
+ end
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/utils'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ module Decomposition
10
+ # NMF is a class that implements Non-negative Matrix Factorization.
11
+ #
12
+ # @example
13
+ # require 'rumale/decomposition/nmf'
14
+ #
15
+ # decomposer = Rumale::Decomposition::NMF.new(n_components: 2)
16
+ # representation = decomposer.fit_transform(samples)
17
+ #
18
+ # *Reference*
19
+ # - Xu, W., Liu, X., and Gong, Y., "Document Clustering Based On Non-negative Matrix Factorization," Proc. SIGIR' 03 , pp. 267--273, 2003.
20
+ class NMF < ::Rumale::Base::Estimator
21
+ include ::Rumale::Base::Transformer
22
+
23
+ # Returns the factorization matrix.
24
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
25
+ attr_reader :components
26
+
27
+ # Return the random generator.
28
+ # @return [Random]
29
+ attr_reader :rng
30
+
31
+ # Create a new transformer with NMF.
32
+ #
33
+ # @param n_components [Integer] The number of components.
34
+ # @param max_iter [Integer] The maximum number of iterations.
35
+ # @param tol [Float] The tolerance of termination criterion.
36
+ # @param eps [Float] A small value close to zero to avoid zero division error.
37
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
38
def initialize(n_components: 2, max_iter: 500, tol: 1.0e-4, eps: 1.0e-16, random_seed: nil)
  super()
  # Without an explicit seed, the value returned by Kernel#srand is recorded in @params.
  seed = random_seed || srand
  @params = {
    n_components: n_components,
    max_iter: max_iter,
    tol: tol,
    eps: eps,
    random_seed: seed
  }
  @rng = Random.new(seed)
end
49
+
50
+ # Fit the model with given training data.
51
+ #
52
+ # @overload fit(x) -> NMF
53
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
54
+ # @return [NMF] The learned transformer itself.
55
def fit(x, _y = nil)
  # The coefficient matrix returned by partial_fit is discarded; only @components is kept.
  partial_fit(::Rumale::Validation.check_convert_sample_array(x))
  self
end
61
+
62
+ # Fit the model with training data, and then transform them with the learned model.
63
+ #
64
+ # @overload fit_transform(x) -> Numo::DFloat
65
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
66
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
67
def fit_transform(x, _y = nil)
  samples = ::Rumale::Validation.check_convert_sample_array(x)

  # partial_fit learns @components and returns the coefficients of the training samples.
  partial_fit(samples)
end
72
+
73
+ # Transform the given data with the learned model.
74
+ #
75
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
76
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
77
def transform(x)
  samples = ::Rumale::Validation.check_convert_sample_array(x)

  # Keep the learned components fixed and optimize only the coefficients.
  partial_fit(samples, update_comps: false)
end
82
+
83
+ # Inverse transform the given transformed data with the learned model.
84
+ #
85
+ # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
86
+ # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
87
def inverse_transform(z)
  coefficients = ::Rumale::Validation.check_convert_sample_array(z)

  # Reconstruction is the product of the coefficients and the learned components.
  coefficients.dot(@components)
end
92
+
93
+ private
94
+
95
# Runs the multiplicative-update iterations and returns the coefficient matrix.
# With update_comps: true, @components is re-initialized and optimized together
# with the coefficients; with update_comps: false, @components is left as is and
# only the coefficients are optimized.
def partial_fit(x, update_comps: true)
  n_components = @params[:n_components]
  eps = @params[:eps]
  n_samples, n_features = x.shape
  scale = Math.sqrt(x.mean / n_components)
  # Work on a copy so repeated calls start from the same generator state.
  sub_rng = @rng.dup
  if update_comps
    @components = ::Rumale::Utils.rand_uniform([n_components, n_features], sub_rng) * scale
  end
  coefficients = ::Rumale::Utils.rand_uniform([n_samples, n_components], sub_rng) * scale

  @params[:max_iter].times do
    # Multiplicative updates; eps keeps the denominators away from zero.
    if update_comps
      coef_t = coefficients.transpose
      @components *= coef_t.dot(x) / (coef_t.dot(coefficients).dot(@components) + eps)
    end
    comps_t = @components.transpose
    coefficients *= x.dot(comps_t) / (coefficients.dot(@components).dot(comps_t) + eps)
    # Rescale by the row norms of the components; the coefficients absorb the scale.
    row_norms = Numo::NMath.sqrt((@components**2).sum(axis: 1)) + eps
    @components /= row_norms.expand_dims(1) if update_comps
    coefficients *= row_norms
    # Stop once the mean squared reconstruction error per sample drops below tol.
    err = ((x - coefficients.dot(@components))**2).sum(axis: 1).mean
    break if err < @params[:tol]
  end
  coefficients
end
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
+ module Rumale
8
+ # Module for matrix decomposition algorithms.
9
+ module Decomposition
10
+ # PCA is a class that implements Principal Component Analysis.
11
+ #
12
+ # @example
13
+ # require 'rumale/decomposition/pca'
14
+ #
15
+ # decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'fpt')
16
+ # representation = decomposer.fit_transform(samples)
17
+ #
18
+ # # If Numo::Linalg is installed, you can specify 'evd' for the solver option.
19
+ # require 'numo/linalg/autoloader'
20
+ # require 'rumale/decomposition/pca'
21
+ #
22
+ # decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'evd')
23
+ # representation = decomposer.fit_transform(samples)
24
+ #
25
+ # # If Numo::Linalg is loaded and the solver option is not given,
26
+ # # the 'evd' solver is chosen automatically.
27
+ # decomposer = Rumale::Decomposition::PCA.new(n_components: 2)
28
+ # representation = decomposer.fit_transform(samples)
29
+ #
30
+ # *Reference*
31
+ # - Sharma, A., and Paliwal, K K., "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
32
+ class PCA < ::Rumale::Base::Estimator
33
+ include ::Rumale::Base::Transformer
34
+
35
+ # Returns the principal components.
36
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
37
+ attr_reader :components
38
+
39
+ # Returns the mean vector.
40
+ # @return [Numo::DFloat] (shape: [n_features])
41
+ attr_reader :mean
42
+
43
+ # Return the random generator.
44
+ # @return [Random]
45
+ attr_reader :rng
46
+
47
+ # Create a new transformer with PCA.
48
+ #
49
+ # @param n_components [Integer] The number of principal components.
50
+ # @param solver [String] The algorithm for the optimization ('auto', 'fpt' or 'evd').
51
+ # 'auto' chooses the 'evd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'fpt' solver.
52
+ # 'fpt' uses the fixed-point algorithm.
53
+ # 'evd' performs eigen value decomposition of the covariance matrix of samples.
54
+ # @param max_iter [Integer] The maximum number of iterations. If solver = 'evd', this parameter is ignored.
55
+ # @param tol [Float] The tolerance of termination criterion. If solver = 'evd', this parameter is ignored.
56
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
57
def initialize(n_components: 2, solver: 'auto', max_iter: 100, tol: 1.0e-4, random_seed: nil)
  super()
  # Without an explicit seed, the value returned by Kernel#srand is recorded in @params.
  seed = random_seed || srand
  # 'evd' is used when requested explicitly, or when 'auto' finds Numo::Linalg available;
  # every other value falls back to the fixed-point solver.
  use_evd = solver == 'evd' || (solver == 'auto' && enable_linalg?(warning: false))
  @params = {
    n_components: n_components,
    solver: use_evd ? 'evd' : 'fpt',
    max_iter: max_iter,
    tol: tol,
    random_seed: seed
  }
  @rng = Random.new(seed)
end
69
+
70
+ # Fit the model with given training data.
71
+ #
72
+ # @overload fit(x) -> PCA
73
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
74
+ # @return [PCA] The learned transformer itself.
75
def fit(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)

  n_components = @params[:n_components]
  n_samples, n_features = x.shape
  sub_rng = @rng.dup
  # Must be cleared first: orthogonalize reads @components while the fixed-point loop fills it.
  @components = nil
  @mean = x.mean(0)
  centered_x = x - @mean
  covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)

  unless @params[:solver] == 'evd' && enable_linalg?
    # Fixed-point solver: extract one component at a time, each one
    # orthogonalized against the components found so far.
    n_components.times do
      comp_vec = ::Rumale::Utils.rand_uniform(n_features, sub_rng)
      @params[:max_iter].times do
        updated = orthogonalize(covariance_mat.dot(comp_vec))
        break if (updated.dot(comp_vec) - 1).abs < @params[:tol]

        comp_vec = updated
      end
      @components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
    end
    return self
  end

  # Eigen-decomposition solver: request the last n_components eigenvectors and reverse their order.
  _, evecs = Numo::Linalg.eigh(covariance_mat, vals_range: (n_features - n_components)...n_features)
  comps = evecs.reverse(1).transpose
  @components = n_components == 1 ? comps[0, true].dup : comps.dup
  self
end
105
+
106
+ # Fit the model with training data, and then transform them with the learned model.
107
+ #
108
+ # @overload fit_transform(x) -> Numo::DFloat
109
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
110
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
111
def fit_transform(x, _y = nil)
  samples = ::Rumale::Validation.check_convert_sample_array(x)

  # fit returns the estimator itself, so the same samples are projected right after learning.
  fit(samples).transform(samples)
end
116
+
117
+ # Transform the given data with the learned model.
118
+ #
119
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
120
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
121
def transform(x)
  # Center with the training mean, then project onto the learned components.
  centered = ::Rumale::Validation.check_convert_sample_array(x) - @mean
  centered.dot(@components.transpose)
end
126
+
127
+ # Inverse transform the given transformed data with the learned model.
128
+ #
129
+ # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
130
+ # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
131
def inverse_transform(z)
  z = ::Rumale::Validation.check_convert_sample_array(z)

  basis = @components
  # A one-dimensional @components (single component) is promoted to a 1-row matrix.
  basis = basis.expand_dims(0) if basis.shape.size < 2
  z.dot(basis) + @mean
end
137
+
138
+ private
139
+
140
# Removes from pcvec its projections onto the components found so far (if any)
# and rescales the remainder to unit length.
#
# The small constant belongs to the denominator: it guards the division when the
# remainder has zero norm. Adding it to the quotient instead would turn a zero
# vector into NaN and shift every element of the result by 1.0e-12.
def orthogonalize(pcvec)
  unless @components.nil?
    delta = @components.dot(pcvec) * @components.transpose
    # With several stored components delta is a matrix; sum the per-component projections.
    delta = delta.sum(axis: 1) unless delta.shape[1].nil?
    pcvec -= delta
  end
  pcvec / (Math.sqrt((pcvec**2).sum.abs) + 1.0e-12)
end
148
+ end
149
+ end
150
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rumale is a machine learning library in Ruby.
4
+ module Rumale
5
+ # Module for matrix decomposition algorithms.
6
+ module Decomposition
7
+ # @!visibility private
8
+ VERSION = '0.24.0'
9
+ end
10
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'decomposition/factor_analysis'
6
+ require_relative 'decomposition/fast_ica'
7
+ require_relative 'decomposition/nmf'
8
+ require_relative 'decomposition/pca'
9
+ require_relative 'decomposition/version'
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rumale-decomposition
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.24.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rumale-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.24.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.24.0
41
+ description: |
42
+ Rumale::Decomposition provides matrix decomposition algorithms,
43
+ such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
44
+ with Rumale interface.
45
+ email:
46
+ - yoshoku@outlook.com
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - LICENSE.txt
52
+ - README.md
53
+ - lib/rumale/decomposition.rb
54
+ - lib/rumale/decomposition/factor_analysis.rb
55
+ - lib/rumale/decomposition/fast_ica.rb
56
+ - lib/rumale/decomposition/nmf.rb
57
+ - lib/rumale/decomposition/pca.rb
58
+ - lib/rumale/decomposition/version.rb
59
+ homepage: https://github.com/yoshoku/rumale
60
+ licenses:
61
+ - BSD-3-Clause
62
+ metadata:
63
+ homepage_uri: https://github.com/yoshoku/rumale
64
+ source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-decomposition
65
+ changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
66
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
67
+ rubygems_mfa_required: 'true'
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubygems_version: 3.3.26
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: Rumale::Decomposition provides matrix decomposition algorithms with Rumale
87
+ interface
88
+ test_files: []