rumale-decomposition 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4a2420f4a6ada0919ed266e0cbf84eb2b52b5d17a86a3d0273646e8ee1253435
4
+ data.tar.gz: 32194d34d4165898dd8478ef8fd56b6556cee1832140ae028daf707c0d1d4f93
5
+ SHA512:
6
+ metadata.gz: a94a284b3692806962acab85c22f7c96595a00b2dd27e4fd638cbb9cd15b08e087651ec5e7c933a239df0b5ebdb4da86394c646bbc8daa517c5d880ab7afac0b
7
+ data.tar.gz: 4b9105d8e34bf11be4ed2e563431ee2755c949684530b9a0020a4b05acdb504d75acdb8f2b39559a9ba3e90e07b99e153b9724c2c3f3f5de452849e7eb207431
data/LICENSE.txt ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2022 Atsushi Tatsuma
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice, this
8
+ list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of the copyright holder nor the names of its
15
+ contributors may be used to endorse or promote products derived from
16
+ this software without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # Rumale::Decomposition
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/rumale-decomposition.svg)](https://badge.fury.io/rb/rumale-decomposition)
4
+ [![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-decomposition/LICENSE.txt)
5
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
6
+
7
+ Rumale is a machine learning library in Ruby.
8
+ Rumale::Decomposition provides matrix decomposition algorithms,
9
+ such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
10
+ with Rumale interface.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'rumale-decomposition'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle install
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install rumale-decomposition
27
+
28
+ ## Documentation
29
+
30
+ - [Rumale API Documentation - Decomposition](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
31
+
32
+ ## License
33
+
34
+ The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
+ module Rumale
8
+ module Decomposition
9
+ # FactorAnalysis is a class that implements factor analysis with the EM algorithm.
10
+ #
11
+ # @example
12
+ # require 'numo/linalg/autoloader'
13
+ # require 'rumale/decomposition/factor_analysis'
14
+ #
15
+ # decomposer = Rumale::Decomposition::FactorAnalysis.new(n_components: 2)
16
+ # representation = decomposer.fit_transform(samples)
17
+ #
18
+ # *Reference*
19
+ # - Barber, D., "Bayesian Reasoning and Machine Learning," Cambridge University Press, 2012.
20
+ class FactorAnalysis < ::Rumale::Base::Estimator
21
+ include ::Rumale::Base::Transformer
22
+
23
+ # Returns the mean vector.
24
+ # @return [Numo::DFloat] (shape: [n_features])
25
+ attr_reader :mean
26
+
27
+ # Returns the estimated noise variance for each feature.
28
+ # @return [Numo::DFloat] (shape: [n_features])
29
+ attr_reader :noise_variance
30
+
31
+ # Returns the components with maximum variance.
32
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
33
+ attr_reader :components
34
+
35
+ # Returns the log likelihood at each iteration.
36
+ # @return [Numo::DFloat] (shape: [n_iter])
37
+ attr_reader :loglike
38
+
39
+ # Return the number of iterations run for optimization
40
+ # @return [Integer]
41
+ attr_reader :n_iter
42
+
43
+ # Create a new transformer with factor analysis.
44
+ #
45
+ # @param n_components [Integer] The number of components (dimensionality of latent space).
46
+ # @param max_iter [Integer] The maximum number of iterations.
47
+ # @param tol [Float/Nil] The tolerance of termination criterion for EM algorithm.
48
+ # If nil is given, iterate EM steps up to the maximum number of iterations.
49
def initialize(n_components: 2, max_iter: 100, tol: 1e-8)
  # Run the parent initializer first, then record the hyper-parameters.
  super()
  # All hyper-parameters are kept in one hash keyed by keyword name.
  @params = { n_components: n_components, max_iter: max_iter, tol: tol }
end
57
+
58
+ # Fit the model with given training data.
59
+ #
60
+ # @overload fit(x) -> FactorAnalysis
61
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
62
+ # @return [FactorAnalysis] The learned transformer itself.
63
def fit(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  raise 'FactorAnalysis#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

  n_components = @params[:n_components]
  tol = @params[:tol]
  # The log likelihood is only evaluated (and recorded) when a tolerance is given.
  track_loglike = !tol.nil?

  # Quantities that stay fixed over all iterations.
  n_samples, n_features = x.shape
  @mean = x.mean(0)
  centered_x = x - @mean
  cov_mat = centered_x.transpose.dot(centered_x) / n_samples
  sample_vars = x.var(0)
  sqrt_n_samples = Math.sqrt(n_samples)
  @noise_variance = Numo::DFloat.ones(n_features)

  # Iterative optimization.
  prev_loglike = 0.0
  @n_iter = 0
  @loglike = [] if track_loglike
  @params[:max_iter].times do |iter|
    @n_iter = iter + 1
    noise_std = Numo::NMath.sqrt(@noise_variance)
    # The small constant keeps the divisor away from zero.
    scaled_x = centered_x / (noise_std * sqrt_n_samples + 1e-12)
    sing_vals, sing_vecs = truncate_svd(scaled_x, n_components)
    scaler = Numo::NMath.sqrt(Numo::DFloat.maximum(sing_vals**2 - 1.0, 0.0))
    @components = (noise_std.diag.dot(sing_vecs) * scaler).transpose.dup
    # Noise variances are floored at 1e-12.
    @noise_variance = Numo::DFloat.maximum(sample_vars - @components.transpose.dot(@components).diagonal, 1e-12)
    next unless track_loglike

    curr_loglike = log_likelihood(cov_mat, @components, @noise_variance)
    @loglike.push(curr_loglike)
    break if (prev_loglike - curr_loglike).abs <= tol

    prev_loglike = curr_loglike
  end

  @loglike = Numo::DFloat.cast(@loglike) if track_loglike
  # With a single component the components are stored as a vector rather than a 1-row matrix.
  @components = @components[0, true].dup if n_components == 1
  self
end
101
+
102
+ # Fit the model with training data, and then transform them with the learned model.
103
+ #
104
+ # @overload fit_transform(x) -> Numo::DFloat
105
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
106
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
107
def fit_transform(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  unless enable_linalg?(warning: false)
    raise 'FactorAnalysis#fit_transform requires Numo::Linalg but that is not loaded'
  end

  # fit returns the estimator itself, so the same data can be projected right away.
  fitted = fit(x)
  fitted.transform(x)
end
113
+
114
+ # Transform the given data with the learned model.
115
+ #
116
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
117
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
118
def transform(x)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  raise 'FactorAnalysis#transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

  single_component = @params[:n_components] == 1
  # A single component is stored as a vector; lift it to a 1-row matrix for the algebra below.
  factors = single_component ? @components.expand_dims(0) : @components
  centered_x = x - @mean
  n_factors = factors.shape[0]
  inner = Numo::DFloat.eye(n_factors) + (factors / @noise_variance).dot(factors.transpose)
  beta = Numo::Linalg.inv(inner)
  projection = (beta.dot(factors) / @noise_variance).transpose
  z = centered_x.dot(projection)
  # Mirror the storage convention: one component yields a vector of scores.
  single_component ? z[true, 0].dup : z
end
128
+
129
+ private
130
+
131
def log_likelihood(cov_mat, factors, noise_vars)
  # Length of the noise variance vector.
  dim = noise_vars.size
  # Covariance implied by the current factors plus the diagonal noise term.
  model_cov = factors.transpose.dot(factors) + noise_vars.diag
  log_det_term = dim.fdiv(2) * Math.log(Numo::Linalg.det(model_cov))
  trace_term = Numo::Linalg.inv(model_cov).dot(cov_mat).trace
  log_det_term + trace_term
end
136
+
137
def truncate_svd(x, k)
  n_cols = x.shape[1]
  gram = x.transpose.dot(x)
  # Only the last k eigenpairs (index range (n_cols - k)...n_cols) are requested;
  # both results are then reversed. Presumably eigh returns eigenvalues in ascending
  # order, so the reversal puts the largest first -- TODO confirm.
  eig_vals, eig_vecs = Numo::Linalg.eigh(gram, vals_range: (n_cols - k)...n_cols)
  [Numo::NMath.sqrt(eig_vals.reverse.dup), eig_vecs.reverse(1).dup]
end
144
+ end
145
+ end
146
+ end
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/utils'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ module Decomposition
10
+ # FastICA is a class that implements Fast Independent Component Analysis.
11
+ #
12
+ # @example
13
+ # require 'numo/linalg/autoloader'
14
+ # require 'rumale/decomposition/fast_ica'
15
+ #
16
+ # transformer = Rumale::Decomposition::FastICA.new(n_components: 2, random_seed: 1)
17
+ # source_data = transformer.fit_transform(observed_data)
18
+ #
19
+ # *Reference*
20
+ # - Hyvarinen, A., "Fast and Robust Fixed-Point Algorithms for Independent Component Analysis," IEEE Trans. Neural Networks, Vol. 10 (3), pp. 626--634, 1999.
21
+ # - Hyvarinen, A., and Oja, E., "Independent Component Analysis: Algorithms and Applications," Neural Networks, Vol. 13 (4-5), pp. 411--430, 2000.
22
+ class FastICA < ::Rumale::Base::Estimator
23
+ include ::Rumale::Base::Transformer
24
+
25
+ # Returns the unmixing matrix.
26
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
27
+ attr_reader :components
28
+
29
+ # Returns the mixing matrix.
30
+ # @return [Numo::DFloat] (shape: [n_features, n_components])
31
+ attr_reader :mixing
32
+
33
+ # Returns the number of iterations when converged.
34
+ # @return [Integer]
35
+ attr_reader :n_iter
36
+
37
+ # Return the random generator.
38
+ # @return [Random]
39
+ attr_reader :rng
40
+
41
+ # Create a new transformer with FastICA.
42
+ #
43
+ # @param n_components [Integer] The number of independent components.
44
+ # @param whiten [Boolean] The flag indicating whether to perform whitening.
45
+ # @param fun [String] The type of contrast function ('logcosh', 'exp', or 'cube').
46
+ # @param alpha [Float] The parameter of contrast function for 'logcosh' and 'exp'.
47
+ # If fun = 'cube', this parameter is ignored.
48
+ # @param max_iter [Integer] The maximum number of iterations.
49
+ # @param tol [Float] The tolerance of termination criterion.
50
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
51
def initialize(n_components: 2, whiten: true, fun: 'logcosh', alpha: 1.0, max_iter: 200, tol: 1e-4, random_seed: nil)
  super()
  # When no seed is supplied, fall back to the value returned by Kernel#srand.
  seed = random_seed || srand
  @params = {
    n_components: n_components,
    whiten: whiten,
    fun: fun,
    alpha: alpha,
    max_iter: max_iter,
    tol: tol,
    random_seed: seed
  }
  @rng = Random.new(seed)
end
64
+
65
+ # Fit the model with given training data.
66
+ #
67
+ # @overload fit(x) -> FastICA
68
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
69
+ # @return [FastICA] The learned transformer itself.
70
def fit(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  raise 'FastICA#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

  whiten = @params[:whiten]
  if whiten
    # Whitening also yields the mean vector, which transform/inverse_transform reuse.
    @mean, whiten_mat = whitening(x, @params[:n_components])
    wx = (x - @mean).dot(whiten_mat.transpose)
  else
    wx = x
  end

  # A copy of the generator is handed over, so the stored @rng is not advanced.
  unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
  @components = whiten ? unmixing.dot(whiten_mat) : unmixing
  @mixing = Numo::Linalg.pinv(@components).dup

  # With a single component both matrices are stored flattened.
  if @params[:n_components] == 1
    @components = @components.flatten.dup
    @mixing = @mixing.flatten.dup
  end
  self
end
85
+
86
+ # Fit the model with training data, and then transform them with the learned model.
87
+ #
88
+ # @overload fit_transform(x) -> Numo::DFloat
89
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
90
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
91
def fit_transform(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)
  unless enable_linalg?(warning: false)
    raise 'FastICA#fit_transform requires Numo::Linalg but that is not loaded'
  end

  # fit returns the estimator itself, so the same data can be projected right away.
  fitted = fit(x)
  fitted.transform(x)
end
97
+
98
+ # Transform the given data with the learned model.
99
+ #
100
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
101
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
102
def transform(x)
  samples = ::Rumale::Validation.check_convert_sample_array(x)

  # The mean is only subtracted when the model was configured with whitening.
  samples = samples - @mean if @params[:whiten]
  samples.dot(@components.transpose)
end
108
+
109
+ # Inverse transform the given transformed data with the learned model.
110
+ #
111
+ # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The source data reconstructed to the mixed data.
112
+ # @return [Numo::DFloat] (shape: [n_samples, n_features]) The mixed data.
113
def inverse_transform(z)
  z = ::Rumale::Validation.check_convert_sample_array(z)

  # A mixing matrix stored as a flat vector is lifted to a single-column matrix first.
  mixing_mat =
    if @mixing.shape.length < 2
      @mixing.expand_dims(0).transpose
    else
      @mixing
    end
  restored = z.dot(mixing_mat.transpose)
  # Add the mean back only when it was removed by whitening.
  @params[:whiten] ? restored + @mean : restored
end
121
+
122
+ private
123
+
124
def whitening(x, n_components)
  n_samples, n_features = x.shape
  mean_vec = x.mean(0)
  centered_x = x - mean_vec
  covar_mat = centered_x.transpose.dot(centered_x) / n_samples
  # Request only the last n_components eigenpairs, then reverse their order.
  eig_vals, eig_vecs = Numo::Linalg.eigh(covar_mat, vals_range: (n_features - n_components)...n_features)
  leading_vecs = eig_vecs.reverse(1).dup
  # Each eigenvector is scaled by the inverse square root of its eigenvalue.
  inv_scale = 1 / Numo::NMath.sqrt(eig_vals.reverse.dup)
  whiten_mat = (leading_vecs * inv_scale).transpose.dup
  [mean_vec, whiten_mat]
end
132
+
133
def ica(x, fun, max_iter, tol, sub_rng)
  n_samples, n_components = x.shape
  # Start from a decorrelated random square matrix.
  unmixing = decorrelation(::Rumale::Utils.rand_normal([n_components, n_components], sub_rng))
  n_iters = 0
  max_iter.times do |iter|
    n_iters = iter + 1
    gx, ggx = gradient(x.dot(unmixing.transpose), fun)
    # NOTE(review): `unmixing * ggx` presumably broadcasts ggx over the last axis (scaling columns),
    # while the fixed-point update in the cited papers weights each unmixing vector (row) by its own
    # derivative term -- confirm the intended axis before changing anything here.
    candidate = decorrelation(gx.transpose.dot(x) / n_samples - unmixing * ggx / n_samples)
    # Largest absolute element-wise change between successive estimates.
    change = (candidate - unmixing).abs.max
    unmixing = candidate
    break if change <= tol
  end
  [unmixing, n_iters]
end
147
+
148
def decorrelation(w)
  eig_vals, eig_vecs = Numo::Linalg.eigh(w.dot(w.transpose))
  # Build V * diag(1 / sqrt(lambda)) * V^T from the eigen-decomposition of w * w^T.
  inv_sqrt_vals = 1 / Numo::NMath.sqrt(eig_vals)
  scaled_vecs = eig_vecs * inv_sqrt_vals
  scaled_vecs.dot(eig_vecs.transpose).dot(w)
end
153
+
154
def gradient(x, func)
  # Any name other than 'exp' or 'cube' falls through to the logcosh contrast function.
  if 'exp' === func
    grad_exp(x, @params[:alpha])
  elsif 'cube' === func
    grad_cube(x)
  else
    grad_logcosh(x, @params[:alpha])
  end
end
164
+
165
def grad_logcosh(x, alpha)
  # g(x) = tanh(alpha * x); its derivative alpha * (1 - g^2) is summed over axis 0.
  activation = Numo::NMath.tanh(alpha * x)
  derivative_sum = (alpha * (1 - activation**2)).sum(axis: 0)
  [activation, derivative_sum]
end
170
+
171
def grad_exp(x, alpha)
  sq = x**2
  # Shared exponential factor exp(-alpha * x^2 / 2).
  damping = Numo::NMath.exp(-0.5 * alpha * sq)
  value = damping * x
  derivative_sum = (damping * (1 - alpha * sq)).sum(axis: 0)
  [value, derivative_sum]
end
178
+
179
def grad_cube(x)
  # g(x) = x^3; its derivative 3 * x^2 is summed over axis 0.
  cubed = x**3
  derivative_sum = (3 * x**2).sum(axis: 0)
  [cubed, derivative_sum]
end
182
+ end
183
+ end
184
+ end
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/utils'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ module Decomposition
10
+ # NMF is a class that implements Non-negative Matrix Factorization.
11
+ #
12
+ # @example
13
+ # require 'rumale/decomposition/nmf'
14
+ #
15
+ # decomposer = Rumale::Decomposition::NMF.new(n_components: 2)
16
+ # representation = decomposer.fit_transform(samples)
17
+ #
18
+ # *Reference*
19
+ # - Xu, W., Liu, X., and Gong, Y., "Document Clustering Based On Non-negative Matrix Factorization," Proc. SIGIR' 03 , pp. 267--273, 2003.
20
+ class NMF < ::Rumale::Base::Estimator
21
+ include ::Rumale::Base::Transformer
22
+
23
+ # Returns the factorization matrix.
24
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
25
+ attr_reader :components
26
+
27
+ # Return the random generator.
28
+ # @return [Random]
29
+ attr_reader :rng
30
+
31
+ # Create a new transformer with NMF.
32
+ #
33
+ # @param n_components [Integer] The number of components.
34
+ # @param max_iter [Integer] The maximum number of iterations.
35
+ # @param tol [Float] The tolerance of termination criterion.
36
+ # @param eps [Float] A small value close to zero to avoid zero division error.
37
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
38
def initialize(n_components: 2, max_iter: 500, tol: 1.0e-4, eps: 1.0e-16, random_seed: nil)
  super()
  # When no seed is supplied, fall back to the value returned by Kernel#srand.
  seed = random_seed || srand
  @params = {
    n_components: n_components,
    max_iter: max_iter,
    tol: tol,
    eps: eps,
    random_seed: seed
  }
  @rng = Random.new(seed)
end
49
+
50
+ # Fit the model with given training data.
51
+ #
52
+ # @overload fit(x) -> NMF
53
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
54
+ # @return [NMF] The learned transformer itself.
55
def fit(x, _y = nil)
  # The coefficients returned by partial_fit are discarded; only the learned components are kept.
  partial_fit(::Rumale::Validation.check_convert_sample_array(x))
  self
end
61
+
62
+ # Fit the model with training data, and then transform them with the learned model.
63
+ #
64
+ # @overload fit_transform(x) -> Numo::DFloat
65
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
66
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
67
def fit_transform(x, _y = nil)
  # partial_fit learns the components and returns the coefficient matrix for x.
  validated = ::Rumale::Validation.check_convert_sample_array(x)
  partial_fit(validated)
end
72
+
73
+ # Transform the given data with the learned model.
74
+ #
75
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
76
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
77
def transform(x)
  # Components are left untouched; only the coefficients for x are optimized.
  validated = ::Rumale::Validation.check_convert_sample_array(x)
  partial_fit(validated, update_comps: false)
end
82
+
83
+ # Inverse transform the given transformed data with the learned model.
84
+ #
85
+ # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
86
+ # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
87
def inverse_transform(z)
  # Multiply the coefficients by the learned components.
  coefficients = ::Rumale::Validation.check_convert_sample_array(z)
  coefficients.dot(@components)
end
92
+
93
+ private
94
+
95
def partial_fit(x, update_comps: true)
  n_components = @params[:n_components]
  eps = @params[:eps]
  tol = @params[:tol]

  # Random initialization, scaled by sqrt(mean(x) / n_components).
  n_samples, n_features = x.shape
  scale = Math.sqrt(x.mean / n_components)
  # Work on a copy of the generator so the stored @rng is not advanced.
  sub_rng = @rng.dup
  if update_comps
    @components = ::Rumale::Utils.rand_uniform([n_components, n_features], sub_rng) * scale
  end
  weights = ::Rumale::Utils.rand_uniform([n_samples, n_components], sub_rng) * scale

  @params[:max_iter].times do
    # Multiplicative update of the components (skipped when they are held fixed).
    if update_comps
      comp_nume = weights.transpose.dot(x)
      comp_deno = weights.transpose.dot(weights).dot(@components) + eps
      @components = @components * (comp_nume / comp_deno)
    end
    # Multiplicative update of the coefficients.
    coef_nume = x.dot(@components.transpose)
    coef_deno = weights.dot(@components).dot(@components.transpose) + eps
    weights = weights * (coef_nume / coef_deno)
    # Rescale by the row norms of the components; coefficients are always multiplied by them.
    norm = Numo::NMath.sqrt((@components**2).sum(axis: 1)) + eps
    @components = @components / norm.expand_dims(1) if update_comps
    weights = weights * norm
    # Stop once the mean squared reconstruction error per sample drops below the tolerance.
    err = ((x - weights.dot(@components))**2).sum(axis: 1).mean
    break if err < tol
  end
  weights
end
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/validation'
6
+
7
+ module Rumale
8
+ # Module for matrix decomposition algorithms.
9
+ module Decomposition
10
+ # PCA is a class that implements Principal Component Analysis.
11
+ #
12
+ # @example
13
+ # require 'rumale/decomposition/pca'
14
+ #
15
+ # decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'fpt')
16
+ # representation = decomposer.fit_transform(samples)
17
+ #
18
+ # # If Numo::Linalg is installed, you can specify 'evd' for the solver option.
19
+ # require 'numo/linalg/autoloader'
20
+ # require 'rumale/decomposition/pca'
21
+ #
22
+ # decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'evd')
23
+ # representation = decomposer.fit_transform(samples)
24
+ #
25
+ # # If Numo::Linalg is loaded and the solver option is not given,
26
+ # # the 'evd' solver is chosen automatically.
27
+ # decomposer = Rumale::Decomposition::PCA.new(n_components: 2)
28
+ # representation = decomposer.fit_transform(samples)
29
+ #
30
+ # *Reference*
31
+ # - Sharma, A., and Paliwal, K K., "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
32
+ class PCA < ::Rumale::Base::Estimator
33
+ include ::Rumale::Base::Transformer
34
+
35
+ # Returns the principal components.
36
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
37
+ attr_reader :components
38
+
39
+ # Returns the mean vector.
40
+ # @return [Numo::DFloat] (shape: [n_features])
41
+ attr_reader :mean
42
+
43
+ # Return the random generator.
44
+ # @return [Random]
45
+ attr_reader :rng
46
+
47
+ # Create a new transformer with PCA.
48
+ #
49
+ # @param n_components [Integer] The number of principal components.
50
+ # @param solver [String] The algorithm for the optimization ('auto', 'fpt' or 'evd').
51
+ # 'auto' chooses the 'evd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'fpt' solver.
52
+ # 'fpt' uses the fixed-point algorithm.
53
+ # 'evd' performs eigen value decomposition of the covariance matrix of samples.
54
+ # @param max_iter [Integer] The maximum number of iterations. If solver = 'evd', this parameter is ignored.
55
+ # @param tol [Float] The tolerance of termination criterion. If solver = 'evd', this parameter is ignored.
56
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
57
def initialize(n_components: 2, solver: 'auto', max_iter: 100, tol: 1.0e-4, random_seed: nil)
  super()
  # When no seed is supplied, fall back to the value returned by Kernel#srand.
  seed = random_seed || srand
  # 'evd' is used when requested explicitly, or under 'auto' when Numo::Linalg is available;
  # every other solver value ends up as 'fpt'.
  use_evd = solver == 'evd' || (solver == 'auto' && enable_linalg?(warning: false))
  @params = {
    n_components: n_components,
    solver: use_evd ? 'evd' : 'fpt',
    max_iter: max_iter,
    tol: tol,
    random_seed: seed
  }
  @rng = Random.new(seed)
end
69
+
70
+ # Fit the model with given training data.
71
+ #
72
+ # @overload fit(x) -> PCA
73
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
74
+ # @return [PCA] The learned transformer itself.
75
def fit(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)

  n_components = @params[:n_components]
  @components = nil
  n_samples, n_features = x.shape
  # Work on a copy of the generator so the stored @rng is not advanced.
  sub_rng = @rng.dup
  # Center the data and build the covariance matrix (divisor n_samples - 1).
  @mean = x.mean(0)
  centered_x = x - @mean
  covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)

  # Eigen-decomposition path.
  if @params[:solver] == 'evd' && enable_linalg?
    _, evecs = Numo::Linalg.eigh(covariance_mat, vals_range: (n_features - n_components)...n_features)
    comps = evecs.reverse(1).transpose
    # A single component is stored as a vector rather than a 1-row matrix.
    @components = n_components == 1 ? comps[0, true].dup : comps.dup
    return self
  end

  # Fixed-point path: components are found one at a time and stacked row-wise.
  n_components.times do
    comp_vec = ::Rumale::Utils.rand_uniform(n_features, sub_rng)
    @params[:max_iter].times do
      updated = orthogonalize(covariance_mat.dot(comp_vec))
      # The loop exits before `updated` is stored, so comp_vec keeps the previous iterate.
      break if (updated.dot(comp_vec) - 1).abs < @params[:tol]

      comp_vec = updated
    end
    @components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
  end
  self
end
105
+
106
+ # Fit the model with training data, and then transform them with the learned model.
107
+ #
108
+ # @overload fit_transform(x) -> Numo::DFloat
109
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
110
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
111
def fit_transform(x, _y = nil)
  x = ::Rumale::Validation.check_convert_sample_array(x)

  # fit returns the estimator itself, so the same data can be projected right away.
  fitted = fit(x)
  fitted.transform(x)
end
116
+
117
+ # Transform the given data with the learned model.
118
+ #
119
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
120
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
121
def transform(x)
  samples = ::Rumale::Validation.check_convert_sample_array(x)

  # Center with the stored mean, then project onto the components.
  centered = samples - @mean
  centered.dot(@components.transpose)
end
126
+
127
+ # Inverse transform the given transformed data with the learned model.
128
+ #
129
+ # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
130
+ # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
131
def inverse_transform(z)
  z = ::Rumale::Validation.check_convert_sample_array(z)

  # Components stored as a flat vector are lifted to a 1-row matrix first.
  comp_mat =
    if @components.shape.length < 2
      @components.expand_dims(0)
    else
      @components
    end
  # Map back to the original space and undo the centering.
  z.dot(comp_mat) + @mean
end
137
+
138
+ private
139
+
140
def orthogonalize(pcvec)
  # Remove from pcvec its projections onto the components found so far, then scale it
  # to unit length.
  #
  # @param pcvec [Numo::DFloat] The candidate component vector.
  # @return [Numo::DFloat] The deflated, normalized vector.
  unless @components.nil?
    delta = @components.dot(pcvec) * @components.transpose
    # When delta has a second axis, fold it into a single correction vector.
    delta = delta.sum(axis: 1) unless delta.shape[1].nil?
    pcvec -= delta
  end
  # The epsilon must guard the divisor. Without the parentheses it was added to every
  # element of the quotient, which left a zero vector as NaN and skewed the result.
  pcvec / (Math.sqrt((pcvec**2).sum.abs) + 1.0e-12)
end
148
+ end
149
+ end
150
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rumale is a machine learning library in Ruby.
4
module Rumale
  # Namespace for the matrix decomposition algorithms.
  module Decomposition
    # Version string of this gem.
    # @!visibility private
    VERSION = '0.24.0'
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'decomposition/factor_analysis'
6
+ require_relative 'decomposition/fast_ica'
7
+ require_relative 'decomposition/nmf'
8
+ require_relative 'decomposition/pca'
9
+ require_relative 'decomposition/version'
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rumale-decomposition
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.24.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rumale-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.24.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.24.0
41
+ description: |
42
+ Rumale::Decomposition provides matrix decomposition algorithms,
43
+ such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
44
+ with Rumale interface.
45
+ email:
46
+ - yoshoku@outlook.com
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - LICENSE.txt
52
+ - README.md
53
+ - lib/rumale/decomposition.rb
54
+ - lib/rumale/decomposition/factor_analysis.rb
55
+ - lib/rumale/decomposition/fast_ica.rb
56
+ - lib/rumale/decomposition/nmf.rb
57
+ - lib/rumale/decomposition/pca.rb
58
+ - lib/rumale/decomposition/version.rb
59
+ homepage: https://github.com/yoshoku/rumale
60
+ licenses:
61
+ - BSD-3-Clause
62
+ metadata:
63
+ homepage_uri: https://github.com/yoshoku/rumale
64
+ source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-decomposition
65
+ changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
66
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
67
+ rubygems_mfa_required: 'true'
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubygems_version: 3.3.26
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: Rumale::Decomposition provides matrix decomposition algorithms with Rumale
87
+ interface
88
+ test_files: []