rumale-decomposition 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/decomposition/factor_analysis.rb +146 -0
- data/lib/rumale/decomposition/fast_ica.rb +184 -0
- data/lib/rumale/decomposition/nmf.rb +125 -0
- data/lib/rumale/decomposition/pca.rb +150 -0
- data/lib/rumale/decomposition/version.rb +10 -0
- data/lib/rumale/decomposition.rb +9 -0
- metadata +88 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4a2420f4a6ada0919ed266e0cbf84eb2b52b5d17a86a3d0273646e8ee1253435
|
4
|
+
data.tar.gz: 32194d34d4165898dd8478ef8fd56b6556cee1832140ae028daf707c0d1d4f93
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a94a284b3692806962acab85c22f7c96595a00b2dd27e4fd638cbb9cd15b08e087651ec5e7c933a239df0b5ebdb4da86394c646bbc8daa517c5d880ab7afac0b
|
7
|
+
data.tar.gz: 4b9105d8e34bf11be4ed2e563431ee2755c949684530b9a0020a4b05acdb504d75acdb8f2b39559a9ba3e90e07b99e153b9724c2c3f3f5de452849e7eb207431
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2022 Atsushi Tatsuma
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright holder nor the names of its
|
15
|
+
contributors may be used to endorse or promote products derived from
|
16
|
+
this software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Rumale::Decomposition
|
2
|
+
|
3
|
+
[Gem Version](https://badge.fury.io/rb/rumale-decomposition)
|
4
|
+
[BSD 3-Clause License](https://github.com/yoshoku/rumale/blob/main/rumale-decomposition/LICENSE.txt)
|
5
|
+
[Documentation](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
|
6
|
+
|
7
|
+
Rumale is a machine learning library in Ruby.
|
8
|
+
Rumale::Decomposition provides matrix decomposition algorithms,
|
9
|
+
such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
|
10
|
+
with Rumale interface.
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'rumale-decomposition'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle install
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install rumale-decomposition
|
27
|
+
|
28
|
+
## Documentation
|
29
|
+
|
30
|
+
- [Rumale API Documentation - Decomposition](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Decomposition
|
9
|
+
# FactorAnalysis is a class that implements factor analysis with the EM algorithm.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'numo/linalg/autoloader'
|
13
|
+
# require 'rumale/decomposition/factor_analysis'
|
14
|
+
#
|
15
|
+
# decomposer = Rumale::Decomposition::FactorAnalysis.new(n_components: 2)
|
16
|
+
# representation = decomposer.fit_transform(samples)
|
17
|
+
#
|
18
|
+
# *Reference*
|
19
|
+
# - Barber, D., "Bayesian Reasoning and Machine Learning," Cambridge University Press, 2012.
|
20
|
+
class FactorAnalysis < ::Rumale::Base::Estimator
|
21
|
+
include ::Rumale::Base::Transformer
|
22
|
+
|
23
|
+
# Returns the mean vector.
|
24
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
25
|
+
attr_reader :mean
|
26
|
+
|
27
|
+
# Returns the estimated noise variance for each feature.
|
28
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
29
|
+
attr_reader :noise_variance
|
30
|
+
|
31
|
+
# Returns the components with maximum variance.
|
32
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
33
|
+
attr_reader :components
|
34
|
+
|
35
|
+
# Returns the log likelihood at each iteration.
|
36
|
+
# @return [Numo::DFloat] (shape: [n_iter])
|
37
|
+
attr_reader :loglike
|
38
|
+
|
39
|
+
# Return the number of iterations run for optimization
|
40
|
+
# @return [Integer]
|
41
|
+
attr_reader :n_iter
|
42
|
+
|
43
|
+
# Create a new transformer with factor analysis.
|
44
|
+
#
|
45
|
+
# @param n_components [Integer] The number of components (dimensionality of latent space).
|
46
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
47
|
+
# @param tol [Float/Nil] The tolerance of termination criterion for EM algorithm.
|
48
|
+
# If nil is given, iterate EM steps up to the maximum number of iterations.
|
49
|
+
def initialize(n_components: 2, max_iter: 100, tol: 1e-8)
|
50
|
+
super()
|
51
|
+
@params = {
|
52
|
+
n_components: n_components,
|
53
|
+
max_iter: max_iter,
|
54
|
+
tol: tol
|
55
|
+
}
|
56
|
+
end
|
57
|
+
|
58
|
+
# Fit the model with given training data.
|
59
|
+
#
|
60
|
+
# @overload fit(x) -> FactorAnalysis
|
61
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
62
|
+
# @return [FactorAnalysis] The learned transformer itself.
|
63
|
+
def fit(x, _y = nil)
|
64
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
65
|
+
raise 'FactorAnalysis#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
66
|
+
|
67
|
+
# initialize some variables.
|
68
|
+
n_samples, n_features = x.shape
|
69
|
+
@mean = x.mean(0)
|
70
|
+
centered_x = x - @mean
|
71
|
+
cov_mat = centered_x.transpose.dot(centered_x) / n_samples
|
72
|
+
sample_vars = x.var(0)
|
73
|
+
sqrt_n_samples = Math.sqrt(n_samples)
|
74
|
+
@noise_variance = Numo::DFloat.ones(n_features)
|
75
|
+
|
76
|
+
# run optimization.
|
77
|
+
old_loglike = 0.0
|
78
|
+
@n_iter = 0
|
79
|
+
@loglike = [] unless @params[:tol].nil?
|
80
|
+
@params[:max_iter].times do |t|
|
81
|
+
@n_iter = t + 1
|
82
|
+
sqrt_noise_variance = Numo::NMath.sqrt(@noise_variance)
|
83
|
+
scaled_x = centered_x / (sqrt_noise_variance * sqrt_n_samples + 1e-12)
|
84
|
+
s, u = truncate_svd(scaled_x, @params[:n_components])
|
85
|
+
scaler = Numo::NMath.sqrt(Numo::DFloat.maximum(s**2 - 1.0, 0.0))
|
86
|
+
@components = (sqrt_noise_variance.diag.dot(u) * scaler).transpose.dup
|
87
|
+
@noise_variance = Numo::DFloat.maximum(sample_vars - @components.transpose.dot(@components).diagonal, 1e-12)
|
88
|
+
next if @params[:tol].nil?
|
89
|
+
|
90
|
+
new_loglike = log_likelihood(cov_mat, @components, @noise_variance)
|
91
|
+
@loglike.push(new_loglike)
|
92
|
+
break if (old_loglike - new_loglike).abs <= @params[:tol]
|
93
|
+
|
94
|
+
old_loglike = new_loglike
|
95
|
+
end
|
96
|
+
|
97
|
+
@loglike = Numo::DFloat.cast(@loglike) unless @params[:tol].nil?
|
98
|
+
@components = @components[0, true].dup if @params[:n_components] == 1
|
99
|
+
self
|
100
|
+
end
|
101
|
+
|
102
|
+
# Fit the model with training data, and then transform them with the learned model.
|
103
|
+
#
|
104
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
105
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
106
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
107
|
+
def fit_transform(x, _y = nil)
|
108
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
109
|
+
raise 'FactorAnalysis#fit_transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
110
|
+
|
111
|
+
fit(x).transform(x)
|
112
|
+
end
|
113
|
+
|
114
|
+
# Transform the given data with the learned model.
|
115
|
+
#
|
116
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
117
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
118
|
+
def transform(x)
|
119
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
120
|
+
raise 'FactorAnalysis#transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
121
|
+
|
122
|
+
factors = @params[:n_components] == 1 ? @components.expand_dims(0) : @components
|
123
|
+
centered_x = x - @mean
|
124
|
+
beta = Numo::Linalg.inv(Numo::DFloat.eye(factors.shape[0]) + (factors / @noise_variance).dot(factors.transpose))
|
125
|
+
z = centered_x.dot((beta.dot(factors) / @noise_variance).transpose)
|
126
|
+
@params[:n_components] == 1 ? z[true, 0].dup : z
|
127
|
+
end
|
128
|
+
|
129
|
+
private
|
130
|
+
|
131
|
+
# Evaluates the objective that #fit monitors to decide when the EM iterations have converged.
# With C = factors^T factors + diag(noise_vars), the returned value is
# (n_features / 2) * log(det(C)) + trace(C^-1 cov_mat).
#
# @param cov_mat [Numo::DFloat] (shape: [n_features, n_features]) The covariance matrix of the centered training data.
# @param factors [Numo::DFloat] (shape: [n_components, n_features]) The current factor loadings.
# @param noise_vars [Numo::DFloat] (shape: [n_features]) The current noise variance of each feature.
# @return [Float] The value of the objective.
def log_likelihood(cov_mat, factors, noise_vars)
  # noise_vars holds one entry per feature, so its size is the feature count (not the sample count).
  n_features = noise_vars.size
  fact_cov_mat = factors.transpose.dot(factors) + noise_vars.diag
  n_features.fdiv(2) * Math.log(Numo::Linalg.det(fact_cov_mat)) + Numo::Linalg.inv(fact_cov_mat).dot(cov_mat).trace
end
|
136
|
+
|
137
|
+
def truncate_svd(x, k)
|
138
|
+
m = x.shape[1]
|
139
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(x.transpose.dot(x), vals_range: (m - k)...m)
|
140
|
+
s = Numo::NMath.sqrt(eig_vals.reverse.dup)
|
141
|
+
u = eig_vecs.reverse(1).dup
|
142
|
+
[s, u]
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
@@ -0,0 +1,184 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
|
6
|
+
require 'rumale/validation'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module Decomposition
|
10
|
+
# FastICA is a class that implements Fast Independent Component Analysis.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'numo/linalg/autoloader'
|
14
|
+
# require 'rumale/decomposition/fast_ica'
|
15
|
+
#
|
16
|
+
# transformer = Rumale::Decomposition::FastICA.new(n_components: 2, random_seed: 1)
|
17
|
+
# source_data = transformer.fit_transform(observed_data)
|
18
|
+
#
|
19
|
+
# *Reference*
|
20
|
+
# - Hyvarinen, A., "Fast and Robust Fixed-Point Algorithms for Independent Component Analysis," IEEE Trans. Neural Networks, Vol. 10 (3), pp. 626--634, 1999.
|
21
|
+
# - Hyvarinen, A., and Oja, E., "Independent Component Analysis: Algorithms and Applications," Neural Networks, Vol. 13 (4-5), pp. 411--430, 2000.
|
22
|
+
class FastICA < ::Rumale::Base::Estimator
|
23
|
+
include ::Rumale::Base::Transformer
|
24
|
+
|
25
|
+
# Returns the unmixing matrix.
|
26
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
27
|
+
attr_reader :components
|
28
|
+
|
29
|
+
# Returns the mixing matrix.
|
30
|
+
# @return [Numo::DFloat] (shape: [n_features, n_components])
|
31
|
+
attr_reader :mixing
|
32
|
+
|
33
|
+
# Returns the number of iterations when converged.
|
34
|
+
# @return [Integer]
|
35
|
+
attr_reader :n_iter
|
36
|
+
|
37
|
+
# Return the random generator.
|
38
|
+
# @return [Random]
|
39
|
+
attr_reader :rng
|
40
|
+
|
41
|
+
# Create a new transformer with FastICA.
|
42
|
+
#
|
43
|
+
# @param n_components [Integer] The number of independent components.
|
44
|
+
# @param whiten [Boolean] The flag indicating whether to perform whitening.
|
45
|
+
# @param fun [String] The type of contrast function ('logcosh', 'exp', or 'cube').
|
46
|
+
# @param alpha [Float] The parameter of contrast function for 'logcosh' and 'exp'.
|
47
|
+
# If fun = 'cube', this parameter is ignored.
|
48
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
49
|
+
# @param tol [Float] The tolerance of termination criterion.
|
50
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
51
|
+
def initialize(n_components: 2, whiten: true, fun: 'logcosh', alpha: 1.0, max_iter: 200, tol: 1e-4, random_seed: nil)
|
52
|
+
super()
|
53
|
+
@params = {
|
54
|
+
n_components: n_components,
|
55
|
+
whiten: whiten,
|
56
|
+
fun: fun,
|
57
|
+
alpha: alpha,
|
58
|
+
max_iter: max_iter,
|
59
|
+
tol: tol,
|
60
|
+
random_seed: (random_seed || srand)
|
61
|
+
}
|
62
|
+
@rng = Random.new(@params[:random_seed])
|
63
|
+
end
|
64
|
+
|
65
|
+
# Fit the model with given training data.
|
66
|
+
#
|
67
|
+
# @overload fit(x) -> FastICA
|
68
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
69
|
+
# @return [FastICA] The learned transformer itself.
|
70
|
+
def fit(x, _y = nil)
|
71
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
72
|
+
raise 'FastICA#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
73
|
+
|
74
|
+
@mean, whiten_mat = whitening(x, @params[:n_components]) if @params[:whiten]
|
75
|
+
wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
|
76
|
+
unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
|
77
|
+
@components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
|
78
|
+
@mixing = Numo::Linalg.pinv(@components).dup
|
79
|
+
if @params[:n_components] == 1
|
80
|
+
@components = @components.flatten.dup
|
81
|
+
@mixing = @mixing.flatten.dup
|
82
|
+
end
|
83
|
+
self
|
84
|
+
end
|
85
|
+
|
86
|
+
# Fit the model with training data, and then transform them with the learned model.
|
87
|
+
#
|
88
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
89
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
90
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
91
|
+
def fit_transform(x, _y = nil)
|
92
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
93
|
+
raise 'FastICA#fit_transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
94
|
+
|
95
|
+
fit(x).transform(x)
|
96
|
+
end
|
97
|
+
|
98
|
+
# Transform the given data with the learned model.
|
99
|
+
#
|
100
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
101
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
102
|
+
def transform(x)
|
103
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
104
|
+
|
105
|
+
cx = @params[:whiten] ? (x - @mean) : x
|
106
|
+
cx.dot(@components.transpose)
|
107
|
+
end
|
108
|
+
|
109
|
+
# Inverse transform the given transformed data with the learned model.
|
110
|
+
#
|
111
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The source data reconstructed to the mixed data.
|
112
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The mixed data.
|
113
|
+
def inverse_transform(z)
|
114
|
+
z = ::Rumale::Validation.check_convert_sample_array(z)
|
115
|
+
|
116
|
+
m = @mixing.shape[1].nil? ? @mixing.expand_dims(0).transpose : @mixing
|
117
|
+
x = z.dot(m.transpose)
|
118
|
+
x += @mean if @params[:whiten]
|
119
|
+
x
|
120
|
+
end
|
121
|
+
|
122
|
+
private
|
123
|
+
|
124
|
+
def whitening(x, n_components)
|
125
|
+
n_samples, n_features = x.shape
|
126
|
+
mean_vec = x.mean(0)
|
127
|
+
centered_x = x - mean_vec
|
128
|
+
covar_mat = centered_x.transpose.dot(centered_x) / n_samples
|
129
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(covar_mat, vals_range: (n_features - n_components)...n_features)
|
130
|
+
[mean_vec, (eig_vecs.reverse(1).dup * (1 / Numo::NMath.sqrt(eig_vals.reverse.dup))).transpose.dup]
|
131
|
+
end
|
132
|
+
|
133
|
+
def ica(x, fun, max_iter, tol, sub_rng)
|
134
|
+
n_samples, n_components = x.shape
|
135
|
+
w = decorrelation(::Rumale::Utils.rand_normal([n_components, n_components], sub_rng))
|
136
|
+
n_iters = 0
|
137
|
+
max_iter.times do |t|
|
138
|
+
n_iters = t + 1
|
139
|
+
gx, ggx = gradient(x.dot(w.transpose), fun)
|
140
|
+
new_w = decorrelation(gx.transpose.dot(x) / n_samples - w * ggx / n_samples)
|
141
|
+
err = (new_w - w).abs.max
|
142
|
+
w = new_w
|
143
|
+
break if err <= tol
|
144
|
+
end
|
145
|
+
[w, n_iters]
|
146
|
+
end
|
147
|
+
|
148
|
+
def decorrelation(w)
|
149
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(w.dot(w.transpose))
|
150
|
+
decorr_mat = (eig_vecs * (1 / Numo::NMath.sqrt(eig_vals))).dot(eig_vecs.transpose)
|
151
|
+
decorr_mat.dot(w)
|
152
|
+
end
|
153
|
+
|
154
|
+
def gradient(x, func)
|
155
|
+
case func
|
156
|
+
when 'exp'
|
157
|
+
grad_exp(x, @params[:alpha])
|
158
|
+
when 'cube'
|
159
|
+
grad_cube(x)
|
160
|
+
else
|
161
|
+
grad_logcosh(x, @params[:alpha])
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
def grad_logcosh(x, alpha)
|
166
|
+
gx = Numo::NMath.tanh(alpha * x)
|
167
|
+
ggx = (alpha * (1 - gx**2)).sum(axis: 0)
|
168
|
+
[gx, ggx]
|
169
|
+
end
|
170
|
+
|
171
|
+
def grad_exp(x, alpha)
|
172
|
+
squared_x = x**2
|
173
|
+
exp_x = Numo::NMath.exp(-0.5 * alpha * squared_x)
|
174
|
+
gx = exp_x * x
|
175
|
+
ggx = (exp_x * (1 - alpha * squared_x)).sum(axis: 0)
|
176
|
+
[gx, ggx]
|
177
|
+
end
|
178
|
+
|
179
|
+
def grad_cube(x)
|
180
|
+
[x**3, (3 * x**2).sum(axis: 0)]
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
|
6
|
+
require 'rumale/validation'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module Decomposition
|
10
|
+
# NMF is a class that implements Non-negative Matrix Factorization.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'rumale/decomposition/nmf'
|
14
|
+
#
|
15
|
+
# decomposer = Rumale::Decomposition::NMF.new(n_components: 2)
|
16
|
+
# representation = decomposer.fit_transform(samples)
|
17
|
+
#
|
18
|
+
# *Reference*
|
19
|
+
# - Xu, W., Liu, X., and Gong, Y., "Document Clustering Based On Non-negative Matrix Factorization," Proc. SIGIR' 03 , pp. 267--273, 2003.
|
20
|
+
class NMF < ::Rumale::Base::Estimator
|
21
|
+
include ::Rumale::Base::Transformer
|
22
|
+
|
23
|
+
# Returns the factorization matrix.
|
24
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
25
|
+
attr_reader :components
|
26
|
+
|
27
|
+
# Return the random generator.
|
28
|
+
# @return [Random]
|
29
|
+
attr_reader :rng
|
30
|
+
|
31
|
+
# Create a new transformer with NMF.
|
32
|
+
#
|
33
|
+
# @param n_components [Integer] The number of components.
|
34
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
35
|
+
# @param tol [Float] The tolerance of termination criterion.
|
36
|
+
# @param eps [Float] A small value close to zero to avoid zero division error.
|
37
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
38
|
+
def initialize(n_components: 2, max_iter: 500, tol: 1.0e-4, eps: 1.0e-16, random_seed: nil)
|
39
|
+
super()
|
40
|
+
@params = {
|
41
|
+
n_components: n_components,
|
42
|
+
max_iter: max_iter,
|
43
|
+
tol: tol,
|
44
|
+
eps: eps,
|
45
|
+
random_seed: (random_seed || srand)
|
46
|
+
}
|
47
|
+
@rng = Random.new(@params[:random_seed])
|
48
|
+
end
|
49
|
+
|
50
|
+
# Fit the model with given training data.
|
51
|
+
#
|
52
|
+
# @overload fit(x) -> NMF
|
53
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
54
|
+
# @return [NMF] The learned transformer itself.
|
55
|
+
def fit(x, _y = nil)
|
56
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
57
|
+
|
58
|
+
partial_fit(x)
|
59
|
+
self
|
60
|
+
end
|
61
|
+
|
62
|
+
# Fit the model with training data, and then transform them with the learned model.
|
63
|
+
#
|
64
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
65
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
66
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
67
|
+
def fit_transform(x, _y = nil)
|
68
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
69
|
+
|
70
|
+
partial_fit(x)
|
71
|
+
end
|
72
|
+
|
73
|
+
# Transform the given data with the learned model.
|
74
|
+
#
|
75
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
76
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
77
|
+
def transform(x)
|
78
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
79
|
+
|
80
|
+
partial_fit(x, update_comps: false)
|
81
|
+
end
|
82
|
+
|
83
|
+
# Inverse transform the given transformed data with the learned model.
|
84
|
+
#
|
85
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
|
86
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
|
87
|
+
def inverse_transform(z)
|
88
|
+
z = ::Rumale::Validation.check_convert_sample_array(z)
|
89
|
+
|
90
|
+
z.dot(@components)
|
91
|
+
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
def partial_fit(x, update_comps: true)
|
96
|
+
# initialize some variables.
|
97
|
+
n_samples, n_features = x.shape
|
98
|
+
scale = Math.sqrt(x.mean / @params[:n_components])
|
99
|
+
sub_rng = @rng.dup
|
100
|
+
@components = ::Rumale::Utils.rand_uniform([@params[:n_components], n_features], sub_rng) * scale if update_comps
|
101
|
+
coefficients = ::Rumale::Utils.rand_uniform([n_samples, @params[:n_components]], sub_rng) * scale
|
102
|
+
# optimization.
|
103
|
+
@params[:max_iter].times do
|
104
|
+
# update
|
105
|
+
if update_comps
|
106
|
+
nume = coefficients.transpose.dot(x)
|
107
|
+
deno = coefficients.transpose.dot(coefficients).dot(@components) + @params[:eps]
|
108
|
+
@components *= (nume / deno)
|
109
|
+
end
|
110
|
+
nume = x.dot(@components.transpose)
|
111
|
+
deno = coefficients.dot(@components).dot(@components.transpose) + @params[:eps]
|
112
|
+
coefficients *= (nume / deno)
|
113
|
+
# normalize
|
114
|
+
norm = Numo::NMath.sqrt((@components**2).sum(axis: 1)) + @params[:eps]
|
115
|
+
@components /= norm.expand_dims(1) if update_comps
|
116
|
+
coefficients *= norm
|
117
|
+
# check convergence
|
118
|
+
err = ((x - coefficients.dot(@components))**2).sum(axis: 1).mean
|
119
|
+
break if err < @params[:tol]
|
120
|
+
end
|
121
|
+
coefficients
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
# Module for matrix decomposition algorithms.
|
9
|
+
module Decomposition
|
10
|
+
# PCA is a class that implements Principal Component Analysis.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'rumale/decomposition/pca'
|
14
|
+
#
|
15
|
+
# decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'fpt')
|
16
|
+
# representation = decomposer.fit_transform(samples)
|
17
|
+
#
|
18
|
+
# # If Numo::Linalg is installed, you can specify 'evd' for the solver option.
|
19
|
+
# require 'numo/linalg/autoloader'
|
20
|
+
# require 'rumale/decomposition/pca'
|
21
|
+
#
|
22
|
+
# decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'evd')
|
23
|
+
# representation = decomposer.fit_transform(samples)
|
24
|
+
#
|
25
|
+
# # If Numo::Linalg is loaded and the solver option is not given,
|
26
|
+
# # the 'evd' solver is chosen automatically.
|
27
|
+
# decomposer = Rumale::Decomposition::PCA.new(n_components: 2)
|
28
|
+
# representation = decomposer.fit_transform(samples)
|
29
|
+
#
|
30
|
+
# *Reference*
|
31
|
+
# - Sharma, A., and Paliwal, K K., "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
|
32
|
+
class PCA < ::Rumale::Base::Estimator
|
33
|
+
include ::Rumale::Base::Transformer
|
34
|
+
|
35
|
+
# Returns the principal components.
|
36
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
37
|
+
attr_reader :components
|
38
|
+
|
39
|
+
# Returns the mean vector.
|
40
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
41
|
+
attr_reader :mean
|
42
|
+
|
43
|
+
# Return the random generator.
|
44
|
+
# @return [Random]
|
45
|
+
attr_reader :rng
|
46
|
+
|
47
|
+
# Create a new transformer with PCA.
|
48
|
+
#
|
49
|
+
# @param n_components [Integer] The number of principal components.
|
50
|
+
# @param solver [String] The algorithm for the optimization ('auto', 'fpt' or 'evd').
|
51
|
+
# 'auto' chooses the 'evd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'fpt' solver.
|
52
|
+
# 'fpt' uses the fixed-point algorithm.
|
53
|
+
# 'evd' performs eigen value decomposition of the covariance matrix of samples.
|
54
|
+
# @param max_iter [Integer] The maximum number of iterations. If solver = 'evd', this parameter is ignored.
|
55
|
+
# @param tol [Float] The tolerance of termination criterion. If solver = 'evd', this parameter is ignored.
|
56
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
57
|
+
def initialize(n_components: 2, solver: 'auto', max_iter: 100, tol: 1.0e-4, random_seed: nil)
|
58
|
+
super()
|
59
|
+
@params = {
|
60
|
+
n_components: n_components,
|
61
|
+
solver: 'fpt',
|
62
|
+
max_iter: max_iter,
|
63
|
+
tol: tol,
|
64
|
+
random_seed: (random_seed || srand)
|
65
|
+
}
|
66
|
+
@params[:solver] = 'evd' if (solver == 'auto' && enable_linalg?(warning: false)) || solver == 'evd'
|
67
|
+
@rng = Random.new(@params[:random_seed])
|
68
|
+
end
|
69
|
+
|
70
|
+
# Fit the model with given training data.
|
71
|
+
#
|
72
|
+
# @overload fit(x) -> PCA
|
73
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
74
|
+
# @return [PCA] The learned transformer itself.
|
75
|
+
def fit(x, _y = nil)
|
76
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
77
|
+
|
78
|
+
# initialize some variables.
|
79
|
+
@components = nil
|
80
|
+
n_samples, n_features = x.shape
|
81
|
+
sub_rng = @rng.dup
|
82
|
+
# centering.
|
83
|
+
@mean = x.mean(0)
|
84
|
+
centered_x = x - @mean
|
85
|
+
# optimization.
|
86
|
+
covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)
|
87
|
+
if @params[:solver] == 'evd' && enable_linalg?
|
88
|
+
_, evecs = Numo::Linalg.eigh(covariance_mat, vals_range: (n_features - @params[:n_components])...n_features)
|
89
|
+
comps = evecs.reverse(1).transpose
|
90
|
+
@components = @params[:n_components] == 1 ? comps[0, true].dup : comps.dup
|
91
|
+
else
|
92
|
+
@params[:n_components].times do
|
93
|
+
comp_vec = ::Rumale::Utils.rand_uniform(n_features, sub_rng)
|
94
|
+
@params[:max_iter].times do
|
95
|
+
updated = orthogonalize(covariance_mat.dot(comp_vec))
|
96
|
+
break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
|
97
|
+
|
98
|
+
comp_vec = updated
|
99
|
+
end
|
100
|
+
@components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
|
101
|
+
end
|
102
|
+
end
|
103
|
+
self
|
104
|
+
end
|
105
|
+
|
106
|
+
# Fit the model with training data, and then transform them with the learned model.
|
107
|
+
#
|
108
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
109
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
110
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
111
|
+
def fit_transform(x, _y = nil)
|
112
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
113
|
+
|
114
|
+
fit(x).transform(x)
|
115
|
+
end
|
116
|
+
|
117
|
+
# Transform the given data with the learned model.
|
118
|
+
#
|
119
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
120
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
121
|
+
def transform(x)
|
122
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
123
|
+
|
124
|
+
(x - @mean).dot(@components.transpose)
|
125
|
+
end
|
126
|
+
|
127
|
+
# Inverse transform the given transformed data with the learned model.
|
128
|
+
#
|
129
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
|
130
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
|
131
|
+
def inverse_transform(z)
|
132
|
+
z = ::Rumale::Validation.check_convert_sample_array(z)
|
133
|
+
|
134
|
+
c = @components.shape[1].nil? ? @components.expand_dims(0) : @components
|
135
|
+
z.dot(c) + @mean
|
136
|
+
end
|
137
|
+
|
138
|
+
private
|
139
|
+
|
140
|
+
# Removes from the given vector its projections onto the components stored in @components
# (when there are any) and rescales the remainder to unit length.
#
# @param pcvec [Numo::DFloat] (shape: [n_features]) The candidate principal component vector.
# @return [Numo::DFloat] (shape: [n_features]) The deflated and normalized vector.
def orthogonalize(pcvec)
  unless @components.nil?
    # Projection coefficients of pcvec on the stored components, scaled back along those components.
    delta = @components.dot(pcvec) * @components.transpose
    # When delta is two-dimensional (several components are stored), collapse it along axis 1.
    delta = delta.sum(axis: 1) unless delta.shape[1].nil?
    pcvec -= delta
  end
  # The epsilon belongs in the denominator, where it guards the division against a zero norm.
  # Without the parentheses it would be added to every element of the already divided vector.
  pcvec / (Math.sqrt((pcvec**2).sum.abs) + 1.0e-12)
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require_relative 'decomposition/factor_analysis'
|
6
|
+
require_relative 'decomposition/fast_ica'
|
7
|
+
require_relative 'decomposition/nmf'
|
8
|
+
require_relative 'decomposition/pca'
|
9
|
+
require_relative 'decomposition/version'
|
metadata
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rumale-decomposition
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.24.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yoshoku
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-12-31 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: numo-narray
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.9.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.9.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rumale-core
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.24.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.24.0
|
41
|
+
description: |
|
42
|
+
Rumale::Decomposition provides matrix decomposition algorithms,
|
43
|
+
such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
|
44
|
+
with Rumale interface.
|
45
|
+
email:
|
46
|
+
- yoshoku@outlook.com
|
47
|
+
executables: []
|
48
|
+
extensions: []
|
49
|
+
extra_rdoc_files: []
|
50
|
+
files:
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- lib/rumale/decomposition.rb
|
54
|
+
- lib/rumale/decomposition/factor_analysis.rb
|
55
|
+
- lib/rumale/decomposition/fast_ica.rb
|
56
|
+
- lib/rumale/decomposition/nmf.rb
|
57
|
+
- lib/rumale/decomposition/pca.rb
|
58
|
+
- lib/rumale/decomposition/version.rb
|
59
|
+
homepage: https://github.com/yoshoku/rumale
|
60
|
+
licenses:
|
61
|
+
- BSD-3-Clause
|
62
|
+
metadata:
|
63
|
+
homepage_uri: https://github.com/yoshoku/rumale
|
64
|
+
source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-decomposition
|
65
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
|
66
|
+
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
67
|
+
rubygems_mfa_required: 'true'
|
68
|
+
post_install_message:
|
69
|
+
rdoc_options: []
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
requirements: []
|
83
|
+
rubygems_version: 3.3.26
|
84
|
+
signing_key:
|
85
|
+
specification_version: 4
|
86
|
+
summary: Rumale::Decomposition provides matrix decomposition algorithms with Rumale
|
87
|
+
interface
|
88
|
+
test_files: []
|