rumale-decomposition 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/decomposition/factor_analysis.rb +146 -0
- data/lib/rumale/decomposition/fast_ica.rb +184 -0
- data/lib/rumale/decomposition/nmf.rb +125 -0
- data/lib/rumale/decomposition/pca.rb +150 -0
- data/lib/rumale/decomposition/version.rb +10 -0
- data/lib/rumale/decomposition.rb +9 -0
- metadata +88 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4a2420f4a6ada0919ed266e0cbf84eb2b52b5d17a86a3d0273646e8ee1253435
|
4
|
+
data.tar.gz: 32194d34d4165898dd8478ef8fd56b6556cee1832140ae028daf707c0d1d4f93
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a94a284b3692806962acab85c22f7c96595a00b2dd27e4fd638cbb9cd15b08e087651ec5e7c933a239df0b5ebdb4da86394c646bbc8daa517c5d880ab7afac0b
|
7
|
+
data.tar.gz: 4b9105d8e34bf11be4ed2e563431ee2755c949684530b9a0020a4b05acdb504d75acdb8f2b39559a9ba3e90e07b99e153b9724c2c3f3f5de452849e7eb207431
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2022 Atsushi Tatsuma
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright holder nor the names of its
|
15
|
+
contributors may be used to endorse or promote products derived from
|
16
|
+
this software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Rumale::Decomposition
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/rumale-decomposition.svg)](https://badge.fury.io/rb/rumale-decomposition)
|
4
|
+
[![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-decomposition/LICENSE.txt)
|
5
|
+
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
|
6
|
+
|
7
|
+
Rumale is a machine learning library in Ruby.
|
8
|
+
Rumale::Decomposition provides matrix decomposition algorithms,
|
9
|
+
such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
|
10
|
+
with Rumale interface.
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'rumale-decomposition'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle install
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install rumale-decomposition
|
27
|
+
|
28
|
+
## Documentation
|
29
|
+
|
30
|
+
- [Rumale API Documentation - Decomposition](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html)
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Decomposition
|
9
|
+
# FactorAnalysis is a class that implements factor analysis with the EM algorithm.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'numo/linalg/autoloader'
|
13
|
+
# require 'rumale/decomposition/factor_analysis'
|
14
|
+
#
|
15
|
+
# decomposer = Rumale::Decomposition::FactorAnalysis.new(n_components: 2)
|
16
|
+
# representation = decomposer.fit_transform(samples)
|
17
|
+
#
|
18
|
+
# *Reference*
|
19
|
+
# - Barber, D., "Bayesian Reasoning and Machine Learning," Cambridge University Press, 2012.
|
20
|
+
class FactorAnalysis < ::Rumale::Base::Estimator
|
21
|
+
include ::Rumale::Base::Transformer
|
22
|
+
|
23
|
+
# Returns the mean vector.
|
24
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
25
|
+
attr_reader :mean
|
26
|
+
|
27
|
+
# Returns the estimated noise variance for each feature.
|
28
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
29
|
+
attr_reader :noise_variance
|
30
|
+
|
31
|
+
# Returns the components with maximum variance.
|
32
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
33
|
+
attr_reader :components
|
34
|
+
|
35
|
+
# Returns the log likelihood at each iteration.
|
36
|
+
# @return [Numo::DFloat] (shape: [n_iter])
|
37
|
+
attr_reader :loglike
|
38
|
+
|
39
|
+
# Return the number of iterations run for optimization
|
40
|
+
# @return [Integer]
|
41
|
+
attr_reader :n_iter
|
42
|
+
|
43
|
+
# Create a new transformer with factor analysis.
|
44
|
+
#
|
45
|
+
# @param n_components [Integer] The number of components (dimensionality of latent space).
|
46
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
47
|
+
# @param tol [Float/Nil] The tolerance of termination criterion for EM algorithm.
|
48
|
+
# If nil is given, iterate EM steps up to the maximum number of iterations.
|
49
|
+
def initialize(n_components: 2, max_iter: 100, tol: 1e-8)
|
50
|
+
super()
|
51
|
+
@params = {
|
52
|
+
n_components: n_components,
|
53
|
+
max_iter: max_iter,
|
54
|
+
tol: tol
|
55
|
+
}
|
56
|
+
end
|
57
|
+
|
58
|
+
# Fit the model with given training data.
|
59
|
+
#
|
60
|
+
# @overload fit(x) -> FactorAnalysis
|
61
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
62
|
+
# @return [FactorAnalysis] The learned transformer itself.
|
63
|
+
def fit(x, _y = nil)
|
64
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
65
|
+
raise 'FactorAnalysis#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
66
|
+
|
67
|
+
# initialize some variables.
|
68
|
+
n_samples, n_features = x.shape
|
69
|
+
@mean = x.mean(0)
|
70
|
+
centered_x = x - @mean
|
71
|
+
cov_mat = centered_x.transpose.dot(centered_x) / n_samples
|
72
|
+
sample_vars = x.var(0)
|
73
|
+
sqrt_n_samples = Math.sqrt(n_samples)
|
74
|
+
@noise_variance = Numo::DFloat.ones(n_features)
|
75
|
+
|
76
|
+
# run optimization.
|
77
|
+
old_loglike = 0.0
|
78
|
+
@n_iter = 0
|
79
|
+
@loglike = [] unless @params[:tol].nil?
|
80
|
+
@params[:max_iter].times do |t|
|
81
|
+
@n_iter = t + 1
|
82
|
+
sqrt_noise_variance = Numo::NMath.sqrt(@noise_variance)
|
83
|
+
scaled_x = centered_x / (sqrt_noise_variance * sqrt_n_samples + 1e-12)
|
84
|
+
s, u = truncate_svd(scaled_x, @params[:n_components])
|
85
|
+
scaler = Numo::NMath.sqrt(Numo::DFloat.maximum(s**2 - 1.0, 0.0))
|
86
|
+
@components = (sqrt_noise_variance.diag.dot(u) * scaler).transpose.dup
|
87
|
+
@noise_variance = Numo::DFloat.maximum(sample_vars - @components.transpose.dot(@components).diagonal, 1e-12)
|
88
|
+
next if @params[:tol].nil?
|
89
|
+
|
90
|
+
new_loglike = log_likelihood(cov_mat, @components, @noise_variance)
|
91
|
+
@loglike.push(new_loglike)
|
92
|
+
break if (old_loglike - new_loglike).abs <= @params[:tol]
|
93
|
+
|
94
|
+
old_loglike = new_loglike
|
95
|
+
end
|
96
|
+
|
97
|
+
@loglike = Numo::DFloat.cast(@loglike) unless @params[:tol].nil?
|
98
|
+
@components = @components[0, true].dup if @params[:n_components] == 1
|
99
|
+
self
|
100
|
+
end
|
101
|
+
|
102
|
+
# Fit the model with training data, and then transform them with the learned model.
|
103
|
+
#
|
104
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
105
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
106
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
107
|
+
def fit_transform(x, _y = nil)
|
108
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
109
|
+
raise 'FactorAnalysis#fit_transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
110
|
+
|
111
|
+
fit(x).transform(x)
|
112
|
+
end
|
113
|
+
|
114
|
+
# Transform the given data with the learned model.
|
115
|
+
#
|
116
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
117
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
118
|
+
def transform(x)
|
119
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
120
|
+
raise 'FactorAnalysis#transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
121
|
+
|
122
|
+
factors = @params[:n_components] == 1 ? @components.expand_dims(0) : @components
|
123
|
+
centered_x = x - @mean
|
124
|
+
beta = Numo::Linalg.inv(Numo::DFloat.eye(factors.shape[0]) + (factors / @noise_variance).dot(factors.transpose))
|
125
|
+
z = centered_x.dot((beta.dot(factors) / @noise_variance).transpose)
|
126
|
+
@params[:n_components] == 1 ? z[true, 0].dup : z
|
127
|
+
end
|
128
|
+
|
129
|
+
private
|
130
|
+
|
131
|
+
# Computes the score that #fit compares between EM iterations to decide convergence.
#
# The model covariance is built as factors^T * factors + diag(noise_vars). The
# returned value is (noise_vars.size / 2) * log(det(model covariance)) plus
# trace(inverse(model covariance) * cov_mat).
# NOTE(review): the scale factor is the length of noise_vars and multiplies only
# the log-determinant term, so this looks like a convergence score rather than a
# normalised log-likelihood; confirm before relying on the absolute values.
#
# @param cov_mat [Numo::DFloat] The covariance matrix of the centered training data.
# @param factors [Numo::DFloat] The current factor loadings (the components).
# @param noise_vars [Numo::DFloat] The current noise variance of each feature.
# @return [Float] The score for the current parameters.
def log_likelihood(cov_mat, factors, noise_vars)
  model_cov = factors.transpose.dot(factors) + noise_vars.diag
  log_det_term = noise_vars.size.fdiv(2) * Math.log(Numo::Linalg.det(model_cov))
  trace_term = Numo::Linalg.inv(model_cov).dot(cov_mat).trace
  log_det_term + trace_term
end
|
136
|
+
|
137
|
+
def truncate_svd(x, k)
|
138
|
+
m = x.shape[1]
|
139
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(x.transpose.dot(x), vals_range: (m - k)...m)
|
140
|
+
s = Numo::NMath.sqrt(eig_vals.reverse.dup)
|
141
|
+
u = eig_vecs.reverse(1).dup
|
142
|
+
[s, u]
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
@@ -0,0 +1,184 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
|
6
|
+
require 'rumale/validation'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module Decomposition
|
10
|
+
# FastICA is a class that implements Fast Independent Component Analysis.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'numo/linalg/autoloader'
|
14
|
+
# require 'rumale/decomposition/fast_ica'
|
15
|
+
#
|
16
|
+
# transformer = Rumale::Decomposition::FastICA.new(n_components: 2, random_seed: 1)
|
17
|
+
# source_data = transformer.fit_transform(observed_data)
|
18
|
+
#
|
19
|
+
# *Reference*
|
20
|
+
# - Hyvarinen, A., "Fast and Robust Fixed-Point Algorithms for Independent Component Analysis," IEEE Trans. Neural Networks, Vol. 10 (3), pp. 626--634, 1999.
|
21
|
+
# - Hyvarinen, A., and Oja, E., "Independent Component Analysis: Algorithms and Applications," Neural Networks, Vol. 13 (4-5), pp. 411--430, 2000.
|
22
|
+
class FastICA < ::Rumale::Base::Estimator
|
23
|
+
include ::Rumale::Base::Transformer
|
24
|
+
|
25
|
+
# Returns the unmixing matrix.
|
26
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
27
|
+
attr_reader :components
|
28
|
+
|
29
|
+
# Returns the mixing matrix.
|
30
|
+
# @return [Numo::DFloat] (shape: [n_features, n_components])
|
31
|
+
attr_reader :mixing
|
32
|
+
|
33
|
+
# Returns the number of iterations when converged.
|
34
|
+
# @return [Integer]
|
35
|
+
attr_reader :n_iter
|
36
|
+
|
37
|
+
# Return the random generator.
|
38
|
+
# @return [Random]
|
39
|
+
attr_reader :rng
|
40
|
+
|
41
|
+
# Create a new transformer with FastICA.
|
42
|
+
#
|
43
|
+
# @param n_components [Integer] The number of independent components.
|
44
|
+
# @param whiten [Boolean] The flag indicating whether to perform whitening.
|
45
|
+
# @param fun [String] The type of contrast function ('logcosh', 'exp', or 'cube').
|
46
|
+
# @param alpha [Float] The parameter of contrast function for 'logcosh' and 'exp'.
|
47
|
+
# If fun = 'cube', this parameter is ignored.
|
48
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
49
|
+
# @param tol [Float] The tolerance of termination criterion.
|
50
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
51
|
+
def initialize(n_components: 2, whiten: true, fun: 'logcosh', alpha: 1.0, max_iter: 200, tol: 1e-4, random_seed: nil)
|
52
|
+
super()
|
53
|
+
@params = {
|
54
|
+
n_components: n_components,
|
55
|
+
whiten: whiten,
|
56
|
+
fun: fun,
|
57
|
+
alpha: alpha,
|
58
|
+
max_iter: max_iter,
|
59
|
+
tol: tol,
|
60
|
+
random_seed: (random_seed || srand)
|
61
|
+
}
|
62
|
+
@rng = Random.new(@params[:random_seed])
|
63
|
+
end
|
64
|
+
|
65
|
+
# Fit the model with given training data.
|
66
|
+
#
|
67
|
+
# @overload fit(x) -> FastICA
|
68
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
69
|
+
# @return [FastICA] The learned transformer itself.
|
70
|
+
def fit(x, _y = nil)
|
71
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
72
|
+
raise 'FastICA#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
73
|
+
|
74
|
+
@mean, whiten_mat = whitening(x, @params[:n_components]) if @params[:whiten]
|
75
|
+
wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
|
76
|
+
unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
|
77
|
+
@components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
|
78
|
+
@mixing = Numo::Linalg.pinv(@components).dup
|
79
|
+
if @params[:n_components] == 1
|
80
|
+
@components = @components.flatten.dup
|
81
|
+
@mixing = @mixing.flatten.dup
|
82
|
+
end
|
83
|
+
self
|
84
|
+
end
|
85
|
+
|
86
|
+
# Fit the model with training data, and then transform them with the learned model.
|
87
|
+
#
|
88
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
89
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
90
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
91
|
+
def fit_transform(x, _y = nil)
|
92
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
93
|
+
raise 'FastICA#fit_transform requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
|
94
|
+
|
95
|
+
fit(x).transform(x)
|
96
|
+
end
|
97
|
+
|
98
|
+
# Transform the given data with the learned model.
|
99
|
+
#
|
100
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
101
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
102
|
+
def transform(x)
|
103
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
104
|
+
|
105
|
+
cx = @params[:whiten] ? (x - @mean) : x
|
106
|
+
cx.dot(@components.transpose)
|
107
|
+
end
|
108
|
+
|
109
|
+
# Inverse transform the given transformed data with the learned model.
|
110
|
+
#
|
111
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The source data reconstructed to the mixed data.
|
112
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The mixed data.
|
113
|
+
def inverse_transform(z)
|
114
|
+
z = ::Rumale::Validation.check_convert_sample_array(z)
|
115
|
+
|
116
|
+
m = @mixing.shape[1].nil? ? @mixing.expand_dims(0).transpose : @mixing
|
117
|
+
x = z.dot(m.transpose)
|
118
|
+
x += @mean if @params[:whiten]
|
119
|
+
x
|
120
|
+
end
|
121
|
+
|
122
|
+
private
|
123
|
+
|
124
|
+
def whitening(x, n_components)
|
125
|
+
n_samples, n_features = x.shape
|
126
|
+
mean_vec = x.mean(0)
|
127
|
+
centered_x = x - mean_vec
|
128
|
+
covar_mat = centered_x.transpose.dot(centered_x) / n_samples
|
129
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(covar_mat, vals_range: (n_features - n_components)...n_features)
|
130
|
+
[mean_vec, (eig_vecs.reverse(1).dup * (1 / Numo::NMath.sqrt(eig_vals.reverse.dup))).transpose.dup]
|
131
|
+
end
|
132
|
+
|
133
|
+
# Estimates the unmixing matrix with the parallel (symmetric) FastICA fixed-point iteration.
#
# Each row of w is one unmixing vector. One iteration computes, for every row i,
#   w_i <- E[x * g(w_i . x)] - E[g'(w_i . x)] * w_i
# and then symmetrically decorrelates the rows.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_components]) The data to unmix
#   (the whitened data when whitening is enabled).
# @param fun [String] The name of the contrast function passed on to #gradient.
# @param max_iter [Integer] The maximum number of iterations.
# @param tol [Float] Iteration stops once the largest absolute change in w is at most this value.
# @param sub_rng [Random] The random generator used to draw the initial matrix.
# @return [Array] The unmixing matrix and the number of iterations that were run.
def ica(x, fun, max_iter, tol, sub_rng)
  n_samples, n_components = x.shape
  w = decorrelation(::Rumale::Utils.rand_normal([n_components, n_components], sub_rng))
  n_iters = 0
  max_iter.times do |t|
    n_iters = t + 1
    gx, ggx = gradient(x.dot(w.transpose), fun)
    # ggx holds one derivative sum per component, i.e. per ROW of w. Without
    # expand_dims(1) broadcasting would align it with the last axis and scale
    # the columns of w instead of the rows.
    new_w = decorrelation(gx.transpose.dot(x) / n_samples - w * ggx.expand_dims(1) / n_samples)
    err = (new_w - w).abs.max
    w = new_w
    break if err <= tol
  end
  [w, n_iters]
end
|
147
|
+
|
148
|
+
def decorrelation(w)
|
149
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(w.dot(w.transpose))
|
150
|
+
decorr_mat = (eig_vecs * (1 / Numo::NMath.sqrt(eig_vals))).dot(eig_vecs.transpose)
|
151
|
+
decorr_mat.dot(w)
|
152
|
+
end
|
153
|
+
|
154
|
+
# Dispatches to the contrast-function helper selected by name.
#
# 'exp' and 'cube' select their dedicated helpers; any other name falls back
# to the log-cosh helper. The alpha parameter is read from @params and is not
# passed to the 'cube' helper.
#
# @param x [Numo::DFloat] The projected data handed to the helper.
# @param func [String] The name of the contrast function.
# @return [Array] Whatever the selected helper returns (the function values and
#   the per-column sums of its derivative).
def gradient(x, func)
  return grad_exp(x, @params[:alpha]) if func == 'exp'
  return grad_cube(x) if func == 'cube'

  grad_logcosh(x, @params[:alpha])
end
|
164
|
+
|
165
|
+
def grad_logcosh(x, alpha)
|
166
|
+
gx = Numo::NMath.tanh(alpha * x)
|
167
|
+
ggx = (alpha * (1 - gx**2)).sum(axis: 0)
|
168
|
+
[gx, ggx]
|
169
|
+
end
|
170
|
+
|
171
|
+
def grad_exp(x, alpha)
|
172
|
+
squared_x = x**2
|
173
|
+
exp_x = Numo::NMath.exp(-0.5 * alpha * squared_x)
|
174
|
+
gx = exp_x * x
|
175
|
+
ggx = (exp_x * (1 - alpha * squared_x)).sum(axis: 0)
|
176
|
+
[gx, ggx]
|
177
|
+
end
|
178
|
+
|
179
|
+
def grad_cube(x)
|
180
|
+
[x**3, (3 * x**2).sum(axis: 0)]
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
|
6
|
+
require 'rumale/validation'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module Decomposition
|
10
|
+
# NMF is a class that implements Non-negative Matrix Factorization.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'rumale/decomposition/nmf'
|
14
|
+
#
|
15
|
+
# decomposer = Rumale::Decomposition::NMF.new(n_components: 2)
|
16
|
+
# representation = decomposer.fit_transform(samples)
|
17
|
+
#
|
18
|
+
# *Reference*
|
19
|
+
# - Xu, W., Liu, X., and Gong, Y., "Document Clustering Based On Non-negative Matrix Factorization," Proc. SIGIR' 03 , pp. 267--273, 2003.
|
20
|
+
class NMF < ::Rumale::Base::Estimator
|
21
|
+
include ::Rumale::Base::Transformer
|
22
|
+
|
23
|
+
# Returns the factorization matrix.
|
24
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
25
|
+
attr_reader :components
|
26
|
+
|
27
|
+
# Return the random generator.
|
28
|
+
# @return [Random]
|
29
|
+
attr_reader :rng
|
30
|
+
|
31
|
+
# Create a new transformer with NMF.
|
32
|
+
#
|
33
|
+
# @param n_components [Integer] The number of components.
|
34
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
35
|
+
# @param tol [Float] The tolerance of termination criterion.
|
36
|
+
# @param eps [Float] A small value close to zero to avoid zero division error.
|
37
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
38
|
+
def initialize(n_components: 2, max_iter: 500, tol: 1.0e-4, eps: 1.0e-16, random_seed: nil)
|
39
|
+
super()
|
40
|
+
@params = {
|
41
|
+
n_components: n_components,
|
42
|
+
max_iter: max_iter,
|
43
|
+
tol: tol,
|
44
|
+
eps: eps,
|
45
|
+
random_seed: (random_seed || srand)
|
46
|
+
}
|
47
|
+
@rng = Random.new(@params[:random_seed])
|
48
|
+
end
|
49
|
+
|
50
|
+
# Fit the model with given training data.
|
51
|
+
#
|
52
|
+
# @overload fit(x) -> NMF
|
53
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
54
|
+
# @return [NMF] The learned transformer itself.
|
55
|
+
def fit(x, _y = nil)
|
56
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
57
|
+
|
58
|
+
partial_fit(x)
|
59
|
+
self
|
60
|
+
end
|
61
|
+
|
62
|
+
# Fit the model with training data, and then transform them with the learned model.
|
63
|
+
#
|
64
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
65
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
66
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
67
|
+
def fit_transform(x, _y = nil)
|
68
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
69
|
+
|
70
|
+
partial_fit(x)
|
71
|
+
end
|
72
|
+
|
73
|
+
# Transform the given data with the learned model.
|
74
|
+
#
|
75
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
76
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
77
|
+
def transform(x)
|
78
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
79
|
+
|
80
|
+
partial_fit(x, update_comps: false)
|
81
|
+
end
|
82
|
+
|
83
|
+
# Inverse transform the given transformed data with the learned model.
|
84
|
+
#
|
85
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
|
86
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
|
87
|
+
def inverse_transform(z)
|
88
|
+
z = ::Rumale::Validation.check_convert_sample_array(z)
|
89
|
+
|
90
|
+
z.dot(@components)
|
91
|
+
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
def partial_fit(x, update_comps: true)
|
96
|
+
# initialize some variables.
|
97
|
+
n_samples, n_features = x.shape
|
98
|
+
scale = Math.sqrt(x.mean / @params[:n_components])
|
99
|
+
sub_rng = @rng.dup
|
100
|
+
@components = ::Rumale::Utils.rand_uniform([@params[:n_components], n_features], sub_rng) * scale if update_comps
|
101
|
+
coefficients = ::Rumale::Utils.rand_uniform([n_samples, @params[:n_components]], sub_rng) * scale
|
102
|
+
# optimization.
|
103
|
+
@params[:max_iter].times do
|
104
|
+
# update
|
105
|
+
if update_comps
|
106
|
+
nume = coefficients.transpose.dot(x)
|
107
|
+
deno = coefficients.transpose.dot(coefficients).dot(@components) + @params[:eps]
|
108
|
+
@components *= (nume / deno)
|
109
|
+
end
|
110
|
+
nume = x.dot(@components.transpose)
|
111
|
+
deno = coefficients.dot(@components).dot(@components.transpose) + @params[:eps]
|
112
|
+
coefficients *= (nume / deno)
|
113
|
+
# normalize
|
114
|
+
norm = Numo::NMath.sqrt((@components**2).sum(axis: 1)) + @params[:eps]
|
115
|
+
@components /= norm.expand_dims(1) if update_comps
|
116
|
+
coefficients *= norm
|
117
|
+
# check convergence
|
118
|
+
err = ((x - coefficients.dot(@components))**2).sum(axis: 1).mean
|
119
|
+
break if err < @params[:tol]
|
120
|
+
end
|
121
|
+
coefficients
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
require 'rumale/utils'
|
5
|
+
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
# Module for matrix decomposition algorithms.
|
9
|
+
module Decomposition
|
10
|
+
# PCA is a class that implements Principal Component Analysis.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'rumale/decomposition/pca'
|
14
|
+
#
|
15
|
+
# decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'fpt')
|
16
|
+
# representation = decomposer.fit_transform(samples)
|
17
|
+
#
|
18
|
+
# # If Numo::Linalg is installed, you can specify 'evd' for the solver option.
|
19
|
+
# require 'numo/linalg/autoloader'
|
20
|
+
# require 'rumale/decomposition/pca'
|
21
|
+
#
|
22
|
+
# decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'evd')
|
23
|
+
# representation = decomposer.fit_transform(samples)
|
24
|
+
#
|
25
|
+
# # If Numo::Linalg is loaded and the solver option is not given,
|
26
|
+
# # the 'evd' solver is chosen automatically.
|
27
|
+
# decomposer = Rumale::Decomposition::PCA.new(n_components: 2)
|
28
|
+
# representation = decomposer.fit_transform(samples)
|
29
|
+
#
|
30
|
+
# *Reference*
|
31
|
+
# - Sharma, A., and Paliwal, K K., "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
|
32
|
+
class PCA < ::Rumale::Base::Estimator
|
33
|
+
include ::Rumale::Base::Transformer
|
34
|
+
|
35
|
+
# Returns the principal components.
|
36
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
37
|
+
attr_reader :components
|
38
|
+
|
39
|
+
# Returns the mean vector.
|
40
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
41
|
+
attr_reader :mean
|
42
|
+
|
43
|
+
# Return the random generator.
|
44
|
+
# @return [Random]
|
45
|
+
attr_reader :rng
|
46
|
+
|
47
|
+
# Create a new transformer with PCA.
|
48
|
+
#
|
49
|
+
# @param n_components [Integer] The number of principal components.
|
50
|
+
# @param solver [String] The algorithm for the optimization ('auto', 'fpt' or 'evd').
|
51
|
+
# 'auto' chooses the 'evd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'fpt' solver.
|
52
|
+
# 'fpt' uses the fixed-point algorithm.
|
53
|
+
# 'evd' performs eigen value decomposition of the covariance matrix of samples.
|
54
|
+
# @param max_iter [Integer] The maximum number of iterations. If solver = 'evd', this parameter is ignored.
|
55
|
+
# @param tol [Float] The tolerance of termination criterion. If solver = 'evd', this parameter is ignored.
|
56
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
57
|
+
def initialize(n_components: 2, solver: 'auto', max_iter: 100, tol: 1.0e-4, random_seed: nil)
|
58
|
+
super()
|
59
|
+
@params = {
|
60
|
+
n_components: n_components,
|
61
|
+
solver: 'fpt',
|
62
|
+
max_iter: max_iter,
|
63
|
+
tol: tol,
|
64
|
+
random_seed: (random_seed || srand)
|
65
|
+
}
|
66
|
+
@params[:solver] = 'evd' if (solver == 'auto' && enable_linalg?(warning: false)) || solver == 'evd'
|
67
|
+
@rng = Random.new(@params[:random_seed])
|
68
|
+
end
|
69
|
+
|
70
|
+
# Fit the model with given training data.
|
71
|
+
#
|
72
|
+
# @overload fit(x) -> PCA
|
73
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
74
|
+
# @return [PCA] The learned transformer itself.
|
75
|
+
def fit(x, _y = nil)
|
76
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
77
|
+
|
78
|
+
# initialize some variables.
|
79
|
+
@components = nil
|
80
|
+
n_samples, n_features = x.shape
|
81
|
+
sub_rng = @rng.dup
|
82
|
+
# centering.
|
83
|
+
@mean = x.mean(0)
|
84
|
+
centered_x = x - @mean
|
85
|
+
# optimization.
|
86
|
+
covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)
|
87
|
+
if @params[:solver] == 'evd' && enable_linalg?
|
88
|
+
_, evecs = Numo::Linalg.eigh(covariance_mat, vals_range: (n_features - @params[:n_components])...n_features)
|
89
|
+
comps = evecs.reverse(1).transpose
|
90
|
+
@components = @params[:n_components] == 1 ? comps[0, true].dup : comps.dup
|
91
|
+
else
|
92
|
+
@params[:n_components].times do
|
93
|
+
comp_vec = ::Rumale::Utils.rand_uniform(n_features, sub_rng)
|
94
|
+
@params[:max_iter].times do
|
95
|
+
updated = orthogonalize(covariance_mat.dot(comp_vec))
|
96
|
+
break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
|
97
|
+
|
98
|
+
comp_vec = updated
|
99
|
+
end
|
100
|
+
@components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
|
101
|
+
end
|
102
|
+
end
|
103
|
+
self
|
104
|
+
end
|
105
|
+
|
106
|
+
# Fit the model with training data, and then transform them with the learned model.
|
107
|
+
#
|
108
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
109
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
110
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
111
|
+
def fit_transform(x, _y = nil)
|
112
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
113
|
+
|
114
|
+
fit(x).transform(x)
|
115
|
+
end
|
116
|
+
|
117
|
+
# Transform the given data with the learned model.
|
118
|
+
#
|
119
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
120
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
121
|
+
def transform(x)
|
122
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
123
|
+
|
124
|
+
(x - @mean).dot(@components.transpose)
|
125
|
+
end
|
126
|
+
|
127
|
+
# Inverse transform the given transformed data with the learned model.
|
128
|
+
#
|
129
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
|
130
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
|
131
|
+
def inverse_transform(z)
|
132
|
+
z = ::Rumale::Validation.check_convert_sample_array(z)
|
133
|
+
|
134
|
+
c = @components.shape[1].nil? ? @components.expand_dims(0) : @components
|
135
|
+
z.dot(c) + @mean
|
136
|
+
end
|
137
|
+
|
138
|
+
private
|
139
|
+
|
140
|
+
# Scales a candidate component vector to unit length, first subtracting its
# projection onto the components already stored in @components (if any).
#
# @param pcvec [Numo::DFloat] The candidate component vector.
# @return [Numo::DFloat] The orthogonalized vector scaled to (near) unit length.
def orthogonalize(pcvec)
  unless @components.nil?
    delta = @components.dot(pcvec) * @components.transpose
    # When delta is two-dimensional, sum the per-component projections into one vector.
    delta = delta.sum(axis: 1) unless delta.shape[1].nil?
    pcvec -= delta
  end
  # The small constant guards the division, so it must be part of the
  # denominator. Added after the division it would shift every element of the
  # result and would not prevent NaN for a zero vector.
  pcvec / (Math.sqrt((pcvec**2).sum.abs) + 1.0e-12)
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require_relative 'decomposition/factor_analysis'
|
6
|
+
require_relative 'decomposition/fast_ica'
|
7
|
+
require_relative 'decomposition/nmf'
|
8
|
+
require_relative 'decomposition/pca'
|
9
|
+
require_relative 'decomposition/version'
|
metadata
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rumale-decomposition
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.24.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yoshoku
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-12-31 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: numo-narray
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.9.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.9.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rumale-core
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.24.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.24.0
|
41
|
+
description: |
|
42
|
+
Rumale::Decomposition provides matrix decomposition algorithms,
|
43
|
+
such as Principal Component Analysis, Non-negative Matrix Factorization, Factor Analysis, and Independent Component Analysis,
|
44
|
+
with Rumale interface.
|
45
|
+
email:
|
46
|
+
- yoshoku@outlook.com
|
47
|
+
executables: []
|
48
|
+
extensions: []
|
49
|
+
extra_rdoc_files: []
|
50
|
+
files:
|
51
|
+
- LICENSE.txt
|
52
|
+
- README.md
|
53
|
+
- lib/rumale/decomposition.rb
|
54
|
+
- lib/rumale/decomposition/factor_analysis.rb
|
55
|
+
- lib/rumale/decomposition/fast_ica.rb
|
56
|
+
- lib/rumale/decomposition/nmf.rb
|
57
|
+
- lib/rumale/decomposition/pca.rb
|
58
|
+
- lib/rumale/decomposition/version.rb
|
59
|
+
homepage: https://github.com/yoshoku/rumale
|
60
|
+
licenses:
|
61
|
+
- BSD-3-Clause
|
62
|
+
metadata:
|
63
|
+
homepage_uri: https://github.com/yoshoku/rumale
|
64
|
+
source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-decomposition
|
65
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
|
66
|
+
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
67
|
+
rubygems_mfa_required: 'true'
|
68
|
+
post_install_message:
|
69
|
+
rdoc_options: []
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
requirements: []
|
83
|
+
rubygems_version: 3.3.26
|
84
|
+
signing_key:
|
85
|
+
specification_version: 4
|
86
|
+
summary: Rumale::Decomposition provides matrix decomposition algorithms with Rumale
|
87
|
+
interface
|
88
|
+
test_files: []
|