rumale 0.13.5 → 0.13.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -3
- data/CHANGELOG.md +4 -0
- data/README.md +1 -1
- data/lib/rumale.rb +1 -0
- data/lib/rumale/decomposition/fast_ica.rb +212 -0
- data/lib/rumale/manifold/tsne.rb +3 -1
- data/lib/rumale/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e78d2a2eeb35fc8409dac683e2f8a3b90e5c396d
|
4
|
+
data.tar.gz: 8ae3c1396efeac327288a5fe534661ea65d0d766
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f25dee4375b9d9707374341b7cebe19973ea66f8e8b42af92806aa50b2b41323fcd0ba1470b9a87266859e1c30eea8a1a583ffa620519998df6da91bca8e1b23
|
7
|
+
data.tar.gz: 2e9a3cd0d87aae35e180e74c8335a93b1126f57fd9d4f5c727e60decf5b81982b96f0bdcd2e99c4104169341a1dc4db9a6d631c1fe5cf8eb709ccc5ae9f377e2
|
data/.rubocop.yml
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
require:
|
1
|
+
require:
|
2
|
+
- rubocop-performance
|
3
|
+
- rubocop-rspec
|
2
4
|
|
3
5
|
AllCops:
|
4
6
|
TargetRubyVersion: 2.3
|
@@ -10,7 +12,7 @@ AllCops:
|
|
10
12
|
- 'Rakefile'
|
11
13
|
- 'Gemfile'
|
12
14
|
|
13
|
-
Documentation:
|
15
|
+
Style/Documentation:
|
14
16
|
Enabled: false
|
15
17
|
|
16
18
|
Metrics/LineLength:
|
@@ -40,7 +42,7 @@ Metrics/BlockLength:
|
|
40
42
|
Exclude:
|
41
43
|
- 'spec/**/*'
|
42
44
|
|
43
|
-
ParameterLists:
|
45
|
+
Metrics/ParameterLists:
|
44
46
|
Max: 10
|
45
47
|
|
46
48
|
Security/MarshalLoad:
|
@@ -49,6 +51,9 @@ Security/MarshalLoad:
|
|
49
51
|
Naming/UncommunicativeMethodParamName:
|
50
52
|
Enabled: false
|
51
53
|
|
54
|
+
Naming/ConstantName:
|
55
|
+
Enabled: false
|
56
|
+
|
52
57
|
Style/FormatStringToken:
|
53
58
|
Enabled: false
|
54
59
|
|
@@ -57,3 +62,15 @@ Style/NumericLiterals:
|
|
57
62
|
|
58
63
|
Layout/EmptyLineAfterGuardClause:
|
59
64
|
Enabled: false
|
65
|
+
|
66
|
+
RSpec/MultipleExpectations:
|
67
|
+
Enabled: false
|
68
|
+
|
69
|
+
RSpec/ExampleLength:
|
70
|
+
Max: 40
|
71
|
+
|
72
|
+
RSpec/InstanceVariable:
|
73
|
+
Enabled: false
|
74
|
+
|
75
|
+
RSpec/LeakyConstantDeclaration:
|
76
|
+
Enabled: false
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# 0.13.6
|
2
|
+
- Add transformer class for [FastICA](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FastICA.html).
|
3
|
+
- Fix a typo on README ([#13](https://github.com/yoshoku/rumale/pull/13)).
|
4
|
+
|
1
5
|
# 0.13.5
|
2
6
|
- Add transformer class for [Factor Analysis](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FactorAnalysis.html).
|
3
7
|
- Add covariance_type parameter to [Rumale::Clustering::GaussianMixture](https://yoshoku.github.io/rumale/doc/Rumale/Clustering/GaussianMixture.html).
|
data/README.md
CHANGED
@@ -13,7 +13,7 @@ Rumale provides machine learning algorithms with interfaces similar to Scikit-Le
|
|
13
13
|
Rumale supports Linear / Kernel Support Vector Machine,
|
14
14
|
Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
|
15
15
|
Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
|
16
|
-
K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN,
|
16
|
+
K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
|
17
17
|
Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA and Non-negative Matrix Factorization.
|
18
18
|
|
19
19
|
This project was formerly known as "SVMKit".
|
data/lib/rumale.rb
CHANGED
@@ -70,6 +70,7 @@ require 'rumale/clustering/single_linkage'
|
|
70
70
|
require 'rumale/decomposition/pca'
|
71
71
|
require 'rumale/decomposition/nmf'
|
72
72
|
require 'rumale/decomposition/factor_analysis'
|
73
|
+
require 'rumale/decomposition/fast_ica'
|
73
74
|
require 'rumale/manifold/tsne'
|
74
75
|
require 'rumale/manifold/mds'
|
75
76
|
require 'rumale/preprocessing/l2_normalizer'
|
@@ -0,0 +1,212 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Decomposition
|
8
|
+
# FastICA is a class that implements Fast Independent Component Analysis.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'numo/linalg/autoloader'
|
12
|
+
#
|
13
|
+
# transformer = Rumale::Decomposition::FastICA.new(n_components: 2, random_seed: 1)
|
14
|
+
# source_data = transformer.fit_transform(observed_data)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - A. Hyvarinen "Fast and Robust Fixed-Point Algorithms for Independent Component Analysis," IEEE Trans. Neural Networks, Vol. 10 (3), pp. 626--634, 1999.
|
18
|
+
# - A. Hyvarinen and E. Oja, "Independent Component Analysis: Algorithms and Applications," Neural Networks, Vol. 13 (4-5), pp. 411--430, 2000.
|
19
|
+
class FastICA
|
20
|
+
include Base::BaseEstimator
|
21
|
+
include Base::Transformer
|
22
|
+
|
23
|
+
# Returns the unmixing matrix.
|
24
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
25
|
+
attr_reader :components
|
26
|
+
|
27
|
+
# Returns the mixing matrix.
|
28
|
+
# @return [Numo::DFloat] (shape: [n_features, n_components])
|
29
|
+
attr_reader :mixing
|
30
|
+
|
31
|
+
# Returns the number of iterations when converged.
|
32
|
+
# @return [Integer]
|
33
|
+
attr_reader :n_iter
|
34
|
+
|
35
|
+
# Return the random generator.
|
36
|
+
# @return [Random]
|
37
|
+
attr_reader :rng
|
38
|
+
|
39
|
+
# Create a new transformer with FastICA.
|
40
|
+
#
|
41
|
+
# @param n_components [Integer] The number of independent components.
|
42
|
+
# @param whiten [Boolean] The flag indicating whether to perform whitening.
|
43
|
+
# @param fun [String] The type of contrast function ('logcosh', 'exp', or 'cube').
|
44
|
+
# @param alpha [Float] The parameter of contrast function for 'logcosh' and 'exp'.
|
45
|
+
# If fun = 'cube', this parameter is ignored.
|
46
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
47
|
+
# @param tol [Float] The tolerance of termination criterion.
|
48
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
49
|
+
def initialize(n_components: 2, whiten: true, fun: 'logcosh', alpha: 1.0, max_iter: 200, tol: 1e-4, random_seed: nil)
|
50
|
+
check_params_integer(n_components: n_components, max_iter: max_iter)
|
51
|
+
check_params_boolean(whiten: whiten)
|
52
|
+
check_params_string(fun: fun)
|
53
|
+
check_params_float(alpha: alpha, tol: tol)
|
54
|
+
check_params_type_or_nil(Integer, random_seed: random_seed)
|
55
|
+
check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol)
|
56
|
+
@params = {}
|
57
|
+
@params[:n_components] = n_components
|
58
|
+
@params[:whiten] = whiten
|
59
|
+
@params[:fun] = fun
|
60
|
+
@params[:alpha] = alpha
|
61
|
+
@params[:max_iter] = max_iter
|
62
|
+
@params[:tol] = tol
|
63
|
+
@params[:random_seed] = random_seed
|
64
|
+
@params[:random_seed] ||= srand
|
65
|
+
@components = nil
|
66
|
+
@mixing = nil
|
67
|
+
@n_iter = nil
|
68
|
+
@mean = nil
|
69
|
+
@rng = Random.new(@params[:random_seed])
|
70
|
+
end
|
71
|
+
|
72
|
+
# Fit the model with given training data.
|
73
|
+
#
|
74
|
+
# @overload fit(x) -> FastICA
|
75
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
76
|
+
# @return [FastICA] The learned transformer itself.
|
77
|
+
def fit(x, _y = nil)
|
78
|
+
check_sample_array(x)
|
79
|
+
raise 'FastICA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
80
|
+
|
81
|
+
@mean, whiten_mat = whitening(x, @params[:n_components]) if @params[:whiten]
|
82
|
+
wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
|
83
|
+
unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
|
84
|
+
@components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
|
85
|
+
@mixing = Numo::Linalg.pinv(@components)
|
86
|
+
if @params[:n_components] == 1
|
87
|
+
@components = @components.flatten.dup
|
88
|
+
@mixing = @mixing.flatten.dup
|
89
|
+
end
|
90
|
+
self
|
91
|
+
end
|
92
|
+
|
93
|
+
# Fit the model with training data, and then transform them with the learned model.
|
94
|
+
#
|
95
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
96
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
97
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
98
|
+
def fit_transform(x, _y = nil)
|
99
|
+
check_sample_array(x)
|
100
|
+
raise 'FastICA#fit_transform requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
101
|
+
|
102
|
+
fit(x).transform(x)
|
103
|
+
end
|
104
|
+
|
105
|
+
# Transform the given data with the learned model.
|
106
|
+
#
|
107
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
108
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
109
|
+
def transform(x)
|
110
|
+
check_sample_array(x)
|
111
|
+
cx = @params[:whiten] ? (x - @mean) : x
|
112
|
+
cx.dot(@components.transpose)
|
113
|
+
end
|
114
|
+
|
115
|
+
# Inverse transform the given transformed data with the learned model.
|
116
|
+
#
|
117
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The source data reconstructed to the mixed data.
|
118
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The mixed data.
|
119
|
+
def inverse_transform(z)
|
120
|
+
check_sample_array(z)
|
121
|
+
m = @mixing.shape[1].nil? ? @mixing.expand_dims(0).transpose : @mixing
|
122
|
+
x = z.dot(m.transpose)
|
123
|
+
x += @mean if @params[:whiten]
|
124
|
+
x
|
125
|
+
end
|
126
|
+
|
127
|
+
# Dump marshal data.
|
128
|
+
# @return [Hash] The marshal data.
|
129
|
+
def marshal_dump
|
130
|
+
{ params: @params,
|
131
|
+
components: @components,
|
132
|
+
mixing: @mixing,
|
133
|
+
n_iter: @n_iter,
|
134
|
+
mean: @mean,
|
135
|
+
rng: @rng }
|
136
|
+
end
|
137
|
+
|
138
|
+
# Load marshal data.
|
139
|
+
# @return [nil]
|
140
|
+
def marshal_load(obj)
|
141
|
+
@params = obj[:params]
|
142
|
+
@components = obj[:components]
|
143
|
+
@mixing = obj[:mixing]
|
144
|
+
@n_iter = obj[:n_iter]
|
145
|
+
@mean = obj[:mean]
|
146
|
+
@rng = obj[:rng]
|
147
|
+
nil
|
148
|
+
end
|
149
|
+
|
150
|
+
private
|
151
|
+
|
152
|
+
def whitening(x, n_components)
|
153
|
+
n_samples, n_features = x.shape
|
154
|
+
mean_vec = x.mean(0)
|
155
|
+
centered_x = x - mean_vec
|
156
|
+
covar_mat = centered_x.transpose.dot(centered_x) / n_samples
|
157
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(covar_mat, vals_range: (n_features - n_components)...n_features)
|
158
|
+
[mean_vec, (eig_vecs.reverse(1).dup * (1 / Numo::NMath.sqrt(eig_vals.reverse.dup))).transpose.dup]
|
159
|
+
end
|
160
|
+
|
161
|
+
def ica(x, fun, max_iter, tol, sub_rng)
|
162
|
+
n_samples, n_components = x.shape
|
163
|
+
w = decorrelation(Rumale::Utils.rand_normal([n_components, n_components], sub_rng))
|
164
|
+
n_iters = 0
|
165
|
+
max_iter.times do |t|
|
166
|
+
n_iters = t + 1
|
167
|
+
gx, ggx = gradient(x.dot(w.transpose), fun)
|
168
|
+
new_w = decorrelation(gx.transpose.dot(x) / n_samples - w * ggx / n_samples)
|
169
|
+
err = (new_w - w).abs.max
|
170
|
+
w = new_w
|
171
|
+
break if err <= tol
|
172
|
+
end
|
173
|
+
[w, n_iters]
|
174
|
+
end
|
175
|
+
|
176
|
+
def decorrelation(w)
|
177
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(w.dot(w.transpose))
|
178
|
+
decorr_mat = (eig_vecs * (1 / Numo::NMath.sqrt(eig_vals))).dot(eig_vecs.transpose)
|
179
|
+
decorr_mat.dot(w)
|
180
|
+
end
|
181
|
+
|
182
|
+
def gradient(x, func)
|
183
|
+
case func
|
184
|
+
when 'exp'
|
185
|
+
grad_exp(x, @params[:alpha])
|
186
|
+
when 'cube'
|
187
|
+
grad_cube(x)
|
188
|
+
else
|
189
|
+
grad_logcosh(x, @params[:alpha])
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
def grad_logcosh(x, alpha)
|
194
|
+
gx = Numo::NMath.tanh(alpha * x)
|
195
|
+
ggx = (alpha * (1 - gx**2)).sum(0)
|
196
|
+
[gx, ggx]
|
197
|
+
end
|
198
|
+
|
199
|
+
def grad_exp(x, alpha)
|
200
|
+
squared_x = x**2
|
201
|
+
exp_x = Numo::NMath.exp(-0.5 * alpha * squared_x)
|
202
|
+
gx = exp_x * x
|
203
|
+
ggx = (exp_x * (1 - alpha * squared_x)).sum(0)
|
204
|
+
[gx, ggx]
|
205
|
+
end
|
206
|
+
|
207
|
+
def grad_cube(x)
|
208
|
+
[x**3, (3 * x**2).sum(0)]
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
data/lib/rumale/manifold/tsne.rb
CHANGED
@@ -106,7 +106,9 @@ module Rumale
|
|
106
106
|
y = (b.dot(one_vec) * y + (a - b).dot(y)) / a.dot(one_vec)
|
107
107
|
lo_prob_mat = t_distributed_probability_matrix(y)
|
108
108
|
@n_iter = t + 1
|
109
|
-
|
109
|
+
if @params[:verbose] && (@n_iter % 100).zero?
|
110
|
+
puts "[t-SNE] KL divergence after #{@n_iter} iterations: #{cost(hi_prob_mat, lo_prob_mat)}"
|
111
|
+
end
|
110
112
|
end
|
111
113
|
# store results.
|
112
114
|
@embedding = y
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.5
|
4
|
+
version: 0.13.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -172,6 +172,7 @@ files:
|
|
172
172
|
- lib/rumale/clustering/spectral_clustering.rb
|
173
173
|
- lib/rumale/dataset.rb
|
174
174
|
- lib/rumale/decomposition/factor_analysis.rb
|
175
|
+
- lib/rumale/decomposition/fast_ica.rb
|
175
176
|
- lib/rumale/decomposition/nmf.rb
|
176
177
|
- lib/rumale/decomposition/pca.rb
|
177
178
|
- lib/rumale/ensemble/ada_boost_classifier.rb
|