rumale 0.13.5 → 0.13.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -3
- data/CHANGELOG.md +4 -0
- data/README.md +1 -1
- data/lib/rumale.rb +1 -0
- data/lib/rumale/decomposition/fast_ica.rb +212 -0
- data/lib/rumale/manifold/tsne.rb +3 -1
- data/lib/rumale/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e78d2a2eeb35fc8409dac683e2f8a3b90e5c396d
|
4
|
+
data.tar.gz: 8ae3c1396efeac327288a5fe534661ea65d0d766
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f25dee4375b9d9707374341b7cebe19973ea66f8e8b42af92806aa50b2b41323fcd0ba1470b9a87266859e1c30eea8a1a583ffa620519998df6da91bca8e1b23
|
7
|
+
data.tar.gz: 2e9a3cd0d87aae35e180e74c8335a93b1126f57fd9d4f5c727e60decf5b81982b96f0bdcd2e99c4104169341a1dc4db9a6d631c1fe5cf8eb709ccc5ae9f377e2
|
data/.rubocop.yml
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
require:
|
1
|
+
require:
|
2
|
+
- rubocop-performance
|
3
|
+
- rubocop-rspec
|
2
4
|
|
3
5
|
AllCops:
|
4
6
|
TargetRubyVersion: 2.3
|
@@ -10,7 +12,7 @@ AllCops:
|
|
10
12
|
- 'Rakefile'
|
11
13
|
- 'Gemfile'
|
12
14
|
|
13
|
-
Documentation:
|
15
|
+
Style/Documentation:
|
14
16
|
Enabled: false
|
15
17
|
|
16
18
|
Metrics/LineLength:
|
@@ -40,7 +42,7 @@ Metrics/BlockLength:
|
|
40
42
|
Exclude:
|
41
43
|
- 'spec/**/*'
|
42
44
|
|
43
|
-
ParameterLists:
|
45
|
+
Metrics/ParameterLists:
|
44
46
|
Max: 10
|
45
47
|
|
46
48
|
Security/MarshalLoad:
|
@@ -49,6 +51,9 @@ Security/MarshalLoad:
|
|
49
51
|
Naming/UncommunicativeMethodParamName:
|
50
52
|
Enabled: false
|
51
53
|
|
54
|
+
Naming/ConstantName:
|
55
|
+
Enabled: false
|
56
|
+
|
52
57
|
Style/FormatStringToken:
|
53
58
|
Enabled: false
|
54
59
|
|
@@ -57,3 +62,15 @@ Style/NumericLiterals:
|
|
57
62
|
|
58
63
|
Layout/EmptyLineAfterGuardClause:
|
59
64
|
Enabled: false
|
65
|
+
|
66
|
+
RSpec/MultipleExpectations:
|
67
|
+
Enabled: false
|
68
|
+
|
69
|
+
RSpec/ExampleLength:
|
70
|
+
Max: 40
|
71
|
+
|
72
|
+
RSpec/InstanceVariable:
|
73
|
+
Enabled: false
|
74
|
+
|
75
|
+
RSpec/LeakyConstantDeclaration:
|
76
|
+
Enabled: false
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# 0.13.6
|
2
|
+
- Add transformer class for [FastICA](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FastICA.html).
|
3
|
+
- Fix a typo on README ([#13](https://github.com/yoshoku/rumale/pull/13)).
|
4
|
+
|
1
5
|
# 0.13.5
|
2
6
|
- Add transformer class for [Factor Analysis](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FactorAnalysis.html).
|
3
7
|
- Add covariance_type parameter to [Rumale::Clustering::GaussianMixture](https://yoshoku.github.io/rumale/doc/Rumale/Clustering/GaussianMixture.html).
|
data/README.md
CHANGED
@@ -13,7 +13,7 @@ Rumale provides machine learning algorithms with interfaces similar to Scikit-Le
|
|
13
13
|
Rumale supports Linear / Kernel Support Vector Machine,
|
14
14
|
Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
|
15
15
|
Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
|
16
|
-
K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN,
|
16
|
+
K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
|
17
17
|
Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA and Non-negative Matrix Factorization.
|
18
18
|
|
19
19
|
This project was formerly known as "SVMKit".
|
data/lib/rumale.rb
CHANGED
@@ -70,6 +70,7 @@ require 'rumale/clustering/single_linkage'
|
|
70
70
|
require 'rumale/decomposition/pca'
|
71
71
|
require 'rumale/decomposition/nmf'
|
72
72
|
require 'rumale/decomposition/factor_analysis'
|
73
|
+
require 'rumale/decomposition/fast_ica'
|
73
74
|
require 'rumale/manifold/tsne'
|
74
75
|
require 'rumale/manifold/mds'
|
75
76
|
require 'rumale/preprocessing/l2_normalizer'
|
@@ -0,0 +1,212 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Decomposition
|
8
|
+
# FastICA is a class that implements Fast Independent Component Analysis.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'numo/linalg/autoloader'
|
12
|
+
#
|
13
|
+
# transformer = Rumale::Decomposition::FastICA.new(n_components: 2, random_seed: 1)
|
14
|
+
# source_data = transformer.fit_transform(observed_data)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - A. Hyvarinen "Fast and Robust Fixed-Point Algorithms for Independent Component Analysis," IEEE Trans. Neural Networks, Vol. 10 (3), pp. 626--634, 1999.
|
18
|
+
# - A. Hyvarinen and E. Oja, "Independent Component Analysis: Algorithms and Applications," Neural Networks, Vol. 13 (4-5), pp. 411--430, 2000.
|
19
|
+
class FastICA
|
20
|
+
include Base::BaseEstimator
|
21
|
+
include Base::Transformer
|
22
|
+
|
23
|
+
# Returns the unmixing matrix.
|
24
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
25
|
+
attr_reader :components
|
26
|
+
|
27
|
+
# Returns the mixing matrix.
|
28
|
+
# @return [Numo::DFloat] (shape: [n_features, n_components])
|
29
|
+
attr_reader :mixing
|
30
|
+
|
31
|
+
# Returns the number of iterations when converged.
|
32
|
+
# @return [Integer]
|
33
|
+
attr_reader :n_iter
|
34
|
+
|
35
|
+
# Return the random generator.
|
36
|
+
# @return [Random]
|
37
|
+
attr_reader :rng
|
38
|
+
|
39
|
+
# Create a new transformer with FastICA.
|
40
|
+
#
|
41
|
+
# @param n_components [Integer] The number of independent components.
|
42
|
+
# @param whiten [Boolean] The flag indicating whether to perform whitening.
|
43
|
+
# @param fun [String] The type of contrast function ('logcosh', 'exp', or 'cube').
|
44
|
+
# @param alpha [Float] The parameter of contrast function for 'logcosh' and 'exp'.
|
45
|
+
# If fun = 'cube', this parameter is ignored.
|
46
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
47
|
+
# @param tol [Float] The tolerance of termination criterion.
|
48
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
49
|
+
def initialize(n_components: 2, whiten: true, fun: 'logcosh', alpha: 1.0, max_iter: 200, tol: 1e-4, random_seed: nil)
|
50
|
+
check_params_integer(n_components: n_components, max_iter: max_iter)
|
51
|
+
check_params_boolean(whiten: whiten)
|
52
|
+
check_params_string(fun: fun)
|
53
|
+
check_params_float(alpha: alpha, tol: tol)
|
54
|
+
check_params_type_or_nil(Integer, random_seed: random_seed)
|
55
|
+
check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol)
|
56
|
+
@params = {}
|
57
|
+
@params[:n_components] = n_components
|
58
|
+
@params[:whiten] = whiten
|
59
|
+
@params[:fun] = fun
|
60
|
+
@params[:alpha] = alpha
|
61
|
+
@params[:max_iter] = max_iter
|
62
|
+
@params[:tol] = tol
|
63
|
+
@params[:random_seed] = random_seed
|
64
|
+
@params[:random_seed] ||= srand
|
65
|
+
@components = nil
|
66
|
+
@mixing = nil
|
67
|
+
@n_iter = nil
|
68
|
+
@mean = nil
|
69
|
+
@rng = Random.new(@params[:random_seed])
|
70
|
+
end
|
71
|
+
|
72
|
+
# Fit the model with given training data.
|
73
|
+
#
|
74
|
+
# @overload fit(x) -> FastICA
|
75
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
76
|
+
# @return [FastICA] The learned transformer itself.
|
77
|
+
def fit(x, _y = nil)
|
78
|
+
check_sample_array(x)
|
79
|
+
raise 'FastICA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
80
|
+
|
81
|
+
@mean, whiten_mat = whitening(x, @params[:n_components]) if @params[:whiten]
|
82
|
+
wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
|
83
|
+
unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
|
84
|
+
@components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
|
85
|
+
@mixing = Numo::Linalg.pinv(@components)
|
86
|
+
if @params[:n_components] == 1
|
87
|
+
@components = @components.flatten.dup
|
88
|
+
@mixing = @mixing.flatten.dup
|
89
|
+
end
|
90
|
+
self
|
91
|
+
end
|
92
|
+
|
93
|
+
# Fit the model with training data, and then transform them with the learned model.
|
94
|
+
#
|
95
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
96
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
97
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
98
|
+
def fit_transform(x, _y = nil)
|
99
|
+
check_sample_array(x)
|
100
|
+
raise 'FastICA#fit_transform requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
101
|
+
|
102
|
+
fit(x).transform(x)
|
103
|
+
end
|
104
|
+
|
105
|
+
# Transform the given data with the learned model.
|
106
|
+
#
|
107
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
108
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
109
|
+
def transform(x)
|
110
|
+
check_sample_array(x)
|
111
|
+
cx = @params[:whiten] ? (x - @mean) : x
|
112
|
+
cx.dot(@components.transpose)
|
113
|
+
end
|
114
|
+
|
115
|
+
# Inverse transform the given transformed data with the learned model.
|
116
|
+
#
|
117
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The source data to be reconstructed into the mixed data.
|
118
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The mixed data.
|
119
|
+
def inverse_transform(z)
|
120
|
+
check_sample_array(z)
|
121
|
+
m = @mixing.shape[1].nil? ? @mixing.expand_dims(0).transpose : @mixing
|
122
|
+
x = z.dot(m.transpose)
|
123
|
+
x += @mean if @params[:whiten]
|
124
|
+
x
|
125
|
+
end
|
126
|
+
|
127
|
+
# Dump marshal data.
|
128
|
+
# @return [Hash] The marshal data.
|
129
|
+
def marshal_dump
|
130
|
+
{ params: @params,
|
131
|
+
components: @components,
|
132
|
+
mixing: @mixing,
|
133
|
+
n_iter: @n_iter,
|
134
|
+
mean: @mean,
|
135
|
+
rng: @rng }
|
136
|
+
end
|
137
|
+
|
138
|
+
# Load marshal data.
|
139
|
+
# @return [nil]
|
140
|
+
def marshal_load(obj)
|
141
|
+
@params = obj[:params]
|
142
|
+
@components = obj[:components]
|
143
|
+
@mixing = obj[:mixing]
|
144
|
+
@n_iter = obj[:n_iter]
|
145
|
+
@mean = obj[:mean]
|
146
|
+
@rng = obj[:rng]
|
147
|
+
nil
|
148
|
+
end
|
149
|
+
|
150
|
+
private
|
151
|
+
|
152
|
+
def whitening(x, n_components)
|
153
|
+
n_samples, n_features = x.shape
|
154
|
+
mean_vec = x.mean(0)
|
155
|
+
centered_x = x - mean_vec
|
156
|
+
covar_mat = centered_x.transpose.dot(centered_x) / n_samples
|
157
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(covar_mat, vals_range: (n_features - n_components)...n_features)
|
158
|
+
[mean_vec, (eig_vecs.reverse(1).dup * (1 / Numo::NMath.sqrt(eig_vals.reverse.dup))).transpose.dup]
|
159
|
+
end
|
160
|
+
|
161
|
+
def ica(x, fun, max_iter, tol, sub_rng)
|
162
|
+
n_samples, n_components = x.shape
|
163
|
+
w = decorrelation(Rumale::Utils.rand_normal([n_components, n_components], sub_rng))
|
164
|
+
n_iters = 0
|
165
|
+
max_iter.times do |t|
|
166
|
+
n_iters = t + 1
|
167
|
+
gx, ggx = gradient(x.dot(w.transpose), fun)
|
168
|
+
new_w = decorrelation(gx.transpose.dot(x) / n_samples - w * ggx / n_samples)
|
169
|
+
err = (new_w - w).abs.max
|
170
|
+
w = new_w
|
171
|
+
break if err <= tol
|
172
|
+
end
|
173
|
+
[w, n_iters]
|
174
|
+
end
|
175
|
+
|
176
|
+
def decorrelation(w)
|
177
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(w.dot(w.transpose))
|
178
|
+
decorr_mat = (eig_vecs * (1 / Numo::NMath.sqrt(eig_vals))).dot(eig_vecs.transpose)
|
179
|
+
decorr_mat.dot(w)
|
180
|
+
end
|
181
|
+
|
182
|
+
def gradient(x, func)
|
183
|
+
case func
|
184
|
+
when 'exp'
|
185
|
+
grad_exp(x, @params[:alpha])
|
186
|
+
when 'cube'
|
187
|
+
grad_cube(x)
|
188
|
+
else
|
189
|
+
grad_logcosh(x, @params[:alpha])
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
def grad_logcosh(x, alpha)
|
194
|
+
gx = Numo::NMath.tanh(alpha * x)
|
195
|
+
ggx = (alpha * (1 - gx**2)).sum(0)
|
196
|
+
[gx, ggx]
|
197
|
+
end
|
198
|
+
|
199
|
+
def grad_exp(x, alpha)
|
200
|
+
squared_x = x**2
|
201
|
+
exp_x = Numo::NMath.exp(-0.5 * alpha * squared_x)
|
202
|
+
gx = exp_x * x
|
203
|
+
ggx = (exp_x * (1 - alpha * squared_x)).sum(0)
|
204
|
+
[gx, ggx]
|
205
|
+
end
|
206
|
+
|
207
|
+
def grad_cube(x)
|
208
|
+
[x**3, (3 * x**2).sum(0)]
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
data/lib/rumale/manifold/tsne.rb
CHANGED
@@ -106,7 +106,9 @@ module Rumale
|
|
106
106
|
y = (b.dot(one_vec) * y + (a - b).dot(y)) / a.dot(one_vec)
|
107
107
|
lo_prob_mat = t_distributed_probability_matrix(y)
|
108
108
|
@n_iter = t + 1
|
109
|
-
|
109
|
+
if @params[:verbose] && (@n_iter % 100).zero?
|
110
|
+
puts "[t-SNE] KL divergence after #{@n_iter} iterations: #{cost(hi_prob_mat, lo_prob_mat)}"
|
111
|
+
end
|
110
112
|
end
|
111
113
|
# store results.
|
112
114
|
@embedding = y
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.5
|
4
|
+
version: 0.13.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -172,6 +172,7 @@ files:
|
|
172
172
|
- lib/rumale/clustering/spectral_clustering.rb
|
173
173
|
- lib/rumale/dataset.rb
|
174
174
|
- lib/rumale/decomposition/factor_analysis.rb
|
175
|
+
- lib/rumale/decomposition/fast_ica.rb
|
175
176
|
- lib/rumale/decomposition/nmf.rb
|
176
177
|
- lib/rumale/decomposition/pca.rb
|
177
178
|
- lib/rumale/ensemble/ada_boost_classifier.rb
|