rumale 0.12.3 → 0.12.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -2
- data/lib/rumale/dataset.rb +2 -2
- data/lib/rumale/manifold/mds.rb +175 -0
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +1 -0
- data/rumale.gemspec +2 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fd6aef33fee80a240c1cad6a61189f9cb3a93034
+  data.tar.gz: 166db39ecff891c22648998d0524fee38b1fb906
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a9ef86ae0e3c7f9477bbf4efd3feb05e0fa67bdddb3b6cb15bd9b6ae54aece4c9507156e228736cee0727a0377b934cd6f5a15597064df9a129938da91423316
+  data.tar.gz: 89f0bec8f13f504bc1620c13af6ae1e40475d2b1c1addf026f7510c33711401f7ab721d564b1bd9c7755a20c240bba0881473b20e13875c4f0eb45904dc572f0
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -6,14 +6,15 @@
 [](https://coveralls.io/github/yoshoku/rumale?branch=master)
 [](https://badge.fury.io/rb/rumale)
 [](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
-[](https://www.rubydoc.info/gems/rumale/0.12.
+[](https://www.rubydoc.info/gems/rumale/0.12.4)

 Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
 Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
 Rumale supports Linear / Kernel Support Vector Machine,
 Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
 Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
-K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
+K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
+Mutidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.

 This project was formerly known as "SVMKit".
 If you are using SVMKit, please install Rumale and replace `SVMKit` constants with `Rumale`.
data/lib/rumale/dataset.rb
CHANGED
@@ -56,7 +56,7 @@ module Rumale
 # @param noise [Float] The standard deviaion of gaussian noise added to the data.
 #   If nil is given, no noise is added.
 # @param factor [Float] The scale factor between inner and outer circles. The interval of factor is (0, 1).
-# @random_seed [Integer] The seed value using to initialize the random generator.
+# @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil)
   Rumale::Validation.check_params_integer(n_samples: n_samples)
   Rumale::Validation.check_params_boolean(shuffle: shuffle)
@@ -95,7 +95,7 @@ module Rumale
 # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset
 # @param noise [Float] The standard deviaion of gaussian noise added to the data.
 #   If nil is given, no noise is added.
-# @random_seed [Integer] The seed value using to initialize the random generator.
+# @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
   Rumale::Validation.check_params_integer(n_samples: n_samples)
   Rumale::Validation.check_params_boolean(shuffle: shuffle)
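The change above is documentation-only: the mistyped YARD tag `@random_seed` becomes `@param random_seed` for both `make_circles` and `make_moons`; the method signatures are unchanged. A minimal usage sketch of the documented keyword, assuming the `rumale` gem is installed and that both helpers return a `[samples, labels]` pair of Numo arrays like other Rumale dataset generators:

```ruby
require 'rumale'

# Two interleaving circles; random_seed (the keyword documented by the fixed @param tag)
# makes the gaussian noise and shuffling reproducible.
x, y = Rumale::Dataset.make_circles(200, noise: 0.05, factor: 0.5, random_seed: 42)
puts x.shape.inspect          # expected: [200, 2]
puts y.to_a.uniq.sort.inspect # expected: the two class labels

# The same keyword on the half-moons generator.
mx, my = Rumale::Dataset.make_moons(200, noise: 0.05, random_seed: 42)
puts mx.shape.inspect         # expected: [200, 2]
puts my.to_a.uniq.sort.inspect
```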
data/lib/rumale/manifold/mds.rb
ADDED
@@ -0,0 +1,175 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+require 'rumale/utils'
+require 'rumale/pairwise_metric'
+require 'rumale/decomposition/pca'
+
+module Rumale
+  module Manifold
+    # MDS is a class that implements Metric Multidimensional Scaling (MDS)
+    # with Scaling by MAjorizing a COmplicated Function (SMACOF) algorithm.
+    #
+    # @example
+    #   mds = Rumale::Manifold::MDS.new(init: 'pca', max_iter: 500, random_seed: 1)
+    #   representations = mds.fit_transform(samples)
+    #
+    # *Reference*
+    # - P J. F. Groenen and M. van de Velden, "Multidimensional Scaling by Majorization: A Review," J. of Statistical Software, Vol. 73 (8), 2016.
+    class MDS
+      include Base::BaseEstimator
+      include Base::Transformer
+
+      # Return the data in representation space.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components])
+      attr_reader :embedding
+
+      # Return the stress function value after optimization.
+      # @return [Float]
+      attr_reader :stress
+
+      # Return the number of iterations run for optimization
+      # @return [Integer]
+      attr_reader :n_iter
+
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new transformer with MDS.
+      #
+      # @param n_components [Integer] The number of dimensions on representation space.
+      # @param metric [String] The metric to calculate the distances in original space.
+      #   If metric is 'euclidean', Euclidean distance is calculated for distance in original space.
+      #   If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
+      # @param init [String] The init is a method to initialize the representaion space.
+      #   If init is 'random', the representaion space is initialized with normal random variables.
+      #   If init is 'pca', the result of principal component analysis as the initial value of the representation space.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of stress value for terminating optimization.
+      #   If tol is nil, it does not use stress value as a criterion for terminating the optimization.
+      # @param verbose [Boolean] The flag indicating whether to output stress value during iteration.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_components: 2, metric: 'euclidean', init: 'random',
+                     max_iter: 300, tol: nil, verbose: false, random_seed: nil)
+        check_params_integer(n_components: n_components, max_iter: max_iter)
+        check_params_string(metric: metric, init: init)
+        check_params_boolean(verbose: verbose)
+        check_params_type_or_nil(Float, tol: tol)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(n_components: n_components, max_iter: max_iter)
+        @params = {}
+        @params[:n_components] = n_components
+        @params[:max_iter] = max_iter
+        @params[:tol] = tol
+        @params[:metric] = metric
+        @params[:init] = init
+        @params[:verbose] = verbose
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @rng = Random.new(@params[:random_seed])
+        @embedding = nil
+        @stress = nil
+        @n_iter = nil
+      end
+
+      # Fit the model with given training data.
+      #
+      # @overload fit(x) -> MDS
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      # @return [MDS] The learned transformer itself.
+      def fit(x, _not_used = nil)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+        # initialize some varibales.
+        n_samples = x.shape[0]
+        hi_distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
+        @embedding = init_embedding(x)
+        lo_distance_mat = Rumale::PairwiseMetric.euclidean_distance(@embedding)
+        @stress = calc_stress(hi_distance_mat, lo_distance_mat)
+        @n_iter = 0
+        # perform optimization.
+        @params[:max_iter].times do |t|
+          # guttman tarnsform.
+          ratio = hi_distance_mat / lo_distance_mat
+          ratio[ratio.diag_indices] = 0.0
+          ratio[lo_distance_mat.eq(0)] = 0.0
+          tmp_mat = -ratio
+          tmp_mat[tmp_mat.diag_indices] += ratio.sum(axis: 1)
+          @embedding = 1.fdiv(n_samples) * tmp_mat.dot(@embedding)
+          # check convergence.
+          new_stress = calc_stress(hi_distance_mat, lo_distance_mat)
+          if terminate?(@stress, new_stress)
+            @stress = new_stress
+            break
+          end
+          # next step.
+          @n_iter = t + 1
+          @stress = new_stress
+          lo_distance_mat = Rumale::PairwiseMetric.euclidean_distance(@embedding)
+          puts "[MDS] stress function after #{@n_iter} iterations: #{@stress}" if @params[:verbose] && (@n_iter % 100).zero?
+        end
+        self
+      end
+
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
+      def fit_transform(x, _not_used = nil)
+        fit(x)
+        @embedding.dup
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data.
+      def marshal_dump
+        { params: @params,
+          embedding: @embedding,
+          stress: @stress,
+          n_iter: @n_iter,
+          rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @embedding = obj[:embedding]
+        @stress = obj[:stress]
+        @n_iter = obj[:n_iter]
+        @rng = obj[:rng]
+        nil
+      end
+
+      private
+
+      def init_embedding(x)
+        if @params[:init] == 'pca' && @params[:metric] == 'euclidean'
+          pca = Rumale::Decomposition::PCA.new(n_components: @params[:n_components], random_seed: @params[:random_seed])
+          pca.fit_transform(x)
+        else
+          n_samples = x.shape[0]
+          sub_rng = @rng.dup
+          Rumale::Utils.rand_uniform([n_samples, @params[:n_components]], sub_rng) - 0.5
+        end
+      end
+
+      def terminate?(old_stress, new_stress)
+        return false if @params[:tol].nil?
+        return false if old_stress.nil?
+        (old_stress - new_stress).abs <= @params[:tol]
+      end
+
+      def calc_stress(hi_distance_mat, lo_distance_mat)
+        ((hi_distance_mat - lo_distance_mat)**2).sum.fdiv(2)
+      end
+    end
+  end
+end
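The new `Rumale::Manifold::MDS` transformer minimizes the squared difference between the pairwise distances in the original space and in the embedding (the value computed by `calc_stress`), updating the embedding with repeated Guttman transforms (SMACOF). A short usage sketch based on the constructor and accessors defined above, assuming the `rumale` gem is installed; the toy data from `Rumale::Dataset.make_circles` and the printed shapes are illustrative only:

```ruby
require 'rumale'

# Toy data: 100 noisy two-circle samples (the class labels are ignored here).
samples, = Rumale::Dataset.make_circles(100, noise: 0.05, factor: 0.4, random_seed: 1)

# Mirrors the @example in the class documentation; n_components defaults to 2.
mds = Rumale::Manifold::MDS.new(init: 'pca', max_iter: 500, random_seed: 1)
embedded = mds.fit_transform(samples)

puts embedded.shape.inspect # expected: [100, 2]
puts mds.stress             # final stress value after optimization
puts mds.n_iter             # number of SMACOF iterations actually run
```

With `metric: 'precomputed'`, `fit` and `fit_transform` instead expect a square distance matrix, as noted in the method documentation.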
data/lib/rumale/version.rb
CHANGED
data/lib/rumale.rb
CHANGED
@@ -63,6 +63,7 @@ require 'rumale/clustering/power_iteration'
 require 'rumale/decomposition/pca'
 require 'rumale/decomposition/nmf'
 require 'rumale/manifold/tsne'
+require 'rumale/manifold/mds'
 require 'rumale/preprocessing/l2_normalizer'
 require 'rumale/preprocessing/min_max_scaler'
 require 'rumale/preprocessing/max_abs_scaler'
data/rumale.gemspec
CHANGED
@@ -19,7 +19,8 @@ Gem::Specification.new do |spec|
 Rumale currently supports Linear / Kernel Support Vector Machine,
 Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
 Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
-K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
+K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
+Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
 MSG
 spec.homepage = 'https://github.com/yoshoku/rumale'
 spec.license = 'BSD-2-Clause'
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.12.
+  version: 0.12.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-06-
+date: 2019-06-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -114,7 +114,8 @@ description: |
   Rumale currently supports Linear / Kernel Support Vector Machine,
   Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
   Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
-  K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
+  K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
+  Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
 email:
 - yoshoku@outlook.com
 executables: []
@@ -187,6 +188,7 @@ files:
 - lib/rumale/linear_model/ridge.rb
 - lib/rumale/linear_model/svc.rb
 - lib/rumale/linear_model/svr.rb
+- lib/rumale/manifold/mds.rb
 - lib/rumale/manifold/tsne.rb
 - lib/rumale/model_selection/cross_validation.rb
 - lib/rumale/model_selection/grid_search_cv.rb