rumale 0.12.3 → 0.12.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -2
- data/lib/rumale/dataset.rb +2 -2
- data/lib/rumale/manifold/mds.rb +175 -0
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +1 -0
- data/rumale.gemspec +2 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd6aef33fee80a240c1cad6a61189f9cb3a93034
|
4
|
+
data.tar.gz: 166db39ecff891c22648998d0524fee38b1fb906
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9ef86ae0e3c7f9477bbf4efd3feb05e0fa67bdddb3b6cb15bd9b6ae54aece4c9507156e228736cee0727a0377b934cd6f5a15597064df9a129938da91423316
|
7
|
+
data.tar.gz: 89f0bec8f13f504bc1620c13af6ae1e40475d2b1c1addf026f7510c33711401f7ab721d564b1bd9c7755a20c240bba0881473b20e13875c4f0eb45904dc572f0
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -6,14 +6,15 @@
|
|
6
6
|
[![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
|
7
7
|
[![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
|
8
8
|
[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
|
9
|
-
[![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.
|
9
|
+
[![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.4)
|
10
10
|
|
11
11
|
Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
|
12
12
|
Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
13
13
|
Rumale supports Linear / Kernel Support Vector Machine,
|
14
14
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
15
15
|
Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
|
16
|
-
K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
|
16
|
+
K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
|
17
|
+
Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
|
17
18
|
|
18
19
|
This project was formerly known as "SVMKit".
|
19
20
|
If you are using SVMKit, please install Rumale and replace `SVMKit` constants with `Rumale`.
|
data/lib/rumale/dataset.rb
CHANGED
@@ -56,7 +56,7 @@ module Rumale
|
|
56
56
|
# @param noise [Float] The standard deviation of gaussian noise added to the data.
|
57
57
|
# If nil is given, no noise is added.
|
58
58
|
# @param factor [Float] The scale factor between inner and outer circles. The interval of factor is (0, 1).
|
59
|
-
# @random_seed [Integer] The seed value using to initialize the random generator.
|
59
|
+
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
60
60
|
def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil)
|
61
61
|
Rumale::Validation.check_params_integer(n_samples: n_samples)
|
62
62
|
Rumale::Validation.check_params_boolean(shuffle: shuffle)
|
@@ -95,7 +95,7 @@ module Rumale
|
|
95
95
|
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset
|
96
96
|
# @param noise [Float] The standard deviation of gaussian noise added to the data.
|
97
97
|
# If nil is given, no noise is added.
|
98
|
-
# @random_seed [Integer] The seed value using to initialize the random generator.
|
98
|
+
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
99
99
|
def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
|
100
100
|
Rumale::Validation.check_params_integer(n_samples: n_samples)
|
101
101
|
Rumale::Validation.check_params_boolean(shuffle: shuffle)
|
@@ -0,0 +1,175 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
|
6
|
+
require 'rumale/pairwise_metric'
|
7
|
+
require 'rumale/decomposition/pca'
|
8
|
+
|
9
|
+
module Rumale
  module Manifold
    # MDS is a class that implements Metric Multidimensional Scaling (MDS)
    # with Scaling by MAjorizing a COmplicated Function (SMACOF) algorithm.
    #
    # @example
    #   mds = Rumale::Manifold::MDS.new(init: 'pca', max_iter: 500, random_seed: 1)
    #   representations = mds.fit_transform(samples)
    #
    # *Reference*
    # - P J. F. Groenen and M. van de Velden, "Multidimensional Scaling by Majorization: A Review," J. of Statistical Software, Vol. 73 (8), 2016.
    class MDS
      include Base::BaseEstimator
      include Base::Transformer

      # Return the data in representation space.
      # @return [Numo::DFloat] (shape: [n_samples, n_components])
      attr_reader :embedding

      # Return the stress function value after optimization.
      # @return [Float]
      attr_reader :stress

      # Return the number of iterations run for optimization.
      # The iteration on which the tolerance criterion stops the optimization is not counted.
      # @return [Integer]
      attr_reader :n_iter

      # Return the random generator.
      # @return [Random]
      attr_reader :rng

      # Create a new transformer with MDS.
      #
      # @param n_components [Integer] The number of dimensions on representation space.
      # @param metric [String] The metric to calculate the distances in original space.
      #   If metric is 'euclidean', Euclidean distance is calculated for distance in original space.
      #   If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
      # @param init [String] The init is a method to initialize the representation space.
      #   If init is 'random', the representation space is initialized with values drawn by
      #   Rumale::Utils.rand_uniform and shifted by -0.5.
      #   If init is 'pca', the result of principal component analysis is used as the initial value of the
      #   representation space. 'pca' is applied only when metric is 'euclidean';
      #   otherwise the random initialization is used.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param tol [Float] The tolerance of stress value for terminating optimization.
      #   If tol is nil, it does not use stress value as a criterion for terminating the optimization.
      # @param verbose [Boolean] The flag indicating whether to output stress value during iteration.
      # @param random_seed [Integer] The seed value used to initialize the random generator.
      def initialize(n_components: 2, metric: 'euclidean', init: 'random',
                     max_iter: 300, tol: nil, verbose: false, random_seed: nil)
        check_params_integer(n_components: n_components, max_iter: max_iter)
        check_params_string(metric: metric, init: init)
        check_params_boolean(verbose: verbose)
        check_params_type_or_nil(Float, tol: tol)
        check_params_type_or_nil(Integer, random_seed: random_seed)
        check_params_positive(n_components: n_components, max_iter: max_iter)
        @params = {}
        @params[:n_components] = n_components
        @params[:max_iter] = max_iter
        @params[:tol] = tol
        @params[:metric] = metric
        @params[:init] = init
        @params[:verbose] = verbose
        @params[:random_seed] = random_seed
        # When no seed is given, take one from Kernel#srand so that the seed actually used is kept in params.
        @params[:random_seed] ||= srand
        @rng = Random.new(@params[:random_seed])
        @embedding = nil
        @stress = nil
        @n_iter = nil
      end

      # Fit the model with given training data.
      #
      # @overload fit(x) -> MDS
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
      # @raise [ArgumentError] If the metric is 'precomputed' and x is not a square matrix.
      # @return [MDS] The learned transformer itself.
      def fit(x, _not_used = nil)
        check_sample_array(x)
        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
        # initialize some variables.
        n_samples = x.shape[0]
        hi_distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
        @embedding = init_embedding(x)
        lo_distance_mat = Rumale::PairwiseMetric.euclidean_distance(@embedding)
        @stress = calc_stress(hi_distance_mat, lo_distance_mat)
        @n_iter = 0
        # perform optimization.
        @params[:max_iter].times do |t|
          # guttman transform.
          ratio = hi_distance_mat / lo_distance_mat
          ratio[ratio.diag_indices] = 0.0
          # entries divided by a zero distance are not finite; drop them from the update.
          ratio[lo_distance_mat.eq(0)] = 0.0
          tmp_mat = -ratio
          tmp_mat[tmp_mat.diag_indices] += ratio.sum(axis: 1)
          @embedding = 1.fdiv(n_samples) * tmp_mat.dot(@embedding)
          # The distances must be refreshed before evaluating the stress. Otherwise the new stress is
          # computed from the previous embedding, equals the old stress on the first iteration,
          # and any non-nil tol stops the optimization immediately.
          lo_distance_mat = Rumale::PairwiseMetric.euclidean_distance(@embedding)
          # check convergence.
          new_stress = calc_stress(hi_distance_mat, lo_distance_mat)
          if terminate?(@stress, new_stress)
            @stress = new_stress
            break
          end
          # next step.
          @n_iter = t + 1
          @stress = new_stress
          puts "[MDS] stress function after #{@n_iter} iterations: #{@stress}" if @params[:verbose] && (@n_iter % 100).zero?
        end
        self
      end

      # Fit the model with training data, and then transform them with the learned model.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
      def fit_transform(x, _not_used = nil)
        fit(x)
        # return a copy so that callers cannot modify the stored embedding.
        @embedding.dup
      end

      # Dump marshal data.
      # @return [Hash] The marshal data.
      def marshal_dump
        { params: @params,
          embedding: @embedding,
          stress: @stress,
          n_iter: @n_iter,
          rng: @rng }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @embedding = obj[:embedding]
        @stress = obj[:stress]
        @n_iter = obj[:n_iter]
        @rng = obj[:rng]
        nil
      end

      private

      # Build the initial embedding: PCA projection when init is 'pca' and metric is 'euclidean',
      # otherwise values from Rumale::Utils.rand_uniform shifted by -0.5.
      def init_embedding(x)
        if @params[:init] == 'pca' && @params[:metric] == 'euclidean'
          pca = Rumale::Decomposition::PCA.new(n_components: @params[:n_components], random_seed: @params[:random_seed])
          pca.fit_transform(x)
        else
          n_samples = x.shape[0]
          # draw from a copy so that the state of @rng is left untouched.
          sub_rng = @rng.dup
          Rumale::Utils.rand_uniform([n_samples, @params[:n_components]], sub_rng) - 0.5
        end
      end

      # Return true when the change of stress is within tol. Always false if tol or old_stress is nil.
      def terminate?(old_stress, new_stress)
        return false if @params[:tol].nil?
        return false if old_stress.nil?
        (old_stress - new_stress).abs <= @params[:tol]
      end

      # Calculate the stress: half of the sum of squared differences between the two distance matrices.
      def calc_stress(hi_distance_mat, lo_distance_mat)
        ((hi_distance_mat - lo_distance_mat)**2).sum.fdiv(2)
      end
    end
  end
end
|
data/lib/rumale/version.rb
CHANGED
data/lib/rumale.rb
CHANGED
@@ -63,6 +63,7 @@ require 'rumale/clustering/power_iteration'
|
|
63
63
|
require 'rumale/decomposition/pca'
|
64
64
|
require 'rumale/decomposition/nmf'
|
65
65
|
require 'rumale/manifold/tsne'
|
66
|
+
require 'rumale/manifold/mds'
|
66
67
|
require 'rumale/preprocessing/l2_normalizer'
|
67
68
|
require 'rumale/preprocessing/min_max_scaler'
|
68
69
|
require 'rumale/preprocessing/max_abs_scaler'
|
data/rumale.gemspec
CHANGED
@@ -19,7 +19,8 @@ Gem::Specification.new do |spec|
|
|
19
19
|
Rumale currently supports Linear / Kernel Support Vector Machine,
|
20
20
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
21
21
|
Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
|
22
|
-
K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
|
22
|
+
K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
|
23
|
+
Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
|
23
24
|
MSG
|
24
25
|
spec.homepage = 'https://github.com/yoshoku/rumale'
|
25
26
|
spec.license = 'BSD-2-Clause'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.
|
4
|
+
version: 0.12.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -114,7 +114,8 @@ description: |
|
|
114
114
|
Rumale currently supports Linear / Kernel Support Vector Machine,
|
115
115
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
116
116
|
Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
|
117
|
-
K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
|
117
|
+
K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
|
118
|
+
Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
|
118
119
|
email:
|
119
120
|
- yoshoku@outlook.com
|
120
121
|
executables: []
|
@@ -187,6 +188,7 @@ files:
|
|
187
188
|
- lib/rumale/linear_model/ridge.rb
|
188
189
|
- lib/rumale/linear_model/svc.rb
|
189
190
|
- lib/rumale/linear_model/svr.rb
|
191
|
+
- lib/rumale/manifold/mds.rb
|
190
192
|
- lib/rumale/manifold/tsne.rb
|
191
193
|
- lib/rumale/model_selection/cross_validation.rb
|
192
194
|
- lib/rumale/model_selection/grid_search_cv.rb
|