rumale 0.13.3 → 0.13.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -1
- data/lib/rumale.rb +3 -0
- data/lib/rumale/clustering/hdbscan.rb +282 -0
- data/lib/rumale/clustering/power_iteration.rb +46 -29
- data/lib/rumale/clustering/single_linkage.rb +200 -0
- data/lib/rumale/clustering/spectral_clustering.rb +134 -0
- data/lib/rumale/kernel_machine/kernel_pca.rb +2 -0
- data/lib/rumale/kernel_machine/kernel_ridge.rb +2 -0
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 522eaabfd67ced29bf275fb6f5cec019ff60e3d5
+  data.tar.gz: 0eb97f58c3764bdcbf448f9a392f8f5091ce418d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bf5a3caf614b08813aa4b11673da758778191847ba6fe4c4144cae7da1dd8e4b3ec3eac1367d54b78a00a7afd5ae1ae047fa84c58954b0e7d0571a9442a10380
+  data.tar.gz: 8bdb25aaec7304f12595673d3fa915cc1739ef14fedd89210205f53325de5a96076b4cf718f8d7ca15fdec1f55d5f9c65cd256781ec2d60462a48221525ad068
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
+# 0.13.4
+- Add cluster analysis class for [HDBSCAN](https://yoshoku.github.io/rumale/doc/Rumale/Clustering/HDBSCAN.html).
+- Add cluster analysis class for [spectral clustering](https://yoshoku.github.io/rumale/doc/Rumale/Clustering/SpectralClustering.html).
+- Refactor power iteration clustering.
+- Several documentation improvements.
+
 # 0.13.3
 - Add transformer class for [Kernel PCA](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelPCA.html).
 - Add regressor class for [Kernel Ridge](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidge.html).
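Both new analyzers follow the same `fit`/`fit_predict` interface as the existing clustering classes. A minimal usage sketch drawn from the `@example` blocks in the files added below, assuming `samples` is a Numo::DFloat of shape [n_samples, n_features]:

```ruby
require 'rumale'

# HDBSCAN returns one label per sample; negative labels mark noise points.
analyzer = Rumale::Clustering::HDBSCAN.new(min_samples: 5)
cluster_labels = analyzer.fit_predict(samples)

# Spectral clustering performs an eigendecomposition, so Numo::Linalg must be loaded first.
require 'numo/linalg/autoloader'
analyzer = Rumale::Clustering::SpectralClustering.new(n_clusters: 10, gamma: 8.0)
cluster_labels = analyzer.fit_predict(samples)
```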
data/README.md
CHANGED
@@ -13,7 +13,7 @@ Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn
 Rumale supports Linear / Kernel Support Vector Machine,
 Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
 Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
-K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, SNN, Power Iteration Clustering,
+K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
 Mutidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA and Non-negative Matrix Factorization.
 
 This project was formerly known as "SVMKit".
data/lib/rumale.rb
CHANGED
@@ -62,8 +62,11 @@ require 'rumale/clustering/k_means'
 require 'rumale/clustering/k_medoids'
 require 'rumale/clustering/gaussian_mixture'
 require 'rumale/clustering/dbscan'
+require 'rumale/clustering/hdbscan'
 require 'rumale/clustering/snn'
 require 'rumale/clustering/power_iteration'
+require 'rumale/clustering/spectral_clustering'
+require 'rumale/clustering/single_linkage'
 require 'rumale/decomposition/pca'
 require 'rumale/decomposition/nmf'
 require 'rumale/manifold/tsne'
data/lib/rumale/clustering/hdbscan.rb
ADDED
@@ -0,0 +1,282 @@
+# frozen_string_literal: true
+
+require 'ostruct'
+require 'rumale/base/base_estimator'
+require 'rumale/base/cluster_analyzer'
+require 'rumale/pairwise_metric'
+require 'rumale/clustering/single_linkage'
+
+module Rumale
+  module Clustering
+    # HDBSCAN is a class that implements HDBSCAN cluster analysis.
+    #
+    # @example
+    #   analyzer = Rumale::Clustering::HDBSCAN.new(min_samples: 5)
+    #   cluster_labels = analyzer.fit_predict(samples)
+    #
+    # *Reference*
+    # - R J. G. B. Campello, D. Moulavi, A. Zimek, and J. Sander, "Hierarchical Density Estimates for Data Clustering, Visualization, and Outlier Detection," TKDD, Vol. 10 (1), pp. 5:1--5:51, 2015.
+    # - R J. G. B. Campello, D. Moulavi, and J Sander, "Density-Based Clustering Based on Hierarchical Density Estimates," Proc. PAKDD'13, pp. 160--172, 2013.
+    # - L. Lelis and J. Sander, "Semi-Supervised Density-Based Clustering," Proc. ICDM'09, pp. 842--847, 2009.
+    class HDBSCAN
+      include Base::BaseEstimator
+      include Base::ClusterAnalyzer
+
+      # Return the cluster labels. The negative cluster label indicates that the point is noise.
+      # @return [Numo::Int32] (shape: [n_samples])
+      attr_reader :labels
+
+      # Create a new cluster analyzer with HDBSCAN algorithm.
+      #
+      # @param min_samples [Integer] The number of neighbor samples to be used for the criterion whether a point is a core point.
+      # @param min_cluster_size [Integer/Nil] The minimum size of cluster. If nil is given, it is set equal to min_samples.
+      # @param metric [String] The metric to calculate the distances.
+      #   If metric is 'euclidean', Euclidean distance is calculated for distance between points.
+      #   If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
+      def initialize(min_samples: 10, min_cluster_size: nil, metric: 'euclidean')
+        check_params_integer(min_samples: min_samples)
+        check_params_type_or_nil(Integer, min_cluster_size: min_cluster_size)
+        check_params_string(metric: metric)
+        check_params_positive(min_samples: min_samples)
+        @params = {}
+        @params[:min_samples] = min_samples
+        @params[:min_cluster_size] = min_cluster_size || min_samples
+        @params[:metric] = metric == 'precomputed' ? 'precomputed' : 'euclidean'
+        @labels = nil
+      end
+
+      # Analysis clusters with given training data.
+      #
+      # @overload fit(x) -> HDBSCAN
+      #
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      #     If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      #   @return [HDBSCAN] The learned cluster analyzer itself.
+      def fit(x, _y = nil)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+        fit_predict(x)
+        self
+      end
+
+      # Analysis clusters and assign samples to clusters.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
+      #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
+      def fit_predict(x)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+        distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
+        @labels = partial_fit(distance_mat)
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data.
+      def marshal_dump
+        { params: @params,
+          labels: @labels }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @labels = obj[:labels]
+        nil
+      end
+
+      private
+
+      # @!visibility private
+      class UnionFind
+        def initialize(n)
+          @parent = Numo::Int32.new(n).seq
+          @rank = Numo::Int32.zeros(n)
+        end
+
+        # @!visibility private
+        def union(x, y)
+          x_root = find(x)
+          y_root = find(y)
+
+          return if x_root == y_root
+
+          # :nocov:
+          if @rank[x_root] < @rank[y_root]
+            @parent[x_root] = y_root
+          else
+            @parent[y_root] = x_root
+            @rank[x_root] += 1 if @rank[x_root] == @rank[y_root]
+          end
+          # :nocov:
+
+          nil
+        end
+
+        # @!visibility private
+        def find(x)
+          @parent[x] = find(@parent[x]) if @parent[x] != x
+          @parent[x]
+        end
+      end
+
+      private_constant :UnionFind
+
+      def partial_fit(distance_mat)
+        mr_distance_mat = mutual_reachability_distances(distance_mat, @params[:min_samples])
+        hierarchy = Rumale::Clustering::SingleLinkage.new(n_clusters: 1, metric: 'precomputed').fit(mr_distance_mat).hierarchy
+        tree = condense_tree(hierarchy, @params[:min_cluster_size])
+        stabilities = cluster_stability(tree)
+        flatten(tree, stabilities)
+      end
+
+      def mutual_reachability_distances(distance_mat, min_samples)
+        core_distances = distance_mat.sort(axis: 1)[true, min_samples + 1]
+        Numo::DFloat.maximum(core_distances.expand_dims(1), Numo::DFloat.maximum(core_distances, distance_mat))
+      end
+
+      def breadth_first_search_hierarchy(hierarchy, root)
+        n_edges = hierarchy.size
+        n_points = n_edges + 1
+        to_process = [root]
+        res = []
+        while to_process.any?
+          res.concat(to_process)
+          to_process = to_process.select { |n| n >= n_points }.map { |n| n - n_points }
+          to_process = to_process.map { |n| [hierarchy[n].x, hierarchy[n].y] }.flatten if to_process.any?
+        end
+        res
+      end
+
+      def condense_tree(hierarchy, min_cluster_size)
+        n_edges = hierarchy.size
+        root = 2 * n_edges
+        n_points = n_edges + 1
+        next_label = n_points + 1
+
+        node_ids = breadth_first_search_hierarchy(hierarchy, root)
+
+        relabel = Numo::Int32.zeros(root + 1)
+        relabel[root] = n_points
+        res = []
+        visited = {}
+
+        node_ids.each do |n_id|
+          next if visited[n_id] || n_id < n_points
+
+          edge = hierarchy[n_id - n_points]
+
+          density = edge.weight > 0.0 ? 1.fdiv(edge.weight) : Float::INFINITY
+          n_x_elements = edge.x >= n_points ? hierarchy[edge.x - n_points].n_elements : 1
+          n_y_elements = edge.y >= n_points ? hierarchy[edge.y - n_points].n_elements : 1
+
+          if n_x_elements >= min_cluster_size && n_y_elements >= min_cluster_size
+            relabel[edge.x] = next_label
+            res.push(OpenStruct.new(x: relabel[n_id], y: relabel[edge.x], weight: density, n_elements: n_x_elements))
+            next_label += 1
+            relabel[edge.y] = next_label
+            res.push(OpenStruct.new(x: relabel[n_id], y: relabel[edge.y], weight: density, n_elements: n_y_elements))
+            next_label += 1
+          elsif n_x_elements < min_cluster_size && n_y_elements < min_cluster_size
+            breadth_first_search_hierarchy(hierarchy, edge.x).each do |sn_id|
+              res.push(OpenStruct.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
+              visited[sn_id] = true
+            end
+            breadth_first_search_hierarchy(hierarchy, edge.y).each do |sn_id|
+              res.push(OpenStruct.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
+              visited[sn_id] = true
+            end
+          elsif n_x_elements < min_cluster_size
+            relabel[edge.y] = relabel[n_id]
+            breadth_first_search_hierarchy(hierarchy, edge.x).each do |sn_id|
+              res.push(OpenStruct.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
+              visited[sn_id] = true
+            end
+          elsif n_y_elements < min_cluster_size
+            relabel[edge.x] = relabel[n_id]
+            breadth_first_search_hierarchy(hierarchy, edge.y).each do |sn_id|
+              res.push(OpenStruct.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
+              visited[sn_id] = true
+            end
+          end
+        end
+        res
+      end
+
+      def cluster_stability(tree)
+        tree.sort! { |a, b| a.weight <=> b.weight }
+
+        root = tree.map(&:x).min
+        child_max = tree.map(&:y).max
+        child_max = root if child_max < root
+        densities = Numo::DFloat.zeros(child_max + 1) + Float::INFINITY
+
+        current = tree[0].y
+        density_min = tree[0].weight
+        tree.each do |edge|
+          if edge.x == current
+            density_min = [density_min, edge.weight].min
+          else
+            densities[current] = density_min
+            current = edge.y
+            density_min = edge.weight
+          end
+        end
+
+        densities[current] = density_min if current != tree[0].y
+        densities[root] = 0.0
+
+        tree.each_with_object({}) do |edge, stab|
+          stab[edge.x] ||= 0.0
+          stab[edge.x] += (edge.weight - densities[edge.x]) * edge.n_elements
+        end
+      end
+
+      def breadth_first_search_tree(tree, root)
+        to_process = [root]
+        res = []
+        while to_process.any?
+          res.concat(to_process)
+          to_process = tree.select { |v| to_process.include?(v.x) }.map(&:y)
+        end
+        res
+      end
+
+      def flatten(tree, stabilities)
+        node_ids = stabilities.keys.sort { |a, b| b <=> a }.slice(0, stabilities.size - 1)
+
+        cluster_tree = tree.select { |edge| edge.n_elements > 1 }
+        is_cluster = node_ids.each_with_object({}) { |n_id, h| h[n_id] = true }
+
+        node_ids.each do |n_id|
+          children = cluster_tree.select { |node| node.x == n_id }.map(&:y)
+          subtree_stability = children.inject(0.0) { |sum, c_id| sum + stabilities[c_id] }
+          if subtree_stability > stabilities[n_id]
+            is_cluster[n_id] = false
+            stabilities[n_id] = subtree_stability
+          else
+            breadth_first_search_tree(cluster_tree, n_id).each do |sn_id|
+              is_cluster[sn_id] = false if sn_id != n_id
+            end
+          end
+        end
+
+        cluster_label_map = {}
+        is_cluster.select { |_k, v| v == true }.keys.uniq.sort.each_with_index { |n_idx, c_idx| cluster_label_map[n_idx] = c_idx }
+
+        parent_arr = tree.map(&:x)
+        uf = UnionFind.new(parent_arr.max + 1)
+        tree.each { |edge| uf.union(edge.x, edge.y) if cluster_label_map[edge.y].nil? }
+
+        root = parent_arr.min
+        res = Numo::Int32.zeros(root)
+        root.times do |n|
+          cluster = uf.find(n)
+          res[n] = cluster < root ? -1 : cluster_label_map[cluster] || -1
+        end
+        res
+      end
+    end
+  end
+end
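HDBSCAN builds its single-linkage hierarchy over mutual reachability distances rather than raw distances: mr(a, b) = max(core(a), core(b), d(a, b)), where core(p) is read from column min_samples + 1 of p's sorted distance row (column 0 holds the point itself). A self-contained sketch of that step, mirroring the private `mutual_reachability_distances` helper above; the toy distance values are hypothetical:

```ruby
require 'numo/narray'

# Hypothetical symmetric distance matrix for four points.
d = Numo::DFloat[[0.0, 1.0, 4.0, 5.0],
                 [1.0, 0.0, 3.0, 6.0],
                 [4.0, 3.0, 0.0, 2.0],
                 [5.0, 6.0, 2.0, 0.0]]

min_samples = 1
# Core distance of each point: column (min_samples + 1) of its sorted distance row.
core = d.sort(axis: 1)[true, min_samples + 1]
# Mutual reachability distance: max(core(a), core(b), d(a, b)) for every pair,
# obtained by broadcasting the core distances over the rows and columns of d.
mr = Numo::DFloat.maximum(core.expand_dims(1), Numo::DFloat.maximum(core, d))
```

The resulting matrix is handed to `SingleLinkage` with `metric: 'precomputed'`, and the condensed tree is then flattened by cluster stability as in the Campello et al. references.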
data/lib/rumale/clustering/power_iteration.rb
CHANGED
@@ -22,6 +22,10 @@ module Rumale
       # @return [Numo::DFloat] (shape: [n_samples])
       attr_reader :embedding
 
+      # Return the cluster labels.
+      # @return [Numo::Int32] (shape: [n_samples])
+      attr_reader :labels
+
       # Return the number of iterations run for optimization
       # @return [Integer]
       attr_reader :n_iter
@@ -55,12 +59,13 @@ module Rumale
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @embedding = nil
+        @labels = nil
         @n_iter = nil
       end
 
       # Analysis clusters with given training data.
       #
-      # @overload fit(x) ->
+      # @overload fit(x) -> PowerIteration
       #
       #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
       #     If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
@@ -68,28 +73,7 @@ module Rumale
       def fit(x, _y = nil)
         check_sample_array(x)
         raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
-
-        affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
-        affinity_mat[affinity_mat.diag_indices] = 0.0
-        n_samples = affinity_mat.shape[0]
-        tol = @params[:tol].fdiv(n_samples)
-        # calculate normalized affinity matrix.
-        degrees = affinity_mat.sum(axis: 1)
-        normalized_affinity_mat = (1.0 / degrees).diag.dot(affinity_mat)
-        # initialize embedding space.
-        @embedding = degrees / degrees.sum
-        # optimization
-        @n_iter = 0
-        error = Numo::DFloat.ones(n_samples)
-        @params[:max_iter].times do |t|
-          @n_iter = t + 1
-          new_embedding = normalized_affinity_mat.dot(@embedding)
-          new_embedding /= new_embedding.abs.sum
-          new_error = (new_embedding - @embedding).abs
-          break if (new_error - error).abs.max <= tol
-          @embedding = new_embedding
-          error = new_error
-        end
+        fit_predict(x)
         self
       end
 
@@ -100,12 +84,11 @@ module Rumale
       # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
       def fit_predict(x)
         check_sample_array(x)
-
-
-
-
-        )
-        kmeans.fit_predict(@embedding.expand_dims(1))
+        raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
+
+        affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
+        @embedding, @n_iter = embedded_space(affinity_mat, @params[:max_iter], @params[:tol].fdiv(affinity_mat.shape[0]))
+        @labels = line_kmeans_clustering(@embedding)
       end
 
       # Dump marshal data.
@@ -113,6 +96,7 @@ module Rumale
       def marshal_dump
         { params: @params,
          embedding: @embedding,
+          labels: @labels,
          n_iter: @n_iter }
       end
 
@@ -121,9 +105,42 @@ module Rumale
       def marshal_load(obj)
         @params = obj[:params]
         @embedding = obj[:embedding]
+        @labels = obj[:labels]
         @n_iter = obj[:n_iter]
         nil
       end
+
+      private
+
+      def embedded_space(affinity_mat, max_iter, tol)
+        affinity_mat[affinity_mat.diag_indices] = 0.0
+
+        degrees = affinity_mat.sum(axis: 1)
+        normalized_affinity_mat = (1.0 / degrees).diag.dot(affinity_mat)
+
+        iters = 0
+        embedded_line = degrees / degrees.sum
+        n_samples = embedded_line.shape[0]
+        error = Numo::DFloat.ones(n_samples)
+        max_iter.times do |t|
+          iters = t + 1
+          new_embedded_line = normalized_affinity_mat.dot(embedded_line)
+          new_embedded_line /= new_embedded_line.abs.sum
+          new_error = (new_embedded_line - embedded_line).abs
+          break if (new_error - error).abs.max <= tol
+          embedded_line = new_embedded_line
+          error = new_error
+        end
+
+        [embedded_line, iters]
+      end
+
+      def line_kmeans_clustering(vec)
+        Rumale::Clustering::KMeans.new(
+          n_clusters: @params[:n_clusters], init: @params[:init],
+          max_iter: @params[:max_iter], tol: @params[:tol], random_seed: @params[:random_seed]
+        ).fit_predict(vec.expand_dims(1))
+      end
     end
   end
 end
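The refactor moves the optimization loop into the private `embedded_space` helper, lets `fit` delegate to `fit_predict`, and stores the assignments in the new `labels` reader. A short usage sketch; `samples` is assumed to be a Numo::DFloat, and the keyword arguments simply mirror the `@params` keys visible in the diff:

```ruby
require 'rumale'

analyzer = Rumale::Clustering::PowerIteration.new(n_clusters: 3, gamma: 32.0)
cluster_labels = analyzer.fit_predict(samples)

analyzer.embedding # Numo::DFloat: the one-dimensional embedded space
analyzer.labels    # Numo::Int32: the same assignments returned by fit_predict
```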
data/lib/rumale/clustering/single_linkage.rb
ADDED
@@ -0,0 +1,200 @@
+# frozen_string_literal: true
+
+require 'ostruct'
+require 'rumale/base/base_estimator'
+require 'rumale/base/cluster_analyzer'
+require 'rumale/pairwise_metric'
+
+module Rumale
+  module Clustering
+    # SingleLinkage is a class that implements hierarchical cluster analysis with single linkage method.
+    # This class is used internally for HDBSCAN.
+    #
+    # @example
+    #   analyzer = Rumale::Clustering::SingleLinkage.new(n_clusters: 2)
+    #   cluster_labels = analyzer.fit_predict(samples)
+    #
+    # *Reference*
+    # - D. Mullner, "Modern hierarchical, agglomerative clustering algorithms," arXiv:1109.2378, 2011.
+    class SingleLinkage
+      include Base::BaseEstimator
+      include Base::ClusterAnalyzer
+
+      # Return the cluster labels.
+      # @return [Numo::Int32] (shape: [n_samples])
+      attr_reader :labels
+
+      # Return the hierarchical structure.
+      # @return [Array<OpenStruct>] (shape: [n_samples - 1])
+      attr_reader :hierarchy
+
+      # Create a new cluster analyzer with single linkage algorithm.
+      #
+      # @param n_clusters [Integer] The number of clusters.
+      # @param metric [String] The metric to calculate the distances.
+      #   If metric is 'euclidean', Euclidean distance is calculated for distance between points.
+      #   If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
+      def initialize(n_clusters: 2, metric: 'euclidean')
+        check_params_integer(n_clusters: n_clusters)
+        check_params_string(metric: metric)
+        @params = {}
+        @params[:n_clusters] = n_clusters
+        @params[:metric] = metric == 'precomputed' ? 'precomputed' : 'euclidean'
+        @labels = nil
+        @hierarchy = nil
+      end
+
+      # Analysis clusters with given training data.
+      #
+      # @overload fit(x) -> SingleLinkage
+      #
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      #     If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      #   @return [SingleLinkage] The learned cluster analyzer itself.
+      def fit(x, _y = nil)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+        fit_predict(x)
+        self
+      end
+
+      # Analysis clusters and assign samples to clusters.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
+      #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
+      def fit_predict(x)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+        distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
+        @labels = partial_fit(distance_mat)
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data.
+      def marshal_dump
+        { params: @params,
+          labels: @labels,
+          hierarchy: @hierarchy }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @labels = obj[:labels]
+        @hierarchy = obj[:hierarchy]
+        nil
+      end
+
+      private
+
+      # @!visibility private
+      class UnionFind
+        def initialize(n)
+          @parent = Numo::Int32.zeros(2 * n - 1) - 1
+          @size = Numo::Int32.hstack([Numo::Int32.ones(n), Numo::Int32.zeros(n - 1)])
+          @next_label = n
+        end
+
+        # @!visibility private
+        def union(x, y)
+          size = @size[x] + @size[y]
+          @parent[x] = @next_label
+          @parent[y] = @next_label
+          @size[@next_label] = size
+          @next_label += 1
+          size
+        end
+
+        # @!visibility private
+        def find(x)
+          p = x
+          x = @parent[x] while @parent[x] != -1
+          while @parent[p] != x
+            p = @parent[p]
+            @parent[p] = x
+          end
+          x
+        end
+      end
+
+      private_constant :UnionFind
+
+      def partial_fit(distance_mat)
+        mst = minimum_spanning_tree(distance_mat)
+        @hierarchy = single_linkage_hierarchy(mst)
+        flatten(@hierarchy, @params[:n_clusters])
+      end
+
+      def minimum_spanning_tree(complete_graph)
+        n_samples = complete_graph.shape[0]
+        n_edges = n_samples - 1
+        curr_weights = Numo::DFloat.zeros(n_samples) + Float::INFINITY
+        curr_labels = Numo::Int32.new(n_samples).seq
+        next_node = 0
+        mst = Array.new(n_edges) do
+          curr_node = next_node
+          target = curr_labels.ne(curr_node)
+          curr_labels = curr_labels[target]
+          curr_weights = Numo::DFloat.minimum(curr_weights[target], complete_graph[curr_node, curr_labels])
+          next_node = curr_labels[curr_weights.min_index]
+          weight = curr_weights.min
+          OpenStruct.new(x: curr_node, y: next_node, weight: weight)
+        end
+        mst.sort! { |a, b| a.weight <=> b.weight }
+      end
+
+      def single_linkage_hierarchy(mst)
+        n_edges = mst.size
+        n_nodes = n_edges + 1
+        uf = UnionFind.new(n_nodes)
+        Array.new(n_edges) do |n|
+          x_root = uf.find(mst[n].x)
+          y_root = uf.find(mst[n].y)
+          x_root, y_root = [y_root, x_root] unless x_root < y_root
+          weight = mst[n].weight
+          n_samples = uf.union(x_root, y_root)
+          OpenStruct.new(x: x_root, y: y_root, weight: weight, n_elements: n_samples)
+        end
+      end
+
+      def descedent_ids(hierarchy_, start_node)
+        n_samples = hierarchy_.size + 1
+        return [start_node] if start_node < n_samples
+
+        res = []
+        indices = [start_node]
+        n_indices = 1
+        while n_indices.positive?
+          idx = indices.pop
+          if idx < n_samples
+            res.push(idx)
+            n_indices -= 1
+          else
+            indices.push(hierarchy_[idx - n_samples].x)
+            indices.push(hierarchy_[idx - n_samples].y)
+            n_indices += 1
+          end
+        end
+        res
+      end
+
+      def flatten(hierarchy_, n_clusters)
+        n_samples = hierarchy_.size + 1
+        return Numo::Int32.zeros(n_samples) if n_clusters < 2
+
+        nodes = [-([hierarchy_[-1].x, hierarchy_[-1].y].max + 1)]
+        (n_clusters - 1).times do
+          children = hierarchy_[-nodes[0] - n_samples]
+          nodes.push(-children.x)
+          nodes.push(-children.y)
+          nodes.sort!.shift
+        end
+        res = Numo::Int32.zeros(n_samples)
+        nodes.each_with_index { |sid, cluster_id| res[descedent_ids(hierarchy_, -sid)] = cluster_id }
+        res
+      end
+    end
+  end
+end
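Besides flat labels, SingleLinkage exposes the dendrogram it builds from the minimum spanning tree. A small sketch based on the `@example` above; `samples` is assumed to be a Numo::DFloat:

```ruby
require 'rumale'

analyzer = Rumale::Clustering::SingleLinkage.new(n_clusters: 2)
cluster_labels = analyzer.fit_predict(samples)

# hierarchy holds n_samples - 1 OpenStruct edges in order of increasing merge distance.
analyzer.hierarchy.each do |edge|
  # edge.x, edge.y  : ids of the merged nodes
  # edge.weight     : distance at which they were merged
  # edge.n_elements : number of points in the merged cluster
  puts format('%d + %d -> %d points at distance %.4f', edge.x, edge.y, edge.n_elements, edge.weight)
end
```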
data/lib/rumale/clustering/spectral_clustering.rb
ADDED
@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/cluster_analyzer'
+require 'rumale/pairwise_metric'
+require 'rumale/preprocessing/l2_normalizer'
+
+module Rumale
+  module Clustering
+    # SpectralClustering is a class that implements the normalized spectral clustering.
+    #
+    # @example
+    #   require 'numo/linalg/autoloader'
+    #
+    #   analyzer = Rumale::Clustering::SpectralClustering.new(n_clusters: 10, gamma: 8.0)
+    #   cluster_labels = analyzer.fit_predict(samples)
+    #
+    # *Reference*
+    # - A Y. Ng, M I. Jordan, and Y. Weiss, "On Spectral Clustering: Analysis and an algorithm," Proc. NIPS'01, pp. 849--856, 2001.
+    # - U von Luxburg, "A tutorial on spectral clustering," Statistics and Computing, Vol. 17 (4), pp. 395--416, 2007.
+    class SpectralClustering
+      include Base::BaseEstimator
+      include Base::ClusterAnalyzer
+
+      # Return the data in embedded space.
+      # @return [Numo::DFloat] (shape: [n_samples, n_clusters])
+      attr_reader :embedding
+
+      # Return the cluster labels.
+      # @return [Numo::Int32] (shape: [n_samples])
+      attr_reader :labels
+
+      # Create a new cluster analyzer with normalized spectral clustering.
+      #
+      # @param n_clusters [Integer] The number of clusters.
+      # @param affinity [String] The representation of affinity matrix ('rbf' or 'precomputed').
+      #   If affinity = 'rbf', the class performs the normalized spectral clustering with the fully connected graph weighted by rbf kernel.
+      # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
+      #   If affinity = 'precomputed', this parameter is ignored.
+      # @param init [String] The initialization method for centroids of K-Means clustering ('random' or 'k-means++').
+      # @param max_iter [Integer] The maximum number of iterations for K-Means clustering.
+      # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
+        check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
+        check_params_float(tol: tol)
+        check_params_string(affinity: affinity, init: init)
+        check_params_type_or_nil(Float, gamma: gamma)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
+        @params = {}
+        @params[:n_clusters] = n_clusters
+        @params[:affinity] = affinity
+        @params[:gamma] = gamma
+        @params[:init] = init == 'random' ? 'random' : 'k-means++'
+        @params[:max_iter] = max_iter
+        @params[:tol] = tol
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @embedding = nil
+        @labels = nil
+      end
+
+      # Analysis clusters with given training data.
+      # To execute this method, Numo::Linalg must be loaded.
+      #
+      # @overload fit(x) -> SpectralClustering
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      #     If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
+      # @return [SpectralClustering] The learned cluster analyzer itself.
+      def fit(x, _y = nil)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
+        raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+
+        fit_predict(x)
+        self
+      end
+
+      # Analysis clusters and assign samples to clusters.
+      # To execute this method, Numo::Linalg must be loaded.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      #   If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
+      def fit_predict(x)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
+        raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+
+        affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
+        @embedding = embedded_space(affinity_mat, @params[:n_clusters])
+        normalized_embedding = Rumale::Preprocessing::L2Normalizer.new.fit_transform(@embedding)
+        @labels = kmeans_clustering(normalized_embedding)
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data.
+      def marshal_dump
+        { params: @params,
+          embedding: @embedding,
+          labels: @labels }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @embedding = obj[:embedding]
+        @labels = obj[:labels]
+        nil
+      end
+
+      private
+
+      def embedded_space(affinity_mat, n_clusters)
+        affinity_mat[affinity_mat.diag_indices] = 0.0
+        degrees = 1.0 / Numo::NMath.sqrt(affinity_mat.sum(axis: 1))
+        laplacian_mat = degrees.diag.dot(affinity_mat).dot(degrees.diag)
+
+        n_samples = affinity_mat.shape[0]
+        _, eig_vecs = Numo::Linalg.eigh(laplacian_mat, vals_range: (n_samples - n_clusters)...n_samples)
+        eig_vecs.reverse(1).dup
+      end
+
+      def kmeans_clustering(x)
+        Rumale::Clustering::KMeans.new(
+          n_clusters: @params[:n_clusters], init: @params[:init],
+          max_iter: @params[:max_iter], tol: @params[:tol], random_seed: @params[:random_seed]
+        ).fit_predict(x)
+      end
+    end
+  end
+end
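Like the Kernel PCA and Kernel Ridge examples below, the eigendecomposition in `embedded_space` relies on Numo::Linalg, so a backend has to be loaded before calling `fit` or `fit_predict`. A minimal sketch repeating the `@example` above and reading the learned attributes; `samples` is assumed to be a Numo::DFloat of shape [n_samples, n_features]:

```ruby
require 'rumale'
require 'numo/linalg/autoloader'

analyzer = Rumale::Clustering::SpectralClustering.new(n_clusters: 10, gamma: 8.0)
cluster_labels = analyzer.fit_predict(samples)

analyzer.embedding # Numo::DFloat [n_samples, n_clusters]: the spectral embedding
analyzer.labels    # Numo::Int32 [n_samples]: k-means result on the row-normalized embedding
```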
data/lib/rumale/kernel_machine/kernel_pca.rb
CHANGED
@@ -8,6 +8,8 @@ module Rumale
     # KernelPCA is a class that implements Kernel Principal Component Analysis.
     #
     # @example
+    #   require 'numo/linalg/autoloader'
+    #
     #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
     #   kpca = Rumale::KernelMachine::KernelPCA(n_components: 2)
     #   mapped_traininig_samples = kpca.fit_transform(kernel_mat_train)
data/lib/rumale/kernel_machine/kernel_ridge.rb
CHANGED
@@ -8,6 +8,8 @@ module Rumale
     # KernelRidge is a class that implements kernel ridge regression.
     #
     # @example
+    #   require 'numo/linalg/autoloader'
+    #
     #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
     #   kridge = Rumale::KernelMachine::KernelRidge.new(reg_param: 1.0)
     #   kridge.fit(kernel_mat_train, traininig_values)
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
     Rumale currently supports Linear / Kernel Support Vector Machine,
     Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
     Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
-    K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, SNN, Power Iteration Clustering,
+    K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
     Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA, and Non-negative Matrix Factorization.
   MSG
   spec.homepage = 'https://github.com/yoshoku/rumale'
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.13.3
+  version: 0.13.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-09-
+date: 2019-09-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -128,7 +128,7 @@ description: |
   Rumale currently supports Linear / Kernel Support Vector Machine,
   Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
   Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
-  K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, SNN, Power Iteration Clustering,
+  K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
   Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA, and Non-negative Matrix Factorization.
 email:
 - yoshoku@outlook.com
@@ -163,10 +163,13 @@ files:
 - lib/rumale/base/transformer.rb
 - lib/rumale/clustering/dbscan.rb
 - lib/rumale/clustering/gaussian_mixture.rb
+- lib/rumale/clustering/hdbscan.rb
 - lib/rumale/clustering/k_means.rb
 - lib/rumale/clustering/k_medoids.rb
 - lib/rumale/clustering/power_iteration.rb
+- lib/rumale/clustering/single_linkage.rb
 - lib/rumale/clustering/snn.rb
+- lib/rumale/clustering/spectral_clustering.rb
 - lib/rumale/dataset.rb
 - lib/rumale/decomposition/nmf.rb
 - lib/rumale/decomposition/pca.rb