rumale 0.13.3 → 0.13.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -1
- data/lib/rumale.rb +3 -0
- data/lib/rumale/clustering/hdbscan.rb +282 -0
- data/lib/rumale/clustering/power_iteration.rb +46 -29
- data/lib/rumale/clustering/single_linkage.rb +200 -0
- data/lib/rumale/clustering/spectral_clustering.rb +134 -0
- data/lib/rumale/kernel_machine/kernel_pca.rb +2 -0
- data/lib/rumale/kernel_machine/kernel_ridge.rb +2 -0
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 522eaabfd67ced29bf275fb6f5cec019ff60e3d5
+  data.tar.gz: 0eb97f58c3764bdcbf448f9a392f8f5091ce418d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bf5a3caf614b08813aa4b11673da758778191847ba6fe4c4144cae7da1dd8e4b3ec3eac1367d54b78a00a7afd5ae1ae047fa84c58954b0e7d0571a9442a10380
+  data.tar.gz: 8bdb25aaec7304f12595673d3fa915cc1739ef14fedd89210205f53325de5a96076b4cf718f8d7ca15fdec1f55d5f9c65cd256781ec2d60462a48221525ad068
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
+# 0.13.4
+- Add cluster analysis class for [HDBSCAN](https://yoshoku.github.io/rumale/doc/Rumale/Clustering/HDBSCAN.html).
+- Add cluster analysis class for [spectral clustering](https://yoshoku.github.io/rumale/doc/Rumale/Clustering/SpectralClustering.html).
+- Refactor power iteration clustering.
+- Several documentation improvements.
+
 # 0.13.3
 - Add transformer class for [Kernel PCA](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelPCA.html).
 - Add regressor class for [Kernel Ridge](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidge.html).
data/README.md
CHANGED
@@ -13,7 +13,7 @@ Rumale provides machine learning algorithms with interfaces similar to Scikit-Le
 Rumale supports Linear / Kernel Support Vector Machine,
 Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
 Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
-K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, SNN, Power Iteration Clustering,
+K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
 Mutidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA and Non-negative Matrix Factorization.

 This project was formerly known as "SVMKit".
data/lib/rumale.rb
CHANGED
@@ -62,8 +62,11 @@ require 'rumale/clustering/k_means'
 require 'rumale/clustering/k_medoids'
 require 'rumale/clustering/gaussian_mixture'
 require 'rumale/clustering/dbscan'
+require 'rumale/clustering/hdbscan'
 require 'rumale/clustering/snn'
 require 'rumale/clustering/power_iteration'
+require 'rumale/clustering/spectral_clustering'
+require 'rumale/clustering/single_linkage'
 require 'rumale/decomposition/pca'
 require 'rumale/decomposition/nmf'
 require 'rumale/manifold/tsne'
data/lib/rumale/clustering/hdbscan.rb
ADDED
@@ -0,0 +1,282 @@
+# frozen_string_literal: true
+
+require 'ostruct'
+require 'rumale/base/base_estimator'
+require 'rumale/base/cluster_analyzer'
+require 'rumale/pairwise_metric'
+require 'rumale/clustering/single_linkage'
+
+module Rumale
+  module Clustering
+    # HDBSCAN is a class that implements HDBSCAN cluster analysis.
+    #
+    # @example
+    #   analyzer = Rumale::Clustering::HDBSCAN.new(min_samples: 5)
+    #   cluster_labels = analyzer.fit_predict(samples)
+    #
+    # *Reference*
+    # - R J. G. B. Campello, D. Moulavi, A. Zimek, and J. Sander, "Hierarchical Density Estimates for Data Clustering, Visualization, and Outlier Detection," TKDD, Vol. 10 (1), pp. 5:1--5:51, 2015.
+    # - R J. G. B. Campello, D. Moulavi, and J Sander, "Density-Based Clustering Based on Hierarchical Density Estimates," Proc. PAKDD'13, pp. 160--172, 2013.
+    # - L. Lelis and J. Sander, "Semi-Supervised Density-Based Clustering," Proc. ICDM'09, pp. 842--847, 2009.
+    class HDBSCAN
+      include Base::BaseEstimator
+      include Base::ClusterAnalyzer
+
+      # Return the cluster labels. The negative cluster label indicates that the point is noise.
+      # @return [Numo::Int32] (shape: [n_samples])
+      attr_reader :labels
+
+      # Create a new cluster analyzer with HDBSCAN algorithm.
+      #
+      # @param min_samples [Integer] The number of neighbor samples to be used for the criterion whether a point is a core point.
+      # @param min_cluster_size [Integer/Nil] The minimum size of cluster. If nil is given, it is set equal to min_samples.
+      # @param metric [String] The metric to calculate the distances.
+      #   If metric is 'euclidean', Euclidean distance is calculated for distance between points.
+      #   If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
+      def initialize(min_samples: 10, min_cluster_size: nil, metric: 'euclidean')
+        check_params_integer(min_samples: min_samples)
+        check_params_type_or_nil(Integer, min_cluster_size: min_cluster_size)
+        check_params_string(metric: metric)
+        check_params_positive(min_samples: min_samples)
+        @params = {}
+        @params[:min_samples] = min_samples
+        @params[:min_cluster_size] = min_cluster_size || min_samples
+        @params[:metric] = metric == 'precomputed' ? 'precomputed' : 'euclidean'
+        @labels = nil
+      end
+
+      # Analysis clusters with given training data.
+      #
+      # @overload fit(x) -> HDBSCAN
+      #
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      #     If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      # @return [HDBSCAN] The learned cluster analyzer itself.
+      def fit(x, _y = nil)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+        fit_predict(x)
+        self
+      end
+
+      # Analysis clusters and assign samples to clusters.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
+      #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
+      def fit_predict(x)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+        distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
+        @labels = partial_fit(distance_mat)
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data.
+      def marshal_dump
+        { params: @params,
+          labels: @labels }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @labels = obj[:labels]
+        nil
+      end
+
+      private
+
+      # @!visibility private
+      class UnionFind
+        def initialize(n)
+          @parent = Numo::Int32.new(n).seq
+          @rank = Numo::Int32.zeros(n)
+        end
+
+        # @!visibility private
+        def union(x, y)
+          x_root = find(x)
+          y_root = find(y)
+
+          return if x_root == y_root
+
+          # :nocov:
+          if @rank[x_root] < @rank[y_root]
+            @parent[x_root] = y_root
+          else
+            @parent[y_root] = x_root
+            @rank[x_root] += 1 if @rank[x_root] == @rank[y_root]
+          end
+          # :nocov:
+
+          nil
+        end
+
+        # @!visibility private
+        def find(x)
+          @parent[x] = find(@parent[x]) if @parent[x] != x
+          @parent[x]
+        end
+      end
+
+      private_constant :UnionFind
+
+      def partial_fit(distance_mat)
+        mr_distance_mat = mutual_reachability_distances(distance_mat, @params[:min_samples])
+        hierarchy = Rumale::Clustering::SingleLinkage.new(n_clusters: 1, metric: 'precomputed').fit(mr_distance_mat).hierarchy
+        tree = condense_tree(hierarchy, @params[:min_cluster_size])
+        stabilities = cluster_stability(tree)
+        flatten(tree, stabilities)
+      end
+
+      def mutual_reachability_distances(distance_mat, min_samples)
+        core_distances = distance_mat.sort(axis: 1)[true, min_samples + 1]
+        Numo::DFloat.maximum(core_distances.expand_dims(1), Numo::DFloat.maximum(core_distances, distance_mat))
+      end
+
+      def breadth_first_search_hierarchy(hierarchy, root)
+        n_edges = hierarchy.size
+        n_points = n_edges + 1
+        to_process = [root]
+        res = []
+        while to_process.any?
+          res.concat(to_process)
+          to_process = to_process.select { |n| n >= n_points }.map { |n| n - n_points }
+          to_process = to_process.map { |n| [hierarchy[n].x, hierarchy[n].y] }.flatten if to_process.any?
+        end
+        res
+      end
+
+      def condense_tree(hierarchy, min_cluster_size)
+        n_edges = hierarchy.size
+        root = 2 * n_edges
+        n_points = n_edges + 1
+        next_label = n_points + 1
+
+        node_ids = breadth_first_search_hierarchy(hierarchy, root)
+
+        relabel = Numo::Int32.zeros(root + 1)
+        relabel[root] = n_points
+        res = []
+        visited = {}
+
+        node_ids.each do |n_id|
+          next if visited[n_id] || n_id < n_points
+
+          edge = hierarchy[n_id - n_points]
+
+          density = edge.weight > 0.0 ? 1.fdiv(edge.weight) : Float::INFINITY
+          n_x_elements = edge.x >= n_points ? hierarchy[edge.x - n_points].n_elements : 1
+          n_y_elements = edge.y >= n_points ? hierarchy[edge.y - n_points].n_elements : 1
+
+          if n_x_elements >= min_cluster_size && n_y_elements >= min_cluster_size
+            relabel[edge.x] = next_label
+            res.push(OpenStruct.new(x: relabel[n_id], y: relabel[edge.x], weight: density, n_elements: n_x_elements))
+            next_label += 1
+            relabel[edge.y] = next_label
+            res.push(OpenStruct.new(x: relabel[n_id], y: relabel[edge.y], weight: density, n_elements: n_y_elements))
+            next_label += 1
+          elsif n_x_elements < min_cluster_size && n_y_elements < min_cluster_size
+            breadth_first_search_hierarchy(hierarchy, edge.x).each do |sn_id|
+              res.push(OpenStruct.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
+              visited[sn_id] = true
+            end
+            breadth_first_search_hierarchy(hierarchy, edge.y).each do |sn_id|
+              res.push(OpenStruct.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
+              visited[sn_id] = true
+            end
+          elsif n_x_elements < min_cluster_size
+            relabel[edge.y] = relabel[n_id]
+            breadth_first_search_hierarchy(hierarchy, edge.x).each do |sn_id|
+              res.push(OpenStruct.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
+              visited[sn_id] = true
+            end
+          elsif n_y_elements < min_cluster_size
+            relabel[edge.x] = relabel[n_id]
+            breadth_first_search_hierarchy(hierarchy, edge.y).each do |sn_id|
+              res.push(OpenStruct.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
+              visited[sn_id] = true
+            end
+          end
+        end
+        res
+      end
+
+      def cluster_stability(tree)
+        tree.sort! { |a, b| a.weight <=> b.weight }
+
+        root = tree.map(&:x).min
+        child_max = tree.map(&:y).max
+        child_max = root if child_max < root
+        densities = Numo::DFloat.zeros(child_max + 1) + Float::INFINITY
+
+        current = tree[0].y
+        density_min = tree[0].weight
+        tree.each do |edge|
+          if edge.x == current
+            density_min = [density_min, edge.weight].min
+          else
+            densities[current] = density_min
+            current = edge.y
+            density_min = edge.weight
+          end
+        end
+
+        densities[current] = density_min if current != tree[0].y
+        densities[root] = 0.0
+
+        tree.each_with_object({}) do |edge, stab|
+          stab[edge.x] ||= 0.0
+          stab[edge.x] += (edge.weight - densities[edge.x]) * edge.n_elements
+        end
+      end
+
+      def breadth_first_search_tree(tree, root)
+        to_process = [root]
+        res = []
+        while to_process.any?
+          res.concat(to_process)
+          to_process = tree.select { |v| to_process.include?(v.x) }.map(&:y)
+        end
+        res
+      end
+
+      def flatten(tree, stabilities)
+        node_ids = stabilities.keys.sort { |a, b| b <=> a }.slice(0, stabilities.size - 1)
+
+        cluster_tree = tree.select { |edge| edge.n_elements > 1 }
+        is_cluster = node_ids.each_with_object({}) { |n_id, h| h[n_id] = true }
+
+        node_ids.each do |n_id|
+          children = cluster_tree.select { |node| node.x == n_id }.map(&:y)
+          subtree_stability = children.inject(0.0) { |sum, c_id| sum + stabilities[c_id] }
+          if subtree_stability > stabilities[n_id]
+            is_cluster[n_id] = false
+            stabilities[n_id] = subtree_stability
+          else
+            breadth_first_search_tree(cluster_tree, n_id).each do |sn_id|
+              is_cluster[sn_id] = false if sn_id != n_id
+            end
+          end
+        end
+
+        cluster_label_map = {}
+        is_cluster.select { |_k, v| v == true }.keys.uniq.sort.each_with_index { |n_idx, c_idx| cluster_label_map[n_idx] = c_idx }
+
+        parent_arr = tree.map(&:x)
+        uf = UnionFind.new(parent_arr.max + 1)
+        tree.each { |edge| uf.union(edge.x, edge.y) if cluster_label_map[edge.y].nil? }
+
+        root = parent_arr.min
+        res = Numo::Int32.zeros(root)
+        root.times do |n|
+          cluster = uf.find(n)
+          res[n] = cluster < root ? -1 : cluster_label_map[cluster] || -1
+        end
+        res
+      end
+    end
+  end
+end
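A small usage sketch for the class above with a precomputed distance matrix; `samples` is an assumed Numo::DFloat, and the pairwise-distance call mirrors what fit_predict does internally when metric is 'euclidean':

require 'rumale'

# Assumed: samples is a Numo::DFloat of shape [n_samples, n_features].
# Build the Euclidean distance matrix once and reuse it with metric: 'precomputed'.
distance_mat = Rumale::PairwiseMetric.euclidean_distance(samples)
analyzer = Rumale::Clustering::HDBSCAN.new(min_samples: 10, metric: 'precomputed')
labels = analyzer.fit_predict(distance_mat) # negative labels mark noise points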
data/lib/rumale/clustering/power_iteration.rb
CHANGED
@@ -22,6 +22,10 @@ module Rumale
       # @return [Numo::DFloat] (shape: [n_samples])
       attr_reader :embedding
 
+      # Return the cluster labels.
+      # @return [Numo::Int32] (shape: [n_samples])
+      attr_reader :labels
+
       # Return the number of iterations run for optimization
       # @return [Integer]
       attr_reader :n_iter
@@ -55,12 +59,13 @@ module Rumale
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @embedding = nil
+        @labels = nil
         @n_iter = nil
       end
 
       # Analysis clusters with given training data.
       #
-      # @overload fit(x) ->
+      # @overload fit(x) -> PowerIteration
       #
       #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
       #     If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
@@ -68,28 +73,7 @@ module Rumale
       def fit(x, _y = nil)
         check_sample_array(x)
         raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
-
-        affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
-        affinity_mat[affinity_mat.diag_indices] = 0.0
-        n_samples = affinity_mat.shape[0]
-        tol = @params[:tol].fdiv(n_samples)
-        # calculate normalized affinity matrix.
-        degrees = affinity_mat.sum(axis: 1)
-        normalized_affinity_mat = (1.0 / degrees).diag.dot(affinity_mat)
-        # initialize embedding space.
-        @embedding = degrees / degrees.sum
-        # optimization
-        @n_iter = 0
-        error = Numo::DFloat.ones(n_samples)
-        @params[:max_iter].times do |t|
-          @n_iter = t + 1
-          new_embedding = normalized_affinity_mat.dot(@embedding)
-          new_embedding /= new_embedding.abs.sum
-          new_error = (new_embedding - @embedding).abs
-          break if (new_error - error).abs.max <= tol
-          @embedding = new_embedding
-          error = new_error
-        end
+        fit_predict(x)
         self
       end
 
@@ -100,12 +84,11 @@ module Rumale
       # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
       def fit_predict(x)
         check_sample_array(x)
-
-
-
-
-        )
-        kmeans.fit_predict(@embedding.expand_dims(1))
+        raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
+
+        affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
+        @embedding, @n_iter = embedded_space(affinity_mat, @params[:max_iter], @params[:tol].fdiv(affinity_mat.shape[0]))
+        @labels = line_kmeans_clustering(@embedding)
       end
 
       # Dump marshal data.
@@ -113,6 +96,7 @@ module Rumale
       def marshal_dump
         { params: @params,
           embedding: @embedding,
+          labels: @labels,
          n_iter: @n_iter }
       end
 
@@ -121,9 +105,42 @@ module Rumale
       def marshal_load(obj)
         @params = obj[:params]
         @embedding = obj[:embedding]
+        @labels = obj[:labels]
         @n_iter = obj[:n_iter]
         nil
       end
+
+      private
+
+      def embedded_space(affinity_mat, max_iter, tol)
+        affinity_mat[affinity_mat.diag_indices] = 0.0
+
+        degrees = affinity_mat.sum(axis: 1)
+        normalized_affinity_mat = (1.0 / degrees).diag.dot(affinity_mat)
+
+        iters = 0
+        embedded_line = degrees / degrees.sum
+        n_samples = embedded_line.shape[0]
+        error = Numo::DFloat.ones(n_samples)
+        max_iter.times do |t|
+          iters = t + 1
+          new_embedded_line = normalized_affinity_mat.dot(embedded_line)
+          new_embedded_line /= new_embedded_line.abs.sum
+          new_error = (new_embedded_line - embedded_line).abs
+          break if (new_error - error).abs.max <= tol
+          embedded_line = new_embedded_line
+          error = new_error
+        end
+
+        [embedded_line, iters]
+      end
+
+      def line_kmeans_clustering(vec)
+        Rumale::Clustering::KMeans.new(
+          n_clusters: @params[:n_clusters], init: @params[:init],
+          max_iter: @params[:max_iter], tol: @params[:tol], random_seed: @params[:random_seed]
+        ).fit_predict(vec.expand_dims(1))
+      end
    end
  end
 end
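A brief sketch of what the refactoring above changes in practice: fit now delegates to fit_predict, which caches the cluster assignments in #labels; `samples` is an assumed Numo::DFloat:

require 'rumale'

# Assumed: samples is a Numo::DFloat of shape [n_samples, n_features].
analyzer = Rumale::Clustering::PowerIteration.new(n_clusters: 3, gamma: 8.0, max_iter: 1000)
analyzer.fit(samples)
p analyzer.labels # Numo::Int32 cluster assignments cached by the refactored fit_predict
p analyzer.n_iter # number of power iterations actually performed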
data/lib/rumale/clustering/single_linkage.rb
ADDED
@@ -0,0 +1,200 @@
+# frozen_string_literal: true
+
+require 'ostruct'
+require 'rumale/base/base_estimator'
+require 'rumale/base/cluster_analyzer'
+require 'rumale/pairwise_metric'
+
+module Rumale
+  module Clustering
+    # SingleLinkage is a class that implements hierarchical cluster analysis with single linakge method.
+    # This class is used internally for HDBSCAN.
+    #
+    # @example
+    #   analyzer = Rumale::Clustering::SingleLinkage.new(n_clusters: 2)
+    #   cluster_labels = analyzer.fit_predict(samples)
+    #
+    # *Reference*
+    # - D. Mullner, "Modern hierarchical, agglomerative clustering algorithms," arXiv:1109.2378, 2011.
+    class SingleLinkage
+      include Base::BaseEstimator
+      include Base::ClusterAnalyzer
+
+      # Return the cluster labels.
+      # @return [Numo::Int32] (shape: [n_samples])
+      attr_reader :labels
+
+      # Return the hierarchical structure.
+      # @return [Array<OpenStruct>] (shape: [n_samples - 1])
+      attr_reader :hierarchy
+
+      # Create a new cluster analyzer with single linkage algorithm.
+      #
+      # @param n_clusters [Integer] The number of clusters.
+      # @param metric [String] The metric to calculate the distances.
+      #   If metric is 'euclidean', Euclidean distance is calculated for distance between points.
+      #   If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
+      def initialize(n_clusters: 2, metric: 'euclidean')
+        check_params_integer(n_clusters: n_clusters)
+        check_params_string(metric: metric)
+        @params = {}
+        @params[:n_clusters] = n_clusters
+        @params[:metric] = metric == 'precomputed' ? 'precomputed' : 'euclidean'
+        @labels = nil
+        @hierarchy = nil
+      end
+
+      # Analysis clusters with given training data.
+      #
+      # @overload fit(x) -> SingleLinkage
+      #
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      #     If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      # @return [SingleLinkage] The learned cluster analyzer itself.
+      def fit(x, _y = nil)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+        fit_predict(x)
+        self
+      end
+
+      # Analysis clusters and assign samples to clusters.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
+      #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
+      def fit_predict(x)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+        distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
+        @labels = partial_fit(distance_mat)
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data.
+      def marshal_dump
+        { params: @params,
+          labels: @labels,
+          hierarchy: @hierarchy }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @labels = obj[:labels]
+        @hierarchy = obj[:hierarchy]
+        nil
+      end
+
+      private
+
+      # @!visibility private
+      class UnionFind
+        def initialize(n)
+          @parent = Numo::Int32.zeros(2 * n - 1) - 1
+          @size = Numo::Int32.hstack([Numo::Int32.ones(n), Numo::Int32.zeros(n - 1)])
+          @next_label = n
+        end
+
+        # @!visibility private
+        def union(x, y)
+          size = @size[x] + @size[y]
+          @parent[x] = @next_label
+          @parent[y] = @next_label
+          @size[@next_label] = size
+          @next_label += 1
+          size
+        end
+
+        # @!visibility private
+        def find(x)
+          p = x
+          x = @parent[x] while @parent[x] != -1
+          while @parent[p] != x
+            p = @parent[p]
+            @parent[p] = x
+          end
+          x
+        end
+      end
+
+      private_constant :UnionFind
+
+      def partial_fit(distance_mat)
+        mst = minimum_spanning_tree(distance_mat)
+        @hierarchy = single_linkage_hierarchy(mst)
+        flatten(@hierarchy, @params[:n_clusters])
+      end
+
+      def minimum_spanning_tree(complete_graph)
+        n_samples = complete_graph.shape[0]
+        n_edges = n_samples - 1
+        curr_weights = Numo::DFloat.zeros(n_samples) + Float::INFINITY
+        curr_labels = Numo::Int32.new(n_samples).seq
+        next_node = 0
+        mst = Array.new(n_edges) do
+          curr_node = next_node
+          target = curr_labels.ne(curr_node)
+          curr_labels = curr_labels[target]
+          curr_weights = Numo::DFloat.minimum(curr_weights[target], complete_graph[curr_node, curr_labels])
+          next_node = curr_labels[curr_weights.min_index]
+          weight = curr_weights.min
+          OpenStruct.new(x: curr_node, y: next_node, weight: weight)
+        end
+        mst.sort! { |a, b| a.weight <=> b.weight }
+      end
+
+      def single_linkage_hierarchy(mst)
+        n_edges = mst.size
+        n_nodes = n_edges + 1
+        uf = UnionFind.new(n_nodes)
+        Array.new(n_edges) do |n|
+          x_root = uf.find(mst[n].x)
+          y_root = uf.find(mst[n].y)
+          x_root, y_root = [y_root, x_root] unless x_root < y_root
+          weight = mst[n].weight
+          n_samples = uf.union(x_root, y_root)
+          OpenStruct.new(x: x_root, y: y_root, weight: weight, n_elements: n_samples)
+        end
+      end
+
+      def descedent_ids(hierarchy_, start_node)
+        n_samples = hierarchy_.size + 1
+        return [start_node] if start_node < n_samples
+
+        res = []
+        indices = [start_node]
+        n_indices = 1
+        while n_indices.positive?
+          idx = indices.pop
+          if idx < n_samples
+            res.push(idx)
+            n_indices -= 1
+          else
+            indices.push(hierarchy_[idx - n_samples].x)
+            indices.push(hierarchy_[idx - n_samples].y)
+            n_indices += 1
+          end
+        end
+        res
+      end
+
+      def flatten(hierarchy_, n_clusters)
+        n_samples = hierarchy_.size + 1
+        return Numo::Int32.zeros(n_samples) if n_clusters < 2
+
+        nodes = [-([hierarchy_[-1].x, hierarchy_[-1].y].max + 1)]
+        (n_clusters - 1).times do
+          children = hierarchy_[-nodes[0] - n_samples]
+          nodes.push(-children.x)
+          nodes.push(-children.y)
+          nodes.sort!.shift
+        end
+        res = Numo::Int32.zeros(n_samples)
+        nodes.each_with_index { |sid, cluster_id| res[descedent_ids(hierarchy_, -sid)] = cluster_id }
+        res
+      end
+    end
+  end
+end
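A short usage sketch for the class above; `samples` is an assumed Numo::DFloat, and besides the flat labels the fitted object exposes the dendrogram through #hierarchy:

require 'rumale'

# Assumed: samples is a Numo::DFloat of shape [n_samples, n_features].
analyzer = Rumale::Clustering::SingleLinkage.new(n_clusters: 2)
labels = analyzer.fit_predict(samples)

# Each dendrogram entry is an OpenStruct with x, y, weight, and n_elements:
# the two merged nodes, the merge distance, and the size of the merged cluster.
analyzer.hierarchy.take(3).each { |edge| p [edge.x, edge.y, edge.weight, edge.n_elements] }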
data/lib/rumale/clustering/spectral_clustering.rb
ADDED
@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/cluster_analyzer'
+require 'rumale/pairwise_metric'
+require 'rumale/preprocessing/l2_normalizer'
+
+module Rumale
+  module Clustering
+    # SpectralClustering is a class that implements the normalized spectral clustering.
+    #
+    # @example
+    #   require 'numo/linalg/autoloader'
+    #
+    #   analyzer = Rumale::Clustering::SpectralClustering.new(n_clusters: 10, gamma: 8.0)
+    #   cluster_labels = analyzer.fit_predict(samples)
+    #
+    # *Reference*
+    # - A Y. Ng, M I. Jordan, and Y. Weiss, "On Spectral Clustering: Analyssi and an algorithm," Proc. NIPS'01, pp. 849--856, 2001.
+    # - U von Luxburg, "A tutorial on spectral clustering," Statistics and Computing, Vol. 17 (4), pp. 395--416, 2007.
+    class SpectralClustering
+      include Base::BaseEstimator
+      include Base::ClusterAnalyzer
+
+      # Return the data in embedded space.
+      # @return [Numo::DFloat] (shape: [n_samples, n_clusters])
+      attr_reader :embedding
+
+      # Return the cluster labels.
+      # @return [Numo::Int32] (shape: [n_samples])
+      attr_reader :labels
+
+      # Create a new cluster analyzer with normalized spectral clustering.
+      #
+      # @param n_clusters [Integer] The number of clusters.
+      # @param affinity [String] The representation of affinity matrix ('rbf' or 'precomputed').
+      #   If affinity = 'rbf', the class performs the normalized spectral clustering with the fully connected graph weighted by rbf kernel.
+      # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
+      #   If affinity = 'precomputed', this parameter is ignored.
+      # @param init [String] The initialization method for centroids of K-Means clustering ('random' or 'k-means++').
+      # @param max_iter [Integer] The maximum number of iterations for K-Means clustering.
+      # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
+        check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
+        check_params_float(tol: tol)
+        check_params_string(affinity: affinity, init: init)
+        check_params_type_or_nil(Float, gamma: gamma)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
+        @params = {}
+        @params[:n_clusters] = n_clusters
+        @params[:affinity] = affinity
+        @params[:gamma] = gamma
+        @params[:init] = init == 'random' ? 'random' : 'k-means++'
+        @params[:max_iter] = max_iter
+        @params[:tol] = tol
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @embedding = nil
+        @labels = nil
+      end
+
+      # Analysis clusters with given training data.
+      # To execute this method, Numo::Linalg must be loaded.
+      #
+      # @overload fit(x) -> SpectralClustering
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      #     If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
+      # @return [SpectralClustering] The learned cluster analyzer itself.
+      def fit(x, _y = nil)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
+        raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+
+        fit_predict(x)
+        self
+      end
+
+      # Analysis clusters and assign samples to clusters.
+      # To execute this method, Numo::Linalg must be loaded.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      #   If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
+      def fit_predict(x)
+        check_sample_array(x)
+        raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
+        raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded.' unless enable_linalg?
+
+        affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
+        @embedding = embedded_space(affinity_mat, @params[:n_clusters])
+        normalized_embedding = Rumale::Preprocessing::L2Normalizer.new.fit_transform(@embedding)
+        @labels = kmeans_clustering(normalized_embedding)
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data.
+      def marshal_dump
+        { params: @params,
+          embedding: @embedding,
+          labels: @labels }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @embedding = obj[:embedding]
+        @labels = obj[:labels]
+        nil
+      end
+
+      private
+
+      def embedded_space(affinity_mat, n_clusters)
+        affinity_mat[affinity_mat.diag_indices] = 0.0
+        degrees = 1.0 / Numo::NMath.sqrt(affinity_mat.sum(axis: 1))
+        laplacian_mat = degrees.diag.dot(affinity_mat).dot(degrees.diag)
+
+        n_samples = affinity_mat.shape[0]
+        _, eig_vecs = Numo::Linalg.eigh(laplacian_mat, vals_range: (n_samples - n_clusters)...n_samples)
+        eig_vecs.reverse(1).dup
+      end
+
+      def kmeans_clustering(x)
+        Rumale::Clustering::KMeans.new(
+          n_clusters: @params[:n_clusters], init: @params[:init],
+          max_iter: @params[:max_iter], tol: @params[:tol], random_seed: @params[:random_seed]
+        ).fit_predict(x)
+      end
+    end
+  end
+end
data/lib/rumale/kernel_machine/kernel_pca.rb
CHANGED
@@ -8,6 +8,8 @@ module Rumale
     # KernelPCA is a class that implements Kernel Principal Component Analysis.
     #
     # @example
+    #   require 'numo/linalg/autoloader'
+    #
     #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
     #   kpca = Rumale::KernelMachine::KernelPCA(n_components: 2)
     #   mapped_traininig_samples = kpca.fit_transform(kernel_mat_train)
data/lib/rumale/kernel_machine/kernel_ridge.rb
CHANGED
@@ -8,6 +8,8 @@ module Rumale
    # KernelRidge is a class that implements kernel ridge regression.
    #
    # @example
+    #   require 'numo/linalg/autoloader'
+    #
    #   kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
    #   kridge = Rumale::KernelMachine::KernelRidge.new(reg_param: 1.0)
    #   kridge.fit(kernel_mat_train, traininig_values)
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
     Rumale currently supports Linear / Kernel Support Vector Machine,
     Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
     Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
-    K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, SNN, Power Iteration Clustering,
+    K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
     Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA, and Non-negative Matrix Factorization.
   MSG
   spec.homepage = 'https://github.com/yoshoku/rumale'
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.13.3
+  version: 0.13.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-09-
+date: 2019-09-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -128,7 +128,7 @@ description: |
   Rumale currently supports Linear / Kernel Support Vector Machine,
   Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
   Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
-  K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, SNN, Power Iteration Clustering,
+  K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
   Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA, and Non-negative Matrix Factorization.
 email:
 - yoshoku@outlook.com
@@ -163,10 +163,13 @@ files:
 - lib/rumale/base/transformer.rb
 - lib/rumale/clustering/dbscan.rb
 - lib/rumale/clustering/gaussian_mixture.rb
+- lib/rumale/clustering/hdbscan.rb
 - lib/rumale/clustering/k_means.rb
 - lib/rumale/clustering/k_medoids.rb
 - lib/rumale/clustering/power_iteration.rb
+- lib/rumale/clustering/single_linkage.rb
 - lib/rumale/clustering/snn.rb
+- lib/rumale/clustering/spectral_clustering.rb
 - lib/rumale/dataset.rb
 - lib/rumale/decomposition/nmf.rb
 - lib/rumale/decomposition/pca.rb