rumale-clustering 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+ require 'rumale/clustering/k_means'
8
+
9
+ module Rumale
10
+ module Clustering
11
+ # PowerIteration is a class that implements power iteration clustering.
12
+ #
13
+ # @example
14
+ # require 'rumale/clustering/power_iteration'
15
+ #
16
+ # analyzer = Rumale::Clustering::PowerIteration.new(n_clusters: 10, gamma: 8.0, max_iter: 1000)
17
+ # cluster_labels = analyzer.fit_predict(samples)
18
+ #
19
+ # *Reference*
20
+ # - Lin, F., and Cohen, W W., "Power Iteration Clustering," Proc. ICML'10, pp. 655--662, 2010.
21
+ class PowerIteration < ::Rumale::Base::Estimator
22
+ include ::Rumale::Base::ClusterAnalyzer
23
+
24
+ # Return the data in embedded space.
25
+ # @return [Numo::DFloat] (shape: [n_samples])
26
+ attr_reader :embedding
27
+
28
+ # Return the cluster labels.
29
+ # @return [Numo::Int32] (shape: [n_samples])
30
+ attr_reader :labels
31
+
32
+ # Return the number of iterations run for optimization
33
+ # @return [Integer]
34
+ attr_reader :n_iter
35
+
36
+ # Create a new cluster analyzer with power iteration clustering.
37
+ #
38
+ # @param n_clusters [Integer] The number of clusters.
39
+ # @param affinity [String] The representation of affinity matrix ('rbf' or 'precomputed').
40
+ # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
41
+ # If affinity = 'precomputed', this parameter is ignored.
42
+ # @param init [String] The initialization method for centroids of K-Means clustering ('random' or 'k-means++').
43
+ # @param max_iter [Integer] The maximum number of iterations.
44
+ # @param tol [Float] The tolerance of termination criterion.
45
+ # @param eps [Float] A small value close to zero to avoid zero division error.
46
+ # @param random_seed [Integer] The seed value using to initialize the random generator.
47
+ def initialize(n_clusters: 8, affinity: 'rbf', gamma: nil, init: 'k-means++',
48
+ max_iter: 1000, tol: 1.0e-8, eps: 1.0e-5, random_seed: nil)
49
+ super()
50
+ @params = {
51
+ n_clusters: n_clusters,
52
+ affinity: affinity,
53
+ gamma: gamma,
54
+ init: (init == 'random' ? 'random' : 'k-means++'),
55
+ max_iter: max_iter,
56
+ tol: tol,
57
+ eps: eps,
58
+ random_seed: (random_seed || srand)
59
+ }
60
+ end
61
+
62
+ # Analysis clusters with given training data.
63
+ #
64
+ # @overload fit(x) -> PowerIteration
65
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
66
+ # If the affinity is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
67
+ # @return [PowerIteration] The learned cluster analyzer itself.
68
+ def fit(x, _y = nil)
69
+ x = ::Rumale::Validation.check_convert_sample_array(x)
70
+ raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)
71
+
72
+ fit_predict(x)
73
+ self
74
+ end
75
+
76
+ # Analysis clusters and assign samples to clusters.
77
+ #
78
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
79
+ # If the affinity is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
80
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
81
+ def fit_predict(x)
82
+ x = ::Rumale::Validation.check_convert_sample_array(x)
83
+ raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)
84
+
85
+ affinity_mat = @params[:affinity] == 'precomputed' ? x : ::Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
86
+ @embedding, @n_iter = embedded_space(affinity_mat, @params[:max_iter], @params[:tol].fdiv(affinity_mat.shape[0]))
87
+ @labels = line_kmeans_clustering(@embedding)
88
+ end
89
+
90
+ private
91
+
92
+ def check_invalid_array_shape(x)
93
+ @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
94
+ end
95
+
96
+ def embedded_space(affinity_mat, max_iter, tol)
97
+ affinity_mat[affinity_mat.diag_indices] = 0.0
98
+
99
+ degrees = affinity_mat.sum(axis: 1)
100
+ normalized_affinity_mat = (1.0 / degrees).diag.dot(affinity_mat)
101
+
102
+ iters = 0
103
+ embedded_line = degrees / degrees.sum
104
+ n_samples = embedded_line.shape[0]
105
+ error = Numo::DFloat.ones(n_samples)
106
+ max_iter.times do |t|
107
+ iters = t + 1
108
+ new_embedded_line = normalized_affinity_mat.dot(embedded_line)
109
+ new_embedded_line /= new_embedded_line.abs.sum
110
+ new_error = (new_embedded_line - embedded_line).abs
111
+ break if (new_error - error).abs.max <= tol
112
+
113
+ embedded_line = new_embedded_line
114
+ error = new_error
115
+ end
116
+
117
+ [embedded_line, iters]
118
+ end
119
+
120
+ def line_kmeans_clustering(vec)
121
+ ::Rumale::Clustering::KMeans.new(
122
+ n_clusters: @params[:n_clusters], init: @params[:init],
123
+ max_iter: @params[:max_iter], tol: @params[:tol], random_seed: @params[:random_seed]
124
+ ).fit_predict(vec.expand_dims(1))
125
+ end
126
+ end
127
+ end
128
+ end
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ module Clustering
10
+ # SingleLinkage is a class that implements hierarchical cluster analysis with single linkage method.
11
+ # This class is used internally for HDBSCAN.
12
+ #
13
+ # @example
14
+ # require 'rumale/clustering/single_linkage'
15
+ #
16
+ # analyzer = Rumale::Clustering::SingleLinkage.new(n_clusters: 2)
17
+ # cluster_labels = analyzer.fit_predict(samples)
18
+ #
19
+ # *Reference*
20
+ # - Mullner, D., "Modern hierarchical, agglomerative clustering algorithms," arXiv:1109.2378, 2011.
21
+ class SingleLinkage < ::Rumale::Base::Estimator
22
+ include ::Rumale::Base::ClusterAnalyzer
23
+
24
+ # Return the cluster labels.
25
+ # @return [Numo::Int32] (shape: [n_samples])
26
+ attr_reader :labels
27
+
28
+ # Return the hierarchical structure.
29
+ # @return [Array<SingleLinkage::Node>] (shape: [n_samples - 1])
30
+ attr_reader :hierarchy
31
+
32
+ # Create a new cluster analyzer with single linkage algorithm.
33
+ #
34
+ # @param n_clusters [Integer] The number of clusters.
35
+ # @param metric [String] The metric to calculate the distances.
36
+ # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
37
+ # If metric is 'precomputed', the fit and fit_predict methods expect to be given a distance matrix.
38
+ def initialize(n_clusters: 2, metric: 'euclidean')
39
+ super()
40
+ @params = {
41
+ n_clusters: n_clusters,
42
+ metric: (metric == 'precomputed' ? 'precomputed' : 'euclidean')
43
+ }
44
+ end
45
+
46
+ # Analysis clusters with given training data.
47
+ #
48
+ # @overload fit(x) -> SingleLinkage
49
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
50
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
51
+ # @return [SingleLinkage] The learned cluster analyzer itself.
52
+ def fit(x, _y = nil)
53
+ x = ::Rumale::Validation.check_convert_sample_array(x)
54
+ raise ArgumentError, 'the input distance matrix should be square' if check_invalid_array_shape(x)
55
+
56
+ fit_predict(x)
57
+ self
58
+ end
59
+
60
+ # Analysis clusters and assign samples to clusters.
61
+ #
62
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
63
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
64
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
65
+ def fit_predict(x)
66
+ x = ::Rumale::Validation.check_convert_sample_array(x)
67
+ raise ArgumentError, 'the input distance matrix should be square' if check_invalid_array_shape(x)
68
+
69
+ distance_mat = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x)
70
+ @labels = partial_fit(distance_mat)
71
+ end
72
+
73
+ private
74
+
75
+ def check_invalid_array_shape(x)
76
+ @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
77
+ end
78
+
79
+ # @!visibility private
80
+ class UnionFind
81
+ def initialize(n)
82
+ @parent = Numo::Int32.zeros(2 * n - 1) - 1
83
+ @size = Numo::Int32.hstack([Numo::Int32.ones(n), Numo::Int32.zeros(n - 1)])
84
+ @next_label = n
85
+ end
86
+
87
+ # @!visibility private
88
+ def union(x, y)
89
+ size = @size[x] + @size[y]
90
+ @parent[x] = @next_label
91
+ @parent[y] = @next_label
92
+ @size[@next_label] = size
93
+ @next_label += 1
94
+ size
95
+ end
96
+
97
+ # @!visibility private
98
+ def find(x)
99
+ p = x
100
+ x = @parent[x] while @parent[x] != -1
101
+ while @parent[p] != x
102
+ p = @parent[p]
103
+ @parent[p] = x
104
+ end
105
+ x
106
+ end
107
+ end
108
+
109
+ # @!visibility private
110
+ class Node
111
+ # @!visibility private
112
+ attr_reader :x, :y, :weight, :n_elements
113
+
114
+ # @!visibility private
115
+ def initialize(x:, y:, weight:, n_elements: 0)
116
+ @x = x
117
+ @y = y
118
+ @weight = weight
119
+ @n_elements = n_elements
120
+ end
121
+
122
+ # @!visibility private
123
+ def ==(other)
124
+ x == other.x && y == other.y && weight == other.weight && n_elements == other.n_elements
125
+ end
126
+ end
127
+
128
+ private_constant :UnionFind, :Node
129
+
130
+ def partial_fit(distance_mat)
131
+ mst = minimum_spanning_tree(distance_mat)
132
+ @hierarchy = single_linkage_hierarchy(mst)
133
+ flatten(@hierarchy, @params[:n_clusters])
134
+ end
135
+
136
+ def minimum_spanning_tree(complete_graph)
137
+ n_samples = complete_graph.shape[0]
138
+ n_edges = n_samples - 1
139
+ curr_weights = Numo::DFloat.zeros(n_samples) + Float::INFINITY
140
+ curr_labels = Numo::Int32.new(n_samples).seq
141
+ next_node = 0
142
+ mst = Array.new(n_edges) do
143
+ curr_node = next_node
144
+ target = curr_labels.ne(curr_node)
145
+ curr_labels = curr_labels[target]
146
+ curr_weights = Numo::DFloat.minimum(curr_weights[target], complete_graph[curr_node, curr_labels])
147
+ next_node = curr_labels[curr_weights.min_index]
148
+ weight = curr_weights.min
149
+ Node.new(x: curr_node, y: next_node, weight: weight)
150
+ end
151
+ mst.sort! { |a, b| a.weight <=> b.weight }
152
+ end
153
+
154
+ def single_linkage_hierarchy(mst)
155
+ n_edges = mst.size
156
+ n_nodes = n_edges + 1
157
+ uf = UnionFind.new(n_nodes)
158
+ Array.new(n_edges) do |n|
159
+ x_root = uf.find(mst[n].x)
160
+ y_root = uf.find(mst[n].y)
161
+ x_root, y_root = [y_root, x_root] unless x_root < y_root
162
+ weight = mst[n].weight
163
+ n_samples = uf.union(x_root, y_root)
164
+ Node.new(x: x_root, y: y_root, weight: weight, n_elements: n_samples)
165
+ end
166
+ end
167
+
168
+ def descedent_ids(hierarchy_, start_node)
169
+ n_samples = hierarchy_.size + 1
170
+ return [start_node] if start_node < n_samples
171
+
172
+ res = []
173
+ indices = [start_node]
174
+ n_indices = 1
175
+ while n_indices.positive?
176
+ idx = indices.pop
177
+ if idx < n_samples
178
+ res.push(idx)
179
+ n_indices -= 1
180
+ else
181
+ indices.push(hierarchy_[idx - n_samples].x)
182
+ indices.push(hierarchy_[idx - n_samples].y)
183
+ n_indices += 1
184
+ end
185
+ end
186
+ res
187
+ end
188
+
189
+ def flatten(hierarchy_, n_clusters)
190
+ n_samples = hierarchy_.size + 1
191
+ return Numo::Int32.zeros(n_samples) if n_clusters < 2
192
+
193
+ nodes = [-([hierarchy_[-1].x, hierarchy_[-1].y].max + 1)]
194
+ (n_clusters - 1).times do
195
+ children = hierarchy_[-nodes[0] - n_samples]
196
+ nodes.push(-children.x)
197
+ nodes.push(-children.y)
198
+ nodes.sort!.shift
199
+ end
200
+ res = Numo::Int32.zeros(n_samples)
201
+ nodes.each_with_index { |sid, cluster_id| res[descedent_ids(hierarchy_, -sid)] = cluster_id }
202
+ res
203
+ end
204
+ end
205
+ end
206
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/pairwise_metric'
4
+ require 'rumale/clustering/dbscan'
5
+
6
+ module Rumale
7
+ module Clustering
8
+ # SNN is a class that implements Shared Nearest Neighbor cluster analysis.
9
+ # The SNN method is a variation of DBSCAN that uses similarity based on k-nearest neighbors as a metric.
10
+ #
11
+ # @example
12
+ # require 'rumale/clustering/snn'
13
+ #
14
+ # analyzer = Rumale::Clustering::SNN.new(n_neighbors: 10, eps: 5, min_samples: 5)
15
+ # cluster_labels = analyzer.fit_predict(samples)
16
+ #
17
+ # *Reference*
18
+ # - Ertoz, L., Steinbach, M., and Kumar, V., "Finding Clusters of Different Sizes, Shapes, and Densities in Noisy, High Dimensional Data," Proc. SDM'03, pp. 47--58, 2003.
19
+ # - Houle, M E., Kriegel, H-P., Kroger, P., Schubert, E., and Zimek, A., "Can Shared-Neighbor Distances Defeat the Curse of Dimensionality?," Proc. SSDBM'10, pp. 482--500, 2010.
20
+ class SNN < DBSCAN
21
+ # Create a new cluster analyzer with Shared Nearest Neighbor method.
22
+ #
23
+ # @param n_neighbors [Integer] The number of neighbors to be used for finding k-nearest neighbors.
24
+ # @param eps [Integer] The threshold value for finding connected components based on similarity.
25
+ # @param min_samples [Integer] The number of neighbor samples to be used for the criterion whether a point is a core point.
26
+ # @param metric [String] The metric to calculate the distances.
27
+ # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
28
+ # If metric is 'precomputed', the fit and fit_predict methods expect to be given a distance matrix.
29
+ def initialize(n_neighbors: 10, eps: 5, min_samples: 5, metric: 'euclidean') # rubocop:disable Lint/MissingSuper
30
+ @params = {
31
+ n_neighbors: n_neighbors,
32
+ eps: eps,
33
+ min_samples: min_samples,
34
+ metric: (metric == 'precomputed' ? 'precomputed' : 'euclidean')
35
+ }
36
+ end
37
+
38
+ # Analysis clusters with given training data.
39
+ #
40
+ # @overload fit(x) -> SNN
41
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
42
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
43
+ # @return [SNN] The learned cluster analyzer itself.
44
+ def fit(x, _y = nil)
45
+ super
46
+ end
47
+
48
+ # Analysis clusters and assign samples to clusters.
49
+ #
50
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
51
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
52
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
53
+ def fit_predict(x) # rubocop:disable Lint/UselessMethodDefinition
54
+ super
55
+ end
56
+
57
+ private
58
+
59
+ def calc_pairwise_metrics(x)
60
+ distance_mat = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x)
61
+ n_samples = distance_mat.shape[0]
62
+ adjacency_mat = Numo::DFloat.zeros(n_samples, n_samples)
63
+ n_samples.times do |n|
64
+ neighbor_ids = distance_mat[n, true].sort_index[0...@params[:n_neighbors]]
65
+ adjacency_mat[n, neighbor_ids] = 1
66
+ end
67
+ adjacency_mat.dot(adjacency_mat.transpose)
68
+ end
69
+
70
+ def region_query(similarity_arr)
71
+ similarity_arr.gt(@params[:eps]).where.to_a
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/utils'
7
+ require 'rumale/validation'
8
+ require 'rumale/clustering/k_means'
9
+
10
+ module Rumale
11
+ module Clustering
12
+ # SpectralClustering is a class that implements the normalized spectral clustering.
13
+ #
14
+ # @example
15
+ # require 'numo/linalg/autoloader'
16
+ # require 'rumale/clustering/spectral_clustering'
17
+ #
18
+ # analyzer = Rumale::Clustering::SpectralClustering.new(n_clusters: 10, gamma: 8.0)
19
+ # cluster_labels = analyzer.fit_predict(samples)
20
+ #
21
+ # *Reference*
22
+ # - Ng, A Y., Jordan, M I., and Weiss, Y., "On Spectral Clustering: Analysis and an algorithm," Proc. NIPS'01, pp. 849--856, 2001.
23
+ # - von Luxburg, U., "A tutorial on spectral clustering," Statistics and Computing, Vol. 17 (4), pp. 395--416, 2007.
24
+ class SpectralClustering < ::Rumale::Base::Estimator
25
+ include ::Rumale::Base::ClusterAnalyzer
26
+
27
+ # Return the data in embedded space.
28
+ # @return [Numo::DFloat] (shape: [n_samples, n_clusters])
29
+ attr_reader :embedding
30
+
31
+ # Return the cluster labels.
32
+ # @return [Numo::Int32] (shape: [n_samples])
33
+ attr_reader :labels
34
+
35
+ # Create a new cluster analyzer with normalized spectral clustering.
36
+ #
37
+ # @param n_clusters [Integer] The number of clusters.
38
+ # @param affinity [String] The representation of affinity matrix ('rbf' or 'precomputed').
39
+ # If affinity = 'rbf', the class performs the normalized spectral clustering with the fully connected graph weighted by rbf kernel.
40
+ # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
41
+ # If affinity = 'precomputed', this parameter is ignored.
42
+ # @param init [String] The initialization method for centroids of K-Means clustering ('random' or 'k-means++').
43
+ # @param max_iter [Integer] The maximum number of iterations for K-Means clustering.
44
+ # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
45
+ # @param random_seed [Integer] The seed value using to initialize the random generator.
46
+ def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
47
+ super()
48
+ @params = {
49
+ n_clusters: n_clusters,
50
+ affinity: affinity,
51
+ gamma: gamma,
52
+ init: (init == 'random' ? 'random' : 'k-means++'),
53
+ max_iter: max_iter,
54
+ tol: tol,
55
+ random_seed: (random_seed || srand)
56
+ }
57
+ end
58
+
59
+ # Analysis clusters with given training data.
60
+ # To execute this method, Numo::Linalg must be loaded.
61
+ #
62
+ # @overload fit(x) -> SpectralClustering
63
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
64
+ # If the affinity is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
65
+ # @return [SpectralClustering] The learned cluster analyzer itself.
66
+ def fit(x, _y = nil)
67
+ x = ::Rumale::Validation.check_convert_sample_array(x)
68
+ raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)
69
+
70
+ raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)
71
+
72
+ fit_predict(x)
73
+ self
74
+ end
75
+
76
+ # Analysis clusters and assign samples to clusters.
77
+ # To execute this method, Numo::Linalg must be loaded.
78
+ #
79
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
80
+ # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
81
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
82
+ def fit_predict(x)
83
+ x = ::Rumale::Validation.check_convert_sample_array(x)
84
+ raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)
85
+
86
+ unless enable_linalg?(warning: false)
87
+ raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded'
88
+ end
89
+
90
+ affinity_mat = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
91
+ @embedding = embedded_space(affinity_mat, @params[:n_clusters])
92
+ normalized_embedding = ::Rumale::Utils.normalize(@embedding, 'l2')
93
+ @labels = kmeans_clustering(normalized_embedding)
94
+ end
95
+
96
+ private
97
+
98
+ def check_invalid_array_shape(x)
99
+ @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
100
+ end
101
+
102
+ def embedded_space(affinity_mat, n_clusters)
103
+ affinity_mat[affinity_mat.diag_indices] = 0.0
104
+ degrees = 1.0 / Numo::NMath.sqrt(affinity_mat.sum(axis: 1))
105
+ laplacian_mat = degrees.diag.dot(affinity_mat).dot(degrees.diag)
106
+
107
+ n_samples = affinity_mat.shape[0]
108
+ _, eig_vecs = Numo::Linalg.eigh(laplacian_mat, vals_range: (n_samples - n_clusters)...n_samples)
109
+ eig_vecs.reverse(1).dup
110
+ end
111
+
112
+ def kmeans_clustering(x)
113
+ ::Rumale::Clustering::KMeans.new(
114
+ n_clusters: @params[:n_clusters], init: @params[:init],
115
+ max_iter: @params[:max_iter], tol: @params[:tol], random_seed: @params[:random_seed]
116
+ ).fit_predict(x)
117
+ end
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rumale is a machine learning library in Ruby.
4
+ module Rumale
5
+ # This module consists of classes that implement cluster analysis methods.
6
+ module Clustering
7
+ # @!visibility private
8
+ VERSION = '0.24.0'
9
+ end
10
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'clustering/dbscan'
6
+ require_relative 'clustering/gaussian_mixture'
7
+ require_relative 'clustering/hdbscan'
8
+ require_relative 'clustering/k_means'
9
+ require_relative 'clustering/k_medoids'
10
+ require_relative 'clustering/mini_batch_k_means'
11
+ require_relative 'clustering/power_iteration'
12
+ require_relative 'clustering/single_linkage'
13
+ require_relative 'clustering/snn'
14
+ require_relative 'clustering/spectral_clustering'
15
+ require_relative 'clustering/version'
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rumale-clustering
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.24.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rumale-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.24.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.24.0
41
+ description: |
42
+ Rumale::Clustering provides cluster analysis algorithms,
43
+ such as K-Means, Gaussian Mixture Model, DBSCAN, and Spectral Clustering,
44
+ with Rumale interface.
45
+ email:
46
+ - yoshoku@outlook.com
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - LICENSE.txt
52
+ - README.md
53
+ - lib/rumale/clustering.rb
54
+ - lib/rumale/clustering/dbscan.rb
55
+ - lib/rumale/clustering/gaussian_mixture.rb
56
+ - lib/rumale/clustering/hdbscan.rb
57
+ - lib/rumale/clustering/k_means.rb
58
+ - lib/rumale/clustering/k_medoids.rb
59
+ - lib/rumale/clustering/mini_batch_k_means.rb
60
+ - lib/rumale/clustering/power_iteration.rb
61
+ - lib/rumale/clustering/single_linkage.rb
62
+ - lib/rumale/clustering/snn.rb
63
+ - lib/rumale/clustering/spectral_clustering.rb
64
+ - lib/rumale/clustering/version.rb
65
+ homepage: https://github.com/yoshoku/rumale
66
+ licenses:
67
+ - BSD-3-Clause
68
+ metadata:
69
+ homepage_uri: https://github.com/yoshoku/rumale
70
+ source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-clustering
71
+ changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
72
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
73
+ rubygems_mfa_required: 'true'
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubygems_version: 3.3.26
90
+ signing_key:
91
+ specification_version: 4
92
+ summary: Rumale::Clustering provides cluster analysis algorithms with Rumale interface.
93
+ test_files: []