rumale-clustering 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+ require 'rumale/clustering/k_means'
8
+
9
+ module Rumale
10
+ module Clustering
11
+ # PowerIteration is a class that implements power iteration clustering.
12
+ #
13
+ # @example
14
+ # require 'rumale/clustering/power_iteration'
15
+ #
16
+ # analyzer = Rumale::Clustering::PowerIteration.new(n_clusters: 10, gamma: 8.0, max_iter: 1000)
17
+ # cluster_labels = analyzer.fit_predict(samples)
18
+ #
19
+ # *Reference*
20
+ # - Lin, F., and Cohen, W W., "Power Iteration Clustering," Proc. ICML'10, pp. 655--662, 2010.
21
class PowerIteration < ::Rumale::Base::Estimator
  include ::Rumale::Base::ClusterAnalyzer

  # Return the data in embedded space.
  # @return [Numo::DFloat] (shape: [n_samples])
  attr_reader :embedding

  # Return the cluster labels.
  # @return [Numo::Int32] (shape: [n_samples])
  attr_reader :labels

  # Return the number of iterations run for optimization
  # @return [Integer]
  attr_reader :n_iter

  # Create a new cluster analyzer with power iteration clustering.
  #
  # @param n_clusters [Integer] The number of clusters.
  # @param affinity [String] The representation of affinity matrix ('rbf' or 'precomputed').
  # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
  #   If affinity = 'precomputed', this parameter is ignored.
  # @param init [String] The initialization method for centroids of K-Means clustering ('random' or 'k-means++').
  #   Any value other than 'random' falls back to 'k-means++'.
  # @param max_iter [Integer] The maximum number of iterations.
  #   The same value bounds the power iteration and is forwarded to the K-Means step.
  # @param tol [Float] The tolerance of termination criterion.
  #   The power iteration uses tol / n_samples; K-Means receives tol unchanged.
  # @param eps [Float] A small value close to zero to avoid zero division error.
  #   NOTE(review): this value is stored in the parameters but no computation in this class reads it.
  # @param random_seed [Integer] The seed value using to initialize the random generator.
  def initialize(n_clusters: 8, affinity: 'rbf', gamma: nil, init: 'k-means++',
                 max_iter: 1000, tol: 1.0e-8, eps: 1.0e-5, random_seed: nil)
    super()
    @params = {
      n_clusters: n_clusters,
      affinity: affinity,
      gamma: gamma,
      init: (init == 'random' ? 'random' : 'k-means++'),
      max_iter: max_iter,
      tol: tol,
      eps: eps,
      random_seed: (random_seed || srand)
    }
  end

  # Analysis clusters with given training data.
  #
  # @overload fit(x) -> PowerIteration
  #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
  #     If the affinity is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
  #   @return [PowerIteration] The learned cluster analyzer itself.
  # @raise [ArgumentError] If affinity is 'precomputed' and x is not square.
  def fit(x, _y = nil)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)

    fit_predict(x)
    self
  end

  # Analysis clusters and assign samples to clusters.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
  #   If the affinity is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
  #   The given array is left unmodified.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
  # @raise [ArgumentError] If affinity is 'precomputed' and x is not square.
  def fit_predict(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)

    # embedded_space zeroes the diagonal of its argument in place. A precomputed
    # matrix may be the caller's own array (presumably the validation helper hands
    # DFloat input back as-is -- not visible here), so work on a copy.
    affinity_mat = if @params[:affinity] == 'precomputed'
                     x.dup
                   else
                     ::Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
                   end
    @embedding, @n_iter = embedded_space(affinity_mat, @params[:max_iter], @params[:tol].fdiv(affinity_mat.shape[0]))
    @labels = line_kmeans_clustering(@embedding)
  end

  private

  # True when a precomputed affinity matrix was requested but x is not square.
  def check_invalid_array_shape(x)
    @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
  end

  # Run the power iteration on the row-normalized affinity matrix.
  # NOTE: affinity_mat is modified in place (its diagonal is set to zero).
  #
  # @param affinity_mat [Numo::DFloat] (shape: [n_samples, n_samples]) The affinity matrix.
  # @param max_iter [Integer] The maximum number of iterations.
  # @param tol [Float] The threshold on the change of the per-sample error between two iterations.
  # @return [Array<(Numo::DFloat, Integer)>] The one-dimensional embedding and the number of iterations run.
  def embedded_space(affinity_mat, max_iter, tol)
    affinity_mat[affinity_mat.diag_indices] = 0.0

    degrees = affinity_mat.sum(axis: 1)
    normalized_affinity_mat = (1.0 / degrees).diag.dot(affinity_mat)

    iters = 0
    # Start from the degree vector scaled to sum to one.
    embedded_line = degrees / degrees.sum
    n_samples = embedded_line.shape[0]
    error = Numo::DFloat.ones(n_samples)
    max_iter.times do |t|
      iters = t + 1
      new_embedded_line = normalized_affinity_mat.dot(embedded_line)
      new_embedded_line /= new_embedded_line.abs.sum
      new_error = (new_embedded_line - embedded_line).abs
      # Stop once the error itself has stopped changing; on this exit the
      # previous iterate is the one returned.
      break if (new_error - error).abs.max <= tol

      embedded_line = new_embedded_line
      error = new_error
    end

    [embedded_line, iters]
  end

  # Cluster the one-dimensional embedding with K-Means (vec is expanded to shape [n_samples, 1]).
  def line_kmeans_clustering(vec)
    ::Rumale::Clustering::KMeans.new(
      n_clusters: @params[:n_clusters], init: @params[:init],
      max_iter: @params[:max_iter], tol: @params[:tol], random_seed: @params[:random_seed]
    ).fit_predict(vec.expand_dims(1))
  end
end
127
+ end
128
+ end
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/validation'
7
+
8
+ module Rumale
9
+ module Clustering
10
+ # SingleLinkage is a class that implements hierarchical cluster analysis with single linkage method.
11
+ # This class is used internally for HDBSCAN.
12
+ #
13
+ # @example
14
+ # require 'rumale/clustering/single_linkage'
15
+ #
16
+ # analyzer = Rumale::Clustering::SingleLinkage.new(n_clusters: 2)
17
+ # cluster_labels = analyzer.fit_predict(samples)
18
+ #
19
+ # *Reference*
20
+ # - Mullner, D., "Modern hierarchical, agglomerative clustering algorithms," arXiv:1109.2378, 2011.
21
class SingleLinkage < ::Rumale::Base::Estimator
  include ::Rumale::Base::ClusterAnalyzer

  # Return the cluster labels.
  # @return [Numo::Int32] (shape: [n_samples])
  attr_reader :labels

  # Return the hierarchical structure.
  # @return [Array<SingleLinkage::Node>] (shape: [n_samples - 1])
  attr_reader :hierarchy

  # Create a new cluster analyzer with single linkage algorithm.
  #
  # @param n_clusters [Integer] The number of clusters.
  # @param metric [String] The metric to calculate the distances.
  #   If metric is 'euclidean', Euclidean distance is calculated for distance between points.
  #   If metric is 'precomputed', the fit and fit_predict methods expect to be given a distance matrix.
  #   Any other value falls back to 'euclidean'.
  def initialize(n_clusters: 2, metric: 'euclidean')
    super()
    @params = {
      n_clusters: n_clusters,
      metric: (metric == 'precomputed' ? 'precomputed' : 'euclidean')
    }
  end

  # Analysis clusters with given training data.
  #
  # @overload fit(x) -> SingleLinkage
  #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
  #     If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
  #   @return [SingleLinkage] The learned cluster analyzer itself.
  # @raise [ArgumentError] If metric is 'precomputed' and x is not square.
  def fit(x, _y = nil)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    raise ArgumentError, 'the input distance matrix should be square' if check_invalid_array_shape(x)

    fit_predict(x)
    self
  end

  # Analysis clusters and assign samples to clusters.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
  #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
  # @raise [ArgumentError] If metric is 'precomputed' and x is not square.
  def fit_predict(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    raise ArgumentError, 'the input distance matrix should be square' if check_invalid_array_shape(x)

    distance_mat = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x)
    @labels = partial_fit(distance_mat)
  end

  private

  # True when a precomputed distance matrix was requested but x is not square.
  def check_invalid_array_shape(x)
    @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
  end

  # Disjoint-set structure over n leaves that hands out a fresh label
  # (n, n + 1, ...) for every merge, so it can hold up to 2n - 1 labels.
  # @!visibility private
  class UnionFind
    def initialize(n)
      # -1 marks a label that currently has no parent (a root).
      @parent = Numo::Int32.zeros(2 * n - 1) - 1
      @size = Numo::Int32.hstack([Numo::Int32.ones(n), Numo::Int32.zeros(n - 1)])
      @next_label = n
    end

    # Merge the sets rooted at x and y under a newly issued label.
    # Returns the number of leaves in the merged set.
    # @!visibility private
    def union(x, y)
      size = @size[x] + @size[y]
      @parent[x] = @next_label
      @parent[y] = @next_label
      @size[@next_label] = size
      @next_label += 1
      size
    end

    # Return the root label of the set containing x.
    # @!visibility private
    def find(x)
      root = x
      root = @parent[root] while @parent[root] != -1
      # Path compression: repoint every node on the walked path at the root.
      # Stopping at the root keeps a root's -1 marker from ever being used as
      # an index (a negative index would address the end of the array).
      until x == root
        up = @parent[x]
        @parent[x] = root
        x = up
      end
      root
    end
  end

  # One edge / merge record: the two endpoints (or merged cluster ids), the
  # linkage weight, and the number of samples in the resulting cluster.
  # @!visibility private
  class Node
    # @!visibility private
    attr_reader :x, :y, :weight, :n_elements

    # @!visibility private
    def initialize(x:, y:, weight:, n_elements: 0)
      @x = x
      @y = y
      @weight = weight
      @n_elements = n_elements
    end

    # @!visibility private
    def ==(other)
      x == other.x && y == other.y && weight == other.weight && n_elements == other.n_elements
    end
  end

  private_constant :UnionFind, :Node

  # Build the hierarchy from the distance matrix, store it in @hierarchy and
  # return the flat cluster labels for the configured number of clusters.
  def partial_fit(distance_mat)
    mst = minimum_spanning_tree(distance_mat)
    @hierarchy = single_linkage_hierarchy(mst)
    flatten(@hierarchy, @params[:n_clusters])
  end

  # Grow a spanning tree over the complete graph starting from node 0 and
  # return its n_samples - 1 edges sorted by ascending weight.
  def minimum_spanning_tree(complete_graph)
    n_samples = complete_graph.shape[0]
    n_edges = n_samples - 1
    # Best known connection weight for every node not yet in the tree.
    curr_weights = Numo::DFloat.zeros(n_samples) + Float::INFINITY
    curr_labels = Numo::Int32.new(n_samples).seq
    next_node = 0
    mst = Array.new(n_edges) do
      curr_node = next_node
      # Drop the node just added, then relax the remaining nodes against it.
      target = curr_labels.ne(curr_node)
      curr_labels = curr_labels[target]
      curr_weights = Numo::DFloat.minimum(curr_weights[target], complete_graph[curr_node, curr_labels])
      next_node = curr_labels[curr_weights.min_index]
      weight = curr_weights.min
      Node.new(x: curr_node, y: next_node, weight: weight)
    end
    mst.sort! { |a, b| a.weight <=> b.weight }
  end

  # Turn the weight-sorted tree edges into merge records. The n-th merge
  # creates cluster id n_nodes + n; x is always the smaller of the two merged ids.
  def single_linkage_hierarchy(mst)
    n_edges = mst.size
    n_nodes = n_edges + 1
    uf = UnionFind.new(n_nodes)
    Array.new(n_edges) do |n|
      x_root = uf.find(mst[n].x)
      y_root = uf.find(mst[n].y)
      x_root, y_root = [y_root, x_root] unless x_root < y_root
      weight = mst[n].weight
      n_samples = uf.union(x_root, y_root)
      Node.new(x: x_root, y: y_root, weight: weight, n_elements: n_samples)
    end
  end

  # Collect the sample ids (leaves) below start_node with an explicit stack.
  # Ids below n_samples are samples; larger ids index into hierarchy_.
  def descedent_ids(hierarchy_, start_node)
    n_samples = hierarchy_.size + 1
    return [start_node] if start_node < n_samples

    res = []
    indices = [start_node]
    until indices.empty?
      idx = indices.pop
      if idx < n_samples
        res.push(idx)
      else
        indices.push(hierarchy_[idx - n_samples].x)
        indices.push(hierarchy_[idx - n_samples].y)
      end
    end
    res
  end

  # Cut the hierarchy into flat clusters by repeatedly splitting the cluster
  # with the largest id, i.e. the most recently formed one.
  #
  # @param hierarchy_ [Array<Node>] The merge records from single_linkage_hierarchy.
  # @param n_clusters [Integer] The requested number of clusters. Splitting stops early
  #   once only single samples remain, so at most n_samples clusters are produced.
  # @return [Numo::Int32] (shape: [n_samples]) The cluster label per sample.
  def flatten(hierarchy_, n_clusters)
    n_samples = hierarchy_.size + 1
    # A single sample has an empty hierarchy: there is nothing to split.
    return Numo::Int32.zeros(n_samples) if n_clusters < 2 || hierarchy_.empty?

    # Ids are stored negated so that an ascending sort puts the largest id first.
    nodes = [-([hierarchy_[-1].x, hierarchy_[-1].y].max + 1)]
    (n_clusters - 1).times do
      top = -nodes[0]
      # The largest remaining id is a sample, so every remaining node is a leaf.
      break if top < n_samples

      children = hierarchy_[top - n_samples]
      nodes.push(-children.x)
      nodes.push(-children.y)
      nodes.sort!.shift
    end
    res = Numo::Int32.zeros(n_samples)
    nodes.each_with_index { |sid, cluster_id| res[descedent_ids(hierarchy_, -sid)] = cluster_id }
    res
  end
end
205
+ end
206
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/pairwise_metric'
4
+ require 'rumale/clustering/dbscan'
5
+
6
+ module Rumale
7
+ module Clustering
8
+ # SNN is a class that implements Shared Nearest Neighbor cluster analysis.
9
+ # The SNN method is a variation of DBSCAN that uses similarity based on k-nearest neighbors as a metric.
10
+ #
11
+ # @example
12
+ # require 'rumale/clustering/snn'
13
+ #
14
+ # analyzer = Rumale::Clustering::SNN.new(n_neighbors: 10, eps: 5, min_samples: 5)
15
+ # cluster_labels = analyzer.fit_predict(samples)
16
+ #
17
+ # *Reference*
18
+ # - Ertoz, L., Steinbach, M., and Kumar, V., "Finding Clusters of Different Sizes, Shapes, and Densities in Noisy, High Dimensional Data," Proc. SDM'03, pp. 47--58, 2003.
19
+ # - Houle, M E., Kriegel, H-P., Kroger, P., Schubert, E., and Zimek, A., "Can Shared-Neighbor Distances Defeat the Curse of Dimensionality?," Proc. SSDBM'10, pp. 482--500, 2010.
20
class SNN < DBSCAN
  # Create a new cluster analyzer with Shared Nearest Neighbor method.
  # The superclass initializer is not invoked (see the rubocop directive below);
  # the parameter hash is built here instead.
  #
  # @param n_neighbors [Integer] The number of neighbors to be used for finding k-nearest neighbors.
  # @param eps [Integer] The threshold value for finding connected components based on similarity.
  # @param min_samples [Integer] The number of neighbor samples to be used for the criterion whether a point is a core point.
  # @param metric [String] The metric to calculate the distances.
  #   If metric is 'euclidean', Euclidean distance is calculated for distance between points.
  #   If metric is 'precomputed', the fit and fit_predict methods expect to be given a distance matrix.
  #   Any other value falls back to 'euclidean'.
  def initialize(n_neighbors: 10, eps: 5, min_samples: 5, metric: 'euclidean') # rubocop:disable Lint/MissingSuper
    resolved_metric = metric == 'precomputed' ? 'precomputed' : 'euclidean'
    @params = { n_neighbors: n_neighbors, eps: eps, min_samples: min_samples, metric: resolved_metric }
  end

  # Analysis clusters with given training data.
  # Delegates to the superclass implementation with the same arguments.
  #
  # @overload fit(x) -> SNN
  #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
  #     If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
  #   @return [SNN] The learned cluster analyzer itself.
  def fit(x, _y = nil)
    super
  end

  # Analysis clusters and assign samples to clusters.
  # Delegates to the superclass implementation with the same argument.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
  #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
  def fit_predict(x) # rubocop:disable Lint/UselessMethodDefinition
    super
  end

  private

  # Build the shared-nearest-neighbor similarity matrix: entry (i, j) counts the
  # samples that appear in both the i-th and the j-th nearest-neighbor lists.
  def calc_pairwise_metrics(x)
    distances =
      if @params[:metric] == 'precomputed'
        x
      else
        ::Rumale::PairwiseMetric.euclidean_distance(x)
      end
    sample_count = distances.shape[0]
    knn_graph = Numo::DFloat.zeros(sample_count, sample_count)
    (0...sample_count).each do |row|
      # Columns holding the n_neighbors smallest distances of this row.
      nearest = distances[row, true].sort_index[0...@params[:n_neighbors]]
      knn_graph[row, nearest] = 1
    end
    knn_graph.dot(knn_graph.transpose)
  end

  # Return the indices whose similarity is strictly greater than eps.
  def region_query(similarity_arr)
    above_threshold = similarity_arr.gt(@params[:eps])
    above_threshold.where.to_a
  end
end
74
+ end
75
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/estimator'
4
+ require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/pairwise_metric'
6
+ require 'rumale/utils'
7
+ require 'rumale/validation'
8
+ require 'rumale/clustering/k_means'
9
+
10
+ module Rumale
11
+ module Clustering
12
+ # SpectralClustering is a class that implements the normalized spectral clustering.
13
+ #
14
+ # @example
15
+ # require 'numo/linalg/autoloader'
16
+ # require 'rumale/clustering/spectral_clustering'
17
+ #
18
+ # analyzer = Rumale::Clustering::SpectralClustering.new(n_clusters: 10, gamma: 8.0)
19
+ # cluster_labels = analyzer.fit_predict(samples)
20
+ #
21
+ # *Reference*
22
+ # - Ng, A Y., Jordan, M I., and Weiss, Y., "On Spectral Clustering: Analysis and an algorithm," Proc. NIPS'01, pp. 849--856, 2001.
23
+ # - von Luxburg, U., "A tutorial on spectral clustering," Statistics and Computing, Vol. 17 (4), pp. 395--416, 2007.
24
class SpectralClustering < ::Rumale::Base::Estimator
  include ::Rumale::Base::ClusterAnalyzer

  # Return the data in embedded space.
  # @return [Numo::DFloat] (shape: [n_samples, n_clusters])
  attr_reader :embedding

  # Return the cluster labels.
  # @return [Numo::Int32] (shape: [n_samples])
  attr_reader :labels

  # Create a new cluster analyzer with normalized spectral clustering.
  #
  # @param n_clusters [Integer] The number of clusters.
  # @param affinity [String] The representation of affinity matrix ('rbf' or 'precomputed').
  #   If affinity = 'rbf', the class performs the normalized spectral clustering with the fully connected graph weighted by rbf kernel.
  # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
  #   If affinity = 'precomputed', this parameter is ignored.
  # @param init [String] The initialization method for centroids of K-Means clustering ('random' or 'k-means++').
  #   Any value other than 'random' falls back to 'k-means++'.
  # @param max_iter [Integer] The maximum number of iterations for K-Means clustering.
  # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
  # @param random_seed [Integer] The seed value using to initialize the random generator.
  def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
    super()
    @params = {
      n_clusters: n_clusters,
      affinity: affinity,
      gamma: gamma,
      init: (init == 'random' ? 'random' : 'k-means++'),
      max_iter: max_iter,
      tol: tol,
      random_seed: (random_seed || srand)
    }
  end

  # Analysis clusters with given training data.
  # To execute this method, Numo::Linalg must be loaded.
  #
  # @overload fit(x) -> SpectralClustering
  #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
  #     If the affinity is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
  #   @return [SpectralClustering] The learned cluster analyzer itself.
  # @raise [ArgumentError] If affinity is 'precomputed' and x is not square.
  # @raise [RuntimeError] If Numo::Linalg is not loaded.
  def fit(x, _y = nil)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)

    raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

    fit_predict(x)
    self
  end

  # Analysis clusters and assign samples to clusters.
  # To execute this method, Numo::Linalg must be loaded.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
  #   If the affinity is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
  #   The given array is left unmodified.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
  # @raise [ArgumentError] If affinity is 'precomputed' and x is not square.
  # @raise [RuntimeError] If Numo::Linalg is not loaded.
  def fit_predict(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)

    unless enable_linalg?(warning: false)
      raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded'
    end

    # The parameter key is :affinity (there is no :metric entry in @params), so
    # that is what selects the precomputed path. The precomputed matrix is
    # copied because embedded_space zeroes the diagonal of its argument in place
    # and x may be the caller's own array.
    affinity_mat = if @params[:affinity] == 'precomputed'
                     x.dup
                   else
                     ::Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
                   end
    @embedding = embedded_space(affinity_mat, @params[:n_clusters])
    normalized_embedding = ::Rumale::Utils.normalize(@embedding, 'l2')
    @labels = kmeans_clustering(normalized_embedding)
  end

  private

  # True when a precomputed affinity matrix was requested but x is not square.
  def check_invalid_array_shape(x)
    @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
  end

  # Compute the spectral embedding from the symmetrically normalized affinity
  # matrix D^(-1/2) * A * D^(-1/2), where D holds the row sums of A.
  # NOTE: affinity_mat is modified in place (its diagonal is set to zero).
  #
  # @param affinity_mat [Numo::DFloat] (shape: [n_samples, n_samples]) The affinity matrix.
  # @param n_clusters [Integer] The number of eigenvectors to keep.
  # @return [Numo::DFloat] (shape: [n_samples, n_clusters]) The selected eigenvectors, column order reversed.
  def embedded_space(affinity_mat, n_clusters)
    affinity_mat[affinity_mat.diag_indices] = 0.0
    degrees = 1.0 / Numo::NMath.sqrt(affinity_mat.sum(axis: 1))
    laplacian_mat = degrees.diag.dot(affinity_mat).dot(degrees.diag)

    n_samples = affinity_mat.shape[0]
    # Request the last n_clusters eigenpairs by index -- presumably the largest
    # eigenvalues, assuming eigh orders them ascending (TODO confirm).
    _, eig_vecs = Numo::Linalg.eigh(laplacian_mat, vals_range: (n_samples - n_clusters)...n_samples)
    eig_vecs.reverse(1).dup
  end

  # Cluster the rows of x with K-Means using the configured parameters.
  def kmeans_clustering(x)
    ::Rumale::Clustering::KMeans.new(
      n_clusters: @params[:n_clusters], init: @params[:init],
      max_iter: @params[:max_iter], tol: @params[:tol], random_seed: @params[:random_seed]
    ).fit_predict(x)
  end
end
119
+ end
120
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rumale is a machine learning library in Ruby.
4
module Rumale
  # Namespace for the classes that implement cluster analysis methods.
  module Clustering
    # The version string of the rumale-clustering gem.
    # @!visibility private
    VERSION = '0.24.0'
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'clustering/dbscan'
6
+ require_relative 'clustering/gaussian_mixture'
7
+ require_relative 'clustering/hdbscan'
8
+ require_relative 'clustering/k_means'
9
+ require_relative 'clustering/k_medoids'
10
+ require_relative 'clustering/mini_batch_k_means'
11
+ require_relative 'clustering/power_iteration'
12
+ require_relative 'clustering/single_linkage'
13
+ require_relative 'clustering/snn'
14
+ require_relative 'clustering/spectral_clustering'
15
+ require_relative 'clustering/version'
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rumale-clustering
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.24.0
5
+ platform: ruby
6
+ authors:
7
+ - yoshoku
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rumale-core
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.24.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.24.0
41
+ description: |
42
+ Rumale::Clustering provides cluster analysis algorithms,
43
+ such as K-Means, Gaussian Mixture Model, DBSCAN, and Spectral Clustering,
44
+ with Rumale interface.
45
+ email:
46
+ - yoshoku@outlook.com
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - LICENSE.txt
52
+ - README.md
53
+ - lib/rumale/clustering.rb
54
+ - lib/rumale/clustering/dbscan.rb
55
+ - lib/rumale/clustering/gaussian_mixture.rb
56
+ - lib/rumale/clustering/hdbscan.rb
57
+ - lib/rumale/clustering/k_means.rb
58
+ - lib/rumale/clustering/k_medoids.rb
59
+ - lib/rumale/clustering/mini_batch_k_means.rb
60
+ - lib/rumale/clustering/power_iteration.rb
61
+ - lib/rumale/clustering/single_linkage.rb
62
+ - lib/rumale/clustering/snn.rb
63
+ - lib/rumale/clustering/spectral_clustering.rb
64
+ - lib/rumale/clustering/version.rb
65
+ homepage: https://github.com/yoshoku/rumale
66
+ licenses:
67
+ - BSD-3-Clause
68
+ metadata:
69
+ homepage_uri: https://github.com/yoshoku/rumale
70
+ source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-clustering
71
+ changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
72
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
73
+ rubygems_mfa_required: 'true'
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubygems_version: 3.3.26
90
+ signing_key:
91
+ specification_version: 4
92
+ summary: Rumale::Clustering provides cluster analysis algorithms with Rumale interface.
93
+ test_files: []