rumale 0.13.6 → 0.13.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/rumale.rb +3 -0
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +56 -0
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +53 -0
- data/lib/rumale/evaluation_measure/silhouette_score.rb +80 -0
- data/lib/rumale/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b7030e096e10df1a95d79aefe6d275c0ca16406
|
4
|
+
data.tar.gz: 85f6b809e41ecb4743df7e07b99bc3fcf13710e1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: efb25c2ea461c3ceb9b8d500a4ef77dd4db6e4a2c21400009e8fd940bd28dab6d4a725a320a5e3cc1a1b7676626d03b9c2ef73c9c02246c29e122461680488d7
|
7
|
+
data.tar.gz: 264c10852a7eb01ddb075c87969625f2cb82eb0bfa3050e21bf41f355d8213808c3684a63a0e7e95480965e0737ae76a85babee60b0a638274c31bc1741774b9
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
# 0.13.7
|
2
|
+
- Add some evaluator classes for clustering.
|
3
|
+
- SilhouetteScore
|
4
|
+
- CalinskiHarabaszScore
|
5
|
+
- DaviesBouldinScore
|
6
|
+
|
1
7
|
# 0.13.6
|
2
8
|
- Add transformer class for [FastICA](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FastICA.html).
|
3
9
|
- Fix a typo on README ([#13](https://github.com/yoshoku/rumale/pull/13)).
|
data/lib/rumale.rb
CHANGED
@@ -104,3 +104,6 @@ require 'rumale/evaluation_measure/adjusted_rand_score'
|
|
104
104
|
require 'rumale/evaluation_measure/purity'
|
105
105
|
require 'rumale/evaluation_measure/mutual_information'
|
106
106
|
require 'rumale/evaluation_measure/normalized_mutual_information'
|
107
|
+
require 'rumale/evaluation_measure/silhouette_score'
|
108
|
+
require 'rumale/evaluation_measure/davies_bouldin_score'
|
109
|
+
require 'rumale/evaluation_measure/calinski_harabasz_score'
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# CalinskiHarabaszScore is a class that calculates the Calinski and Harabasz score.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# evaluator = Rumale::EvaluationMeasure::CalinskiHarabaszScore.new
|
11
|
+
# puts evaluator.score(x, predicted)
|
12
|
+
#
|
13
|
+
# *Reference*
|
14
|
+
# - T. Calinski and J. Harabasz, "A dendrite method for cluster analysis," Communications in Statistics, Vol. 3 (1), pp. 1--27, 1974.
|
15
|
+
class CalinskiHarabaszScore
  include Base::Evaluator

  # Computes the Calinski and Harabasz score of a clustering result, i.e. the ratio of the
  # between-cluster dispersion to the within-cluster dispersion, each divided by its degrees of freedom.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
  # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
  # @return [Float] The Calinski and Harabasz score.
  def score(x, y)
    check_sample_array(x)
    check_label_array(y)
    check_sample_label_size(x, y)

    cluster_ids = y.to_a.uniq.sort
    n_clusters = cluster_ids.size
    centroids = Numo::DFloat.zeros(n_clusters, x.shape[1])

    # Within-group dispersion: squared deviations of every sample from its own cluster centroid.
    within_group = 0.0
    cluster_ids.each_with_index do |cluster_id, idx|
      members = x[y.eq(cluster_id), true]
      center = members.mean(0)
      centroids[idx, true] = center
      within_group += ((members - center)**2).sum
    end

    # Every sample coincides with its centroid; the ratio below would divide by zero.
    return 1.0 if within_group.zero?

    # Between-group dispersion: squared deviations of the centroids from the overall mean,
    # weighted by the number of samples in each cluster.
    overall_mean = x.mean(0)
    between_group = 0.0
    cluster_ids.each_with_index do |cluster_id, idx|
      n_members = y.eq(cluster_id).count
      between_group += n_members * ((centroids[idx, true] - overall_mean)**2).sum
    end

    # NOTE: with a single cluster, n_clusters - 1 is zero and the result is not a finite score.
    n_samples = x.shape[0]
    between_variance = between_group / (n_clusters - 1)
    within_variance = within_group / (n_samples - n_clusters)
    between_variance / within_variance
  end
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/pairwise_metric'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module EvaluationMeasure
|
8
|
+
# DaviesBouldinScore is a class that calculates the Davies-Bouldin score.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# evaluator = Rumale::EvaluationMeasure::DaviesBouldinScore.new
|
12
|
+
# puts evaluator.score(x, predicted)
|
13
|
+
#
|
14
|
+
# *Reference*
|
15
|
+
# - D L. Davies and D W. Bouldin, "A Cluster Separation Measure," IEEE Trans. Pattern Analysis and Machine Intelligence, Vol. PAMI-1, No. 2, pp. 224--227, 1979.
|
16
|
+
class DaviesBouldinScore
  include Base::Evaluator

  # Computes the Davies-Bouldin score of a clustering result.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
  # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
  # @return [Float] The Davies-Bouldin score.
  def score(x, y)
    check_sample_array(x)
    check_label_array(y)
    check_sample_label_size(x, y)

    cluster_ids = y.to_a.uniq.sort
    n_clusters = cluster_ids.size

    scatter = Numo::DFloat.zeros(n_clusters)
    centroids = Numo::DFloat.zeros(n_clusters, x.shape[1])

    # Per cluster: the centroid and the mean Euclidean distance of its members to that centroid.
    cluster_ids.each_with_index do |cluster_id, idx|
      members = x[y.eq(cluster_id), true]
      center = members.mean(0)
      centroids[idx, true] = center
      scatter[idx] = Rumale::PairwiseMetric.euclidean_distance(members, center.expand_dims(0)).mean
    end

    separation = Rumale::PairwiseMetric.euclidean_distance(centroids)
    # Zero distances are replaced by infinity so the ratio below becomes zero instead of dividing by zero.
    separation[separation.eq(0)] = Float::INFINITY
    ratios = (scatter.expand_dims(1) + scatter) / separation
    # A cluster must never be selected as its own most similar cluster.
    ratios[ratios.diag_indices] = -Float::INFINITY
    ratios.max(0).mean
  end
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/pairwise_metric'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module EvaluationMeasure
|
8
|
+
# SilhouetteScore is a class that calculates the Silhouette Coefficient.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# evaluator = Rumale::EvaluationMeasure::SilhouetteScore.new
|
12
|
+
# puts evaluator.score(x, predicted)
|
13
|
+
#
|
14
|
+
# *Reference*
|
15
|
+
# - P J. Rousseeuw, "Silhouettes: A graphical aid to the interpretation and validation of cluster analysis," Journal of Computational and Applied Mathematics, Vol. 20, pp. 53--65, 1987.
|
16
|
+
class SilhouetteScore
  include Base::Evaluator

  # Create a new evaluator that calculates the silhouette coefficient.
  #
  # @param metric [String] The metric to calculate the silhouette coefficient.
  #   If metric is 'euclidean', Euclidean distance is used for dissimilarity between sample points.
  #   If metric is 'precomputed', the score method expects to be given a distance matrix.
  def initialize(metric: 'euclidean')
    check_params_string(metric: metric)
    @metric = metric
  end

  # Calculates the silhouette coefficient.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
  #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
  # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
  # @return [Float] The mean of silhouette coefficient.
  # @raise [ArgumentError] If the metric is 'precomputed' and x is not a square matrix.
  def score(x, y)
    check_sample_array(x)
    check_label_array(y)
    check_sample_label_size(x, y)

    precomputed = @metric == 'precomputed'
    # A precomputed matrix is indexed by sample on both axes, so it has to be square.
    raise ArgumentError, 'Expect the input distance matrix to be square.' if precomputed && x.shape[0] != x.shape[1]

    dist_mat = precomputed ? x : Rumale::PairwiseMetric.euclidean_distance(x)
    n_samples = dist_mat.shape[0]

    # One boolean membership mask per cluster, computed once and reused by both passes below.
    cluster_masks = y.to_a.uniq.sort.map { |label| y.eq(label) }

    # Mean distance from each sample to the other members of its own cluster.
    # Samples in singleton clusters are masked out so that they contribute a silhouette of zero.
    intra_dists = Numo::DFloat.zeros(n_samples)
    mask = Numo::DFloat.ones(n_samples)
    cluster_masks.each do |cls_pos|
      sz_cluster = cls_pos.count
      if sz_cluster <= 1
        mask[cls_pos] = 0
        next
      end
      cls_dist_mat = dist_mat[cls_pos, cls_pos].dup
      # The distance of a sample to itself must not count towards its intra-cluster mean.
      cls_dist_mat[cls_dist_mat.diag_indices] = 0.0
      intra_dists[cls_pos] = cls_dist_mat.sum(0) / (sz_cluster - 1)
    end

    # Smallest mean distance from each sample to the members of any other cluster.
    inter_dists = Numo::DFloat.zeros(n_samples) + Float::INFINITY
    cluster_masks.each_with_index do |cls_pos, m|
      cluster_masks.each_with_index do |other_pos, n|
        next if m == n

        inter_dists[cls_pos] = Numo::DFloat.minimum(
          inter_dists[cls_pos], dist_mat[cls_pos, other_pos].mean(1)
        )
      end
    end

    silhouettes = mask * ((inter_dists - intra_dists) / Numo::DFloat.maximum(inter_dists, intra_dists))
    # Undefined ratios (e.g. 0 / 0, or infinity / infinity when there is only one cluster) count as zero.
    silhouettes[silhouettes.isnan] = 0.0

    silhouettes.mean
  end
end
|
79
|
+
end
|
80
|
+
end
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.6
|
4
|
+
version: 0.13.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -185,6 +185,8 @@ files:
|
|
185
185
|
- lib/rumale/ensemble/random_forest_regressor.rb
|
186
186
|
- lib/rumale/evaluation_measure/accuracy.rb
|
187
187
|
- lib/rumale/evaluation_measure/adjusted_rand_score.rb
|
188
|
+
- lib/rumale/evaluation_measure/calinski_harabasz_score.rb
|
189
|
+
- lib/rumale/evaluation_measure/davies_bouldin_score.rb
|
188
190
|
- lib/rumale/evaluation_measure/explained_variance_score.rb
|
189
191
|
- lib/rumale/evaluation_measure/f_score.rb
|
190
192
|
- lib/rumale/evaluation_measure/log_loss.rb
|
@@ -200,6 +202,7 @@ files:
|
|
200
202
|
- lib/rumale/evaluation_measure/r2_score.rb
|
201
203
|
- lib/rumale/evaluation_measure/recall.rb
|
202
204
|
- lib/rumale/evaluation_measure/roc_auc.rb
|
205
|
+
- lib/rumale/evaluation_measure/silhouette_score.rb
|
203
206
|
- lib/rumale/kernel_approximation/rbf.rb
|
204
207
|
- lib/rumale/kernel_machine/kernel_pca.rb
|
205
208
|
- lib/rumale/kernel_machine/kernel_ridge.rb
|