rumale 0.13.6 → 0.13.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e78d2a2eeb35fc8409dac683e2f8a3b90e5c396d
4
- data.tar.gz: 8ae3c1396efeac327288a5fe534661ea65d0d766
3
+ metadata.gz: 6b7030e096e10df1a95d79aefe6d275c0ca16406
4
+ data.tar.gz: 85f6b809e41ecb4743df7e07b99bc3fcf13710e1
5
5
  SHA512:
6
- metadata.gz: f25dee4375b9d9707374341b7cebe19973ea66f8e8b42af92806aa50b2b41323fcd0ba1470b9a87266859e1c30eea8a1a583ffa620519998df6da91bca8e1b23
7
- data.tar.gz: 2e9a3cd0d87aae35e180e74c8335a93b1126f57fd9d4f5c727e60decf5b81982b96f0bdcd2e99c4104169341a1dc4db9a6d631c1fe5cf8eb709ccc5ae9f377e2
6
+ metadata.gz: efb25c2ea461c3ceb9b8d500a4ef77dd4db6e4a2c21400009e8fd940bd28dab6d4a725a320a5e3cc1a1b7676626d03b9c2ef73c9c02246c29e122461680488d7
7
+ data.tar.gz: 264c10852a7eb01ddb075c87969625f2cb82eb0bfa3050e21bf41f355d8213808c3684a63a0e7e95480965e0737ae76a85babee60b0a638274c31bc1741774b9
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.13.7
2
+ - Add some evaluator classes for clustering.
3
+ - SilhouetteScore
4
+ - CalinskiHarabaszScore
5
+ - DaviesBouldinScore
6
+
1
7
  # 0.13.6
2
8
  - Add transformer class for [FastICA](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FastICA.html).
3
9
  - Fix a typo on README ([#13](https://github.com/yoshoku/rumale/pull/13)).
data/lib/rumale.rb CHANGED
@@ -104,3 +104,6 @@ require 'rumale/evaluation_measure/adjusted_rand_score'
104
104
  require 'rumale/evaluation_measure/purity'
105
105
  require 'rumale/evaluation_measure/mutual_information'
106
106
  require 'rumale/evaluation_measure/normalized_mutual_information'
107
+ require 'rumale/evaluation_measure/silhouette_score'
108
+ require 'rumale/evaluation_measure/davies_bouldin_score'
109
+ require 'rumale/evaluation_measure/calinski_harabasz_score'
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+
5
+ module Rumale
6
+ module EvaluationMeasure
7
+ # CalinskiHarabaszScore is a class that calculates the Calinski and Harabasz score.
8
+ #
9
+ # @example
10
+ # evaluator = Rumale::EvaluationMeasure::CalinskiHarabaszScore.new
11
+ # puts evaluator.score(x, predicted)
12
+ #
13
+ # *Reference*
14
+ # - T. Calinski and J. Harabasz, "A dendrite method for cluster analysis," Communications in Statistics, Vol. 3 (1), pp. 1--27, 1974.
15
+ class CalinskiHarabaszScore
16
+ include Base::Evaluator
17
+
18
+ # Calculates the Calinski and Harabasz score.
19
+ #
20
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
21
+ # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
22
+ # @return [Float] The Calinski and Harabasz score.
23
+ def score(x, y)
24
+ check_sample_array(x)
25
+ check_label_array(y)
26
+ check_sample_label_size(x, y)
27
+
28
+ labels = y.to_a.uniq.sort
29
+ n_clusters = labels.size
30
+ n_dimensions = x.shape[1]
31
+
32
+ centroids = Numo::DFloat.zeros(n_clusters, n_dimensions)
33
+
34
+ within_group = 0.0
35
+ n_clusters.times do |n|
36
+ cls_samples = x[y.eq(labels[n]), true]
37
+ cls_centroid = cls_samples.mean(0)
38
+ centroids[n, true] = cls_centroid
39
+ within_group += ((cls_samples - cls_centroid)**2).sum
40
+ end
41
+
42
+ return 1.0 if within_group.zero?
43
+
44
+ mean_vec = x.mean(0)
45
+ between_group = 0.0
46
+ n_clusters.times do |n|
47
+ sz_cluster = y.eq(labels[n]).count
48
+ between_group += sz_cluster * ((centroids[n, true] - mean_vec)**2).sum
49
+ end
50
+
51
+ n_samples = x.shape[0]
52
+ (between_group / (n_clusters - 1)) / (within_group / (n_samples - n_clusters))
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+ require 'rumale/pairwise_metric'
5
+
6
+ module Rumale
7
+ module EvaluationMeasure
8
+ # DaviesBouldinScore is a class that calculates the Davies-Bouldin score.
9
+ #
10
+ # @example
11
+ # evaluator = Rumale::EvaluationMeasure::DaviesBouldinScore.new
12
+ # puts evaluator.score(x, predicted)
13
+ #
14
+ # *Reference*
15
+ # - D L. Davies and D W. Bouldin, "A Cluster Separation Measure," IEEE Trans. Pattern Analysis and Machine Intelligence, Vol. PAMI-1, No. 2, pp. 224--227, 1979.
16
+ class DaviesBouldinScore
17
+ include Base::Evaluator
18
+
19
+ # Calculates the Davies-Bouldin score.
20
+ #
21
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
22
+ # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
23
+ # @return [Float] The Davies-Bouldin score.
24
+ def score(x, y)
25
+ check_sample_array(x)
26
+ check_label_array(y)
27
+ check_sample_label_size(x, y)
28
+
29
+ labels = y.to_a.uniq.sort
30
+ n_clusters = labels.size
31
+ n_dimensions = x.shape[1]
32
+
33
+ dist_cluster = Numo::DFloat.zeros(n_clusters)
34
+ centroids = Numo::DFloat.zeros(n_clusters, n_dimensions)
35
+
36
+ n_clusters.times do |n|
37
+ cls_samples = x[y.eq(labels[n]), true]
38
+ cls_centroid = cls_samples.mean(0)
39
+ centroids[n, true] = cls_centroid
40
+ dist_cluster[n] = Rumale::PairwiseMetric.euclidean_distance(cls_samples, cls_centroid.expand_dims(0)).mean
41
+ end
42
+
43
+ dist_centroid = Rumale::PairwiseMetric.euclidean_distance(centroids)
44
+ # p dist_cluster
45
+ # p dist_centroid
46
+ dist_centroid[dist_centroid.eq(0)] = Float::INFINITY
47
+ dist_mat = (dist_cluster.expand_dims(1) + dist_cluster) / dist_centroid
48
+ dist_mat[dist_mat.diag_indices] = -Float::INFINITY
49
+ dist_mat.max(0).mean
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+ require 'rumale/pairwise_metric'
5
+
6
+ module Rumale
7
+ module EvaluationMeasure
8
+ # SilhouetteScore is a class that calculates the Silhouette Coefficient.
9
+ #
10
+ # @example
11
+ # evaluator = Rumale::EvaluationMeasure::SilhouetteScore.new
12
+ # puts evaluator.score(x, predicted)
13
+ #
14
+ # *Reference*
15
+ # - P J. Rousseeuw, "Silhouettes: A graphical aid to the interpretation and validation of cluster analysis," Journal of Computational and Applied Mathematics, Vol. 20, pp. 53--65, 1987.
16
+ class SilhouetteScore
17
+ include Base::Evaluator
18
+
19
+ # Create a new evaluator that calculates the silhouette coefficient.
20
+ #
21
+ # @param metric [String] The metric to calculate the silhouette coefficient.
22
+ # If metric is 'euclidean', Euclidean distance is used for dissimilarity between sample points.
23
+ # If metric is 'precomputed', the score method expects to be given a distance matrix.
24
+ def initialize(metric: 'euclidean')
25
+ check_params_string(metric: metric)
26
+ @metric = metric
27
+ end
28
+
29
+ # Calculates the silhouette coefficient.
30
+ #
31
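+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score. If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).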
32
+ # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
33
+ # @return [Float] The mean of silhouette coefficient.
34
+ def score(x, y)
35
+ check_sample_array(x)
36
+ check_label_array(y)
37
+ check_sample_label_size(x, y)
38
+
39
+ dist_mat = @metric == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
40
+
41
+ labels = y.to_a.uniq.sort
42
+ n_clusters = labels.size
43
+ n_samples = dist_mat.shape[0]
44
+
45
+ intra_dists = Numo::DFloat.zeros(n_samples)
46
+ n_clusters.times do |n|
47
+ cls_pos = y.eq(labels[n])
48
+ sz_cluster = cls_pos.count
49
+ next unless sz_cluster > 1
50
+ cls_dist_mat = dist_mat[cls_pos, cls_pos].dup
51
+ cls_dist_mat[cls_dist_mat.diag_indices] = 0.0
52
+ intra_dists[cls_pos] = cls_dist_mat.sum(0) / (sz_cluster - 1)
53
+ end
54
+
55
+ inter_dists = Numo::DFloat.zeros(n_samples) + Float::INFINITY
56
+ n_clusters.times do |m|
57
+ cls_pos = y.eq(labels[m])
58
+ n_clusters.times do |n|
59
+ next if m == n
60
+ not_cls_pos = y.eq(labels[n])
61
+ inter_dists[cls_pos] = Numo::DFloat.minimum(
62
+ inter_dists[cls_pos], dist_mat[cls_pos, not_cls_pos].mean(1)
63
+ )
64
+ end
65
+ end
66
+
67
+ mask = Numo::DFloat.ones(n_samples)
68
+ n_clusters.times do |n|
69
+ cls_pos = y.eq(labels[n])
70
+ mask[cls_pos] = 0 unless cls_pos.count > 1
71
+ end
72
+
73
+ silhouettes = mask * ((inter_dists - intra_dists) / Numo::DFloat.maximum(inter_dists, intra_dists))
74
+ silhouettes[silhouettes.isnan] = 0.0
75
+
76
+ silhouettes.mean
77
+ end
78
+ end
79
+ end
80
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.13.6'
6
+ VERSION = '0.13.7'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.6
4
+ version: 0.13.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-13 00:00:00.000000000 Z
11
+ date: 2019-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -185,6 +185,8 @@ files:
185
185
  - lib/rumale/ensemble/random_forest_regressor.rb
186
186
  - lib/rumale/evaluation_measure/accuracy.rb
187
187
  - lib/rumale/evaluation_measure/adjusted_rand_score.rb
188
+ - lib/rumale/evaluation_measure/calinski_harabasz_score.rb
189
+ - lib/rumale/evaluation_measure/davies_bouldin_score.rb
188
190
  - lib/rumale/evaluation_measure/explained_variance_score.rb
189
191
  - lib/rumale/evaluation_measure/f_score.rb
190
192
  - lib/rumale/evaluation_measure/log_loss.rb
@@ -200,6 +202,7 @@ files:
200
202
  - lib/rumale/evaluation_measure/r2_score.rb
201
203
  - lib/rumale/evaluation_measure/recall.rb
202
204
  - lib/rumale/evaluation_measure/roc_auc.rb
205
+ - lib/rumale/evaluation_measure/silhouette_score.rb
203
206
  - lib/rumale/kernel_approximation/rbf.rb
204
207
  - lib/rumale/kernel_machine/kernel_pca.rb
205
208
  - lib/rumale/kernel_machine/kernel_ridge.rb