rumale 0.13.6 → 0.13.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e78d2a2eeb35fc8409dac683e2f8a3b90e5c396d
4
- data.tar.gz: 8ae3c1396efeac327288a5fe534661ea65d0d766
3
+ metadata.gz: 6b7030e096e10df1a95d79aefe6d275c0ca16406
4
+ data.tar.gz: 85f6b809e41ecb4743df7e07b99bc3fcf13710e1
5
5
  SHA512:
6
- metadata.gz: f25dee4375b9d9707374341b7cebe19973ea66f8e8b42af92806aa50b2b41323fcd0ba1470b9a87266859e1c30eea8a1a583ffa620519998df6da91bca8e1b23
7
- data.tar.gz: 2e9a3cd0d87aae35e180e74c8335a93b1126f57fd9d4f5c727e60decf5b81982b96f0bdcd2e99c4104169341a1dc4db9a6d631c1fe5cf8eb709ccc5ae9f377e2
6
+ metadata.gz: efb25c2ea461c3ceb9b8d500a4ef77dd4db6e4a2c21400009e8fd940bd28dab6d4a725a320a5e3cc1a1b7676626d03b9c2ef73c9c02246c29e122461680488d7
7
+ data.tar.gz: 264c10852a7eb01ddb075c87969625f2cb82eb0bfa3050e21bf41f355d8213808c3684a63a0e7e95480965e0737ae76a85babee60b0a638274c31bc1741774b9
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.13.7
2
+ - Add some evaluator classes for clustering.
3
+ - SilhouetteScore
4
+ - CalinskiHarabaszScore
5
+ - DaviesBouldinScore
6
+
1
7
  # 0.13.6
2
8
  - Add transformer class for [FastICA](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FastICA.html).
3
9
  - Fix a typo on README ([#13](https://github.com/yoshoku/rumale/pull/13)).
data/lib/rumale.rb CHANGED
@@ -104,3 +104,6 @@ require 'rumale/evaluation_measure/adjusted_rand_score'
104
104
  require 'rumale/evaluation_measure/purity'
105
105
  require 'rumale/evaluation_measure/mutual_information'
106
106
  require 'rumale/evaluation_measure/normalized_mutual_information'
107
+ require 'rumale/evaluation_measure/silhouette_score'
108
+ require 'rumale/evaluation_measure/davies_bouldin_score'
109
+ require 'rumale/evaluation_measure/calinski_harabasz_score'
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+
5
+ module Rumale
6
+ module EvaluationMeasure
7
+ # CalinskiHarabaszScore is a class that calculates the Calinski and Harabasz score.
8
+ #
9
+ # @example
10
+ # evaluator = Rumale::EvaluationMeasure::CalinskiHarabaszScore.new
11
+ # puts evaluator.score(x, predicted)
12
+ #
13
+ # *Reference*
14
+ # - T. Calinski and J. Harabasz, "A dendrite method for cluster analysis," Communications in Statistics, Vol. 3 (1), pp. 1--27, 1974.
15
+ class CalinskiHarabaszScore
16
+ include Base::Evaluator
17
+
18
+ # Calculates the Calinski and Harabasz score.
19
+ #
20
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
21
+ # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
22
+ # @return [Float] The Calinski and Harabasz score.
23
+ def score(x, y)
24
+ check_sample_array(x)
25
+ check_label_array(y)
26
+ check_sample_label_size(x, y)
27
+
28
+ labels = y.to_a.uniq.sort
29
+ n_clusters = labels.size
30
+ n_dimensions = x.shape[1]
31
+
32
+ centroids = Numo::DFloat.zeros(n_clusters, n_dimensions)
33
+
34
+ within_group = 0.0
35
+ n_clusters.times do |n|
36
+ cls_samples = x[y.eq(labels[n]), true]
37
+ cls_centroid = cls_samples.mean(0)
38
+ centroids[n, true] = cls_centroid
39
+ within_group += ((cls_samples - cls_centroid)**2).sum
40
+ end
41
+
42
+ return 1.0 if within_group.zero?
43
+
44
+ mean_vec = x.mean(0)
45
+ between_group = 0.0
46
+ n_clusters.times do |n|
47
+ sz_cluster = y.eq(labels[n]).count
48
+ between_group += sz_cluster * ((centroids[n, true] - mean_vec)**2).sum
49
+ end
50
+
51
+ n_samples = x.shape[0]
52
+ (between_group / (n_clusters - 1)) / (within_group / (n_samples - n_clusters))
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+ require 'rumale/pairwise_metric'
5
+
6
+ module Rumale
7
+ module EvaluationMeasure
8
+ # DaviesBouldinScore is a class that calculates the Davies-Bouldin score.
9
+ #
10
+ # @example
11
+ # evaluator = Rumale::EvaluationMeasure::DaviesBouldinScore.new
12
+ # puts evaluator.score(x, predicted)
13
+ #
14
+ # *Reference*
15
+ # - D. L. Davies and D. W. Bouldin, "A Cluster Separation Measure," IEEE Trans. Pattern Analysis and Machine Intelligence, Vol. PAMI-1, No. 2, pp. 224--227, 1979.
16
+ class DaviesBouldinScore
17
+ include Base::Evaluator
18
+
19
+ # Calculates the Davies-Bouldin score.
20
+ #
21
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
22
+ # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
23
+ # @return [Float] The Davies-Bouldin score.
24
+ def score(x, y)
25
+ check_sample_array(x)
26
+ check_label_array(y)
27
+ check_sample_label_size(x, y)
28
+
29
+ labels = y.to_a.uniq.sort
30
+ n_clusters = labels.size
31
+ n_dimensions = x.shape[1]
32
+
33
+ dist_cluster = Numo::DFloat.zeros(n_clusters)
34
+ centroids = Numo::DFloat.zeros(n_clusters, n_dimensions)
35
+
36
+ n_clusters.times do |n|
37
+ cls_samples = x[y.eq(labels[n]), true]
38
+ cls_centroid = cls_samples.mean(0)
39
+ centroids[n, true] = cls_centroid
40
+ dist_cluster[n] = Rumale::PairwiseMetric.euclidean_distance(cls_samples, cls_centroid.expand_dims(0)).mean
41
+ end
42
+
43
+ dist_centroid = Rumale::PairwiseMetric.euclidean_distance(centroids)
44
+ # p dist_cluster
45
+ # p dist_centroid
46
+ dist_centroid[dist_centroid.eq(0)] = Float::INFINITY
47
+ dist_mat = (dist_cluster.expand_dims(1) + dist_cluster) / dist_centroid
48
+ dist_mat[dist_mat.diag_indices] = -Float::INFINITY
49
+ dist_mat.max(0).mean
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+ require 'rumale/pairwise_metric'
5
+
6
+ module Rumale
7
+ module EvaluationMeasure
8
+ # SilhouetteScore is a class that calculates the Silhouette Coefficient.
9
+ #
10
+ # @example
11
+ # evaluator = Rumale::EvaluationMeasure::SilhouetteScore.new
12
+ # puts evaluator.score(x, predicted)
13
+ #
14
+ # *Reference*
15
+ # - P. J. Rousseeuw, "Silhouettes: A graphical aid to the interpretation and validation of cluster analysis," Journal of Computational and Applied Mathematics, Vol. 20, pp. 53--65, 1987.
16
+ class SilhouetteScore
17
+ include Base::Evaluator
18
+
19
+ # Create a new evaluator that calculates the silhouette coefficient.
20
+ #
21
+ # @param metric [String] The metric to calculate the silhouette coefficient.
22
+ # If metric is 'euclidean', Euclidean distance is used for dissimilarity between sample points.
23
+ # If metric is 'precomputed', the score method expects to be given a distance matrix.
24
+ def initialize(metric: 'euclidean')
25
+ check_params_string(metric: metric)
26
+ @metric = metric
27
+ end
28
+
29
+ # Calculates the silhouette coefficient.
30
+ #
31
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
32
+ # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
33
+ # @return [Float] The mean silhouette coefficient over all samples.
34
+ def score(x, y)
35
+ check_sample_array(x)
36
+ check_label_array(y)
37
+ check_sample_label_size(x, y)
38
+
39
+ dist_mat = @metric == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
40
+
41
+ labels = y.to_a.uniq.sort
42
+ n_clusters = labels.size
43
+ n_samples = dist_mat.shape[0]
44
+
45
+ intra_dists = Numo::DFloat.zeros(n_samples)
46
+ n_clusters.times do |n|
47
+ cls_pos = y.eq(labels[n])
48
+ sz_cluster = cls_pos.count
49
+ next unless sz_cluster > 1
50
+ cls_dist_mat = dist_mat[cls_pos, cls_pos].dup
51
+ cls_dist_mat[cls_dist_mat.diag_indices] = 0.0
52
+ intra_dists[cls_pos] = cls_dist_mat.sum(0) / (sz_cluster - 1)
53
+ end
54
+
55
+ inter_dists = Numo::DFloat.zeros(n_samples) + Float::INFINITY
56
+ n_clusters.times do |m|
57
+ cls_pos = y.eq(labels[m])
58
+ n_clusters.times do |n|
59
+ next if m == n
60
+ not_cls_pos = y.eq(labels[n])
61
+ inter_dists[cls_pos] = Numo::DFloat.minimum(
62
+ inter_dists[cls_pos], dist_mat[cls_pos, not_cls_pos].mean(1)
63
+ )
64
+ end
65
+ end
66
+
67
+ mask = Numo::DFloat.ones(n_samples)
68
+ n_clusters.times do |n|
69
+ cls_pos = y.eq(labels[n])
70
+ mask[cls_pos] = 0 unless cls_pos.count > 1
71
+ end
72
+
73
+ silhouettes = mask * ((inter_dists - intra_dists) / Numo::DFloat.maximum(inter_dists, intra_dists))
74
+ silhouettes[silhouettes.isnan] = 0.0
75
+
76
+ silhouettes.mean
77
+ end
78
+ end
79
+ end
80
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.13.6'
6
+ VERSION = '0.13.7'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.6
4
+ version: 0.13.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-13 00:00:00.000000000 Z
11
+ date: 2019-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -185,6 +185,8 @@ files:
185
185
  - lib/rumale/ensemble/random_forest_regressor.rb
186
186
  - lib/rumale/evaluation_measure/accuracy.rb
187
187
  - lib/rumale/evaluation_measure/adjusted_rand_score.rb
188
+ - lib/rumale/evaluation_measure/calinski_harabasz_score.rb
189
+ - lib/rumale/evaluation_measure/davies_bouldin_score.rb
188
190
  - lib/rumale/evaluation_measure/explained_variance_score.rb
189
191
  - lib/rumale/evaluation_measure/f_score.rb
190
192
  - lib/rumale/evaluation_measure/log_loss.rb
@@ -200,6 +202,7 @@ files:
200
202
  - lib/rumale/evaluation_measure/r2_score.rb
201
203
  - lib/rumale/evaluation_measure/recall.rb
202
204
  - lib/rumale/evaluation_measure/roc_auc.rb
205
+ - lib/rumale/evaluation_measure/silhouette_score.rb
203
206
  - lib/rumale/kernel_approximation/rbf.rb
204
207
  - lib/rumale/kernel_machine/kernel_pca.rb
205
208
  - lib/rumale/kernel_machine/kernel_ridge.rb