rumale-evaluation_measure 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+
5
+ module Rumale
6
+ module EvaluationMeasure
7
# MutualInformation is a class that calculates the mutual information.
#
# @example
#   require 'rumale/evaluation_measure/mutual_information'
#
#   evaluator = Rumale::EvaluationMeasure::MutualInformation.new
#   puts evaluator.score(ground_truth, predicted)
#
# *Reference*
# - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance," J. Machine Learning Research, vol. 11, pp. 2837--2854, 2010.
class MutualInformation
  include ::Rumale::Base::Evaluator

  # Calculate mutual information
  #
  # The value is accumulated with the natural logarithm (Math.log), and pairs of
  # class and cluster that share no sample contribute nothing to the sum.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
  # @return [Float] Mutual information.
  def score(y_true, y_pred)
    n_samples = y_pred.size
    # Sample indices belonging to each class / cluster. These are computed once
    # up front because they do not depend on the other side of the pairing.
    class_members = y_true.to_a.uniq.map { |j| y_true.eq(j).where.to_a }
    cluster_members = y_pred.to_a.uniq.map { |k| y_pred.eq(k).where.to_a }

    mutual_information = 0.0
    cluster_members.each do |pr_sample_ids|
      n_pr_samples = pr_sample_ids.size
      class_members.each do |tr_sample_ids|
        n_intr_samples = (pr_sample_ids & tr_sample_ids).size
        # An empty intersection would require log(0); its contribution is zero.
        next unless n_intr_samples.positive?

        n_tr_samples = tr_sample_ids.size
        mutual_information +=
          n_intr_samples.fdiv(n_samples) * Math.log((n_samples * n_intr_samples).fdiv(n_pr_samples * n_tr_samples))
      end
    end
    mutual_information
  end
end
48
+ end
49
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+ require 'rumale/evaluation_measure/mutual_information'
5
+
6
+ module Rumale
7
+ module EvaluationMeasure
8
# NormalizedMutualInformation is a class that calculates the normalized mutual information.
#
# @example
#   require 'rumale/evaluation_measure/normalized_mutual_information'
#
#   evaluator = Rumale::EvaluationMeasure::NormalizedMutualInformation.new
#   puts evaluator.score(ground_truth, predicted)
#
# *Reference*
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
# - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance," J. Machine Learning Research, vol. 11, pp. 2837--2854, 2010.
class NormalizedMutualInformation
  include ::Rumale::Base::Evaluator

  # Calculate normalized mutual information
  #
  # The mutual information is divided by the geometric mean of the class entropy
  # and the cluster entropy. When either entropy is zero the score is 0.0.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
  # @return [Float] Normalized mutual information
  def score(y_true, y_pred)
    true_entropy = entropy(y_true)
    return 0.0 if true_entropy.zero?

    pred_entropy = entropy(y_pred)
    return 0.0 if pred_entropy.zero?

    normalizer = Math.sqrt(true_entropy * pred_entropy)
    MutualInformation.new.score(y_true, y_pred) / normalizer
  end

  private

  # Entropy (natural logarithm) of the label distribution found in y.
  def entropy(y)
    total = y.size
    weighted_logs = y.to_a.uniq.sum do |label|
      proportion = y.eq(label).count.fdiv(total)
      proportion * Math.log(proportion)
    end
    -weighted_logs
  end
end
52
+ end
53
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+ require 'rumale/evaluation_measure/precision_recall'
5
+
6
+ module Rumale
7
+ # This module consists of the classes for model evaluation.
8
+ module EvaluationMeasure
9
# Precision is a class that calculates the precision of the predicted labels.
#
# @example
#   require 'rumale/evaluation_measure/precision'
#
#   evaluator = Rumale::EvaluationMeasure::Precision.new
#   puts evaluator.score(ground_truth, predicted)
class Precision
  include ::Rumale::Base::Evaluator
  include ::Rumale::EvaluationMeasure::PrecisionRecall

  # Return the average type for calculation of precision.
  # @return [String] ('binary', 'micro', 'macro')
  attr_reader :average

  # Create a new evaluation measure calculator for precision score.
  #
  # @param average [String] The average type ('binary', 'micro', 'macro')
  def initialize(average: 'binary')
    @average = average
  end

  # Calculate average precision.
  #
  # With 'binary' the precision of the last (largest) label is reported.
  # Any other average type than the three listed yields nil.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Average precision
  def score(y_true, y_pred)
    case @average
    when 'binary' then precision_each_class(y_true, y_pred).last
    when 'micro' then micro_average_precision(y_true, y_pred)
    when 'macro' then macro_average_precision(y_true, y_pred)
    end
  end
end
47
+ end
48
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require 'rumale/base/evaluator'
6
+
7
+ module Rumale
8
+ # This module consists of the classes for model evaluation.
9
+ module EvaluationMeasure
10
# Shared helpers for the precision, recall and F-score evaluators.
#
# All methods take label vectors that respond to the Numo::NArray API used
# below (sort, to_a, eq, ne, [] with a mask, empty?). The set of evaluated
# labels is always taken from y_true; labels that only occur in y_pred are
# not scored on their own.
#
# @!visibility private
module PrecisionRecall
  module_function

  # Precision of every label found in y_true, in ascending label order.
  # A label that is never predicted gets 0.0.
  #
  # @!visibility private
  def precision_each_class(y_true, y_pred)
    y_true.sort.to_a.uniq.map do |label|
      n_true_positives, n_predicted = match_counts(y_true, y_pred, y_pred.eq(label))
      safe_ratio(n_true_positives, n_predicted)
    end
  end

  # Recall of every label found in y_true, in ascending label order.
  #
  # @!visibility private
  def recall_each_class(y_true, y_pred)
    y_true.sort.to_a.uniq.map do |label|
      n_true_positives, n_actual = match_counts(y_true, y_pred, y_true.eq(label))
      safe_ratio(n_true_positives, n_actual)
    end
  end

  # F1 score of every label found in y_true, in ascending label order.
  #
  # @!visibility private
  def f_score_each_class(y_true, y_pred)
    precisions = precision_each_class(y_true, y_pred)
    recalls = recall_each_class(y_true, y_pred)
    precisions.zip(recalls).map { |prec, recl| harmonic_mean(prec, recl) }
  end

  # Precision computed from the counts pooled over all labels of y_true.
  # Returns 0.0 (rather than NaN) when no prediction falls on a known label,
  # and for empty input.
  #
  # @!visibility private
  def micro_average_precision(y_true, y_pred)
    counts = y_true.sort.to_a.uniq.map { |label| match_counts(y_true, y_pred, y_pred.eq(label)) }
    safe_ratio(counts.sum(&:first), counts.sum(&:last))
  end

  # Recall computed from the counts pooled over all labels of y_true.
  # Returns 0.0 for empty input.
  #
  # @!visibility private
  def micro_average_recall(y_true, y_pred)
    counts = y_true.sort.to_a.uniq.map { |label| match_counts(y_true, y_pred, y_true.eq(label)) }
    safe_ratio(counts.sum(&:first), counts.sum(&:last))
  end

  # Harmonic mean of the micro-averaged precision and recall.
  # Returns 0.0 when both are zero, matching f_score_each_class.
  #
  # @!visibility private
  def micro_average_f_score(y_true, y_pred)
    harmonic_mean(micro_average_precision(y_true, y_pred), micro_average_recall(y_true, y_pred))
  end

  # Unweighted mean of the per-label precisions (0.0 for empty input).
  #
  # @!visibility private
  def macro_average_precision(y_true, y_pred)
    mean_score(precision_each_class(y_true, y_pred))
  end

  # Unweighted mean of the per-label recalls (0.0 for empty input).
  #
  # @!visibility private
  def macro_average_recall(y_true, y_pred)
    mean_score(recall_each_class(y_true, y_pred))
  end

  # Unweighted mean of the per-label F1 scores (0.0 for empty input).
  #
  # @!visibility private
  def macro_average_f_score(y_true, y_pred)
    mean_score(f_score_each_class(y_true, y_pred))
  end

  # Counts, among the samples selected by target_positions, how many have
  # matching labels. Returns [n_matched, n_selected] as Floats, or [0.0, 0.0]
  # when the mask selects nothing.
  #
  # @!visibility private
  def match_counts(y_true, y_pred, target_positions)
    selected_pred = y_pred[target_positions]
    return [0.0, 0.0] if selected_pred.empty?

    selected_true = y_true[target_positions]
    n_matched = Numo::Int32.cast(selected_true.eq(selected_pred)).sum.to_f
    n_mismatched = Numo::Int32.cast(selected_true.ne(selected_pred)).sum.to_f
    [n_matched, n_matched + n_mismatched]
  end

  # numerator / denominator, or 0.0 when the denominator is zero.
  #
  # @!visibility private
  def safe_ratio(numerator, denominator)
    denominator.zero? ? 0.0 : numerator / denominator
  end

  # F1-style harmonic mean of two scores; 0.0 when both scores are zero.
  #
  # @!visibility private
  def harmonic_mean(prec, recl)
    return 0.0 if prec.zero? && recl.zero?

    (2.0 * prec * recl) / (prec + recl)
  end

  # Arithmetic mean of a list of scores; 0.0 for an empty list.
  #
  # @!visibility private
  def mean_score(scores)
    scores.empty? ? 0.0 : scores.sum / scores.size
  end
end
97
+ end
98
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+
5
+ module Rumale
6
+ module EvaluationMeasure
7
# Purity is a class that calculates the purity of clustering results.
#
# @example
#   require 'rumale/evaluation_measure/purity'
#
#   evaluator = Rumale::EvaluationMeasure::Purity.new
#   puts evaluator.score(ground_truth, predicted)
#
# *Reference*
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
class Purity
  include ::Rumale::Base::Evaluator

  # Calculate purity
  #
  # For every predicted cluster the size of its largest overlap with a single
  # ground truth class is counted; the total is divided by the number of samples.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
  # @return [Float] Purity
  def score(y_true, y_pred)
    n_samples = y_pred.size
    # Sample indices of each class, built once instead of once per cluster.
    class_members = y_true.to_a.uniq.map { |j| y_true.eq(j).where.to_a }

    n_majority_samples = y_pred.to_a.uniq.sum do |k|
      pr_sample_ids = y_pred.eq(k).where.to_a
      class_members.map { |tr_sample_ids| (pr_sample_ids & tr_sample_ids).size }.max
    end
    n_majority_samples.fdiv(n_samples)
  end
end
39
+ end
40
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+
5
+ module Rumale
6
+ module EvaluationMeasure
7
# R2Score is a class that calculates the coefficient of determination for the predicted values.
#
# @example
#   require 'rumale/evaluation_measure/r2_score'
#
#   evaluator = Rumale::EvaluationMeasure::R2Score.new
#   puts evaluator.score(ground_truth, predicted)
class R2Score
  include ::Rumale::Base::Evaluator

  # Create a new evaluation measure calculator for coefficient of determination.
  def initialize; end

  # Calculate the coefficient of determination.
  #
  # For multiple outputs the per-output scores are averaged. An output whose
  # ground truth values have zero variance is scored as 0.0.
  #
  # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
  # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
  # @return [Float] Coefficient of determination
  def score(y_true, y_pred)
    n_samples, n_outputs = y_true.shape
    residual_ss = ((y_true - y_pred)**2).sum(axis: 0)
    target_mean = y_true.sum(axis: 0) / n_samples
    total_ss = ((y_true - target_mean)**2).sum(axis: 0)

    # One-dimensional targets: the sums above are plain scalars.
    if n_outputs.nil?
      return 0.0 if total_ss.zero?

      return 1.0 - residual_ss / total_ss
    end

    per_output = 1 - residual_ss / total_ss
    per_output[total_ss.eq(0)] = 0.0
    per_output.sum / per_output.size
  end
end
39
+ end
40
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+ require 'rumale/evaluation_measure/precision_recall'
5
+
6
+ module Rumale
7
+ # This module consists of the classes for model evaluation.
8
+ module EvaluationMeasure
9
# Recall is a class that calculates the recall of the predicted labels.
#
# @example
#   require 'rumale/evaluation_measure/recall'
#
#   evaluator = Rumale::EvaluationMeasure::Recall.new
#   puts evaluator.score(ground_truth, predicted)
class Recall
  include ::Rumale::Base::Evaluator
  include ::Rumale::EvaluationMeasure::PrecisionRecall

  # Return the average type for calculation of recall.
  # @return [String] ('binary', 'micro', 'macro')
  attr_reader :average

  # Create a new evaluation measure calculator for recall score.
  #
  # @param average [String] The average type ('binary', 'micro', 'macro')
  def initialize(average: 'binary')
    @average = average
  end

  # Calculate average recall
  #
  # With 'binary' the recall of the last (largest) label is reported.
  # Any other average type than the three listed yields nil.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Average recall
  def score(y_true, y_pred)
    case @average
    when 'binary' then recall_each_class(y_true, y_pred).last
    when 'micro' then micro_average_recall(y_true, y_pred)
    when 'macro' then macro_average_recall(y_true, y_pred)
    end
  end
end
47
+ end
48
+ end
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+
5
+ module Rumale
6
+ module EvaluationMeasure
7
# ROCAUC is a class that calculates the area under the receiver operating characteristic curve from predicted scores.
#
# @example
#   require 'rumale/preprocessing'
#   require 'rumale/linear_model'
#   require 'rumale/evaluation_measure/roc_auc'
#
#   # Encode labels to integer array.
#   labels = %w[A B B C A A C C C A]
#   label_encoder = Rumale::Preprocessing::LabelEncoder.new
#   y = label_encoder.fit_transform(labels)
#   # Fit classifier.
#   classifier = Rumale::LinearModel::LogisticRegression.new
#   classifier.fit(x, y)
#   # Predict class probabilities.
#   y_score = classifier.predict_proba(x)
#   # Encode labels to one-hot vectors.
#   one_hot_encoder = Rumale::Preprocessing::OneHotEncoder.new
#   y_onehot = one_hot_encoder.fit_transform(y)
#   # Calculate ROC AUC.
#   evaluator = Rumale::EvaluationMeasure::ROCAUC.new
#   puts evaluator.score(y_onehot, y_score)
class ROCAUC
  include ::Rumale::Base::Evaluator

  # Calculate area under the receiver operating characteristic curve (ROC AUC).
  #
  # @param y_true [Numo::Int32] (shape: [n_samples] or [n_samples, n_classes])
  #   Ground truth binary labels or one-hot encoded multi-labels.
  # @param y_score [Numo::DFloat] (shape: [n_samples] or [n_samples, n_classes])
  #   Predicted class probabilities or confidence scores.
  # @return [Float] (macro-averaged) ROC AUC.
  def score(y_true, y_score)
    n_classes = y_score.shape[1]

    if n_classes.nil?
      # One-dimensional scores: a single binary curve.
      fpr, tpr, _thresholds = roc_curve(y_true, y_score)
      auc(fpr, tpr)
    else
      # One curve per column, then the unweighted mean of the areas.
      class_aucs = Array.new(n_classes) do |c|
        class_fpr, class_tpr, _class_thresholds = roc_curve(y_true[true, c], y_score[true, c])
        auc(class_fpr, class_tpr)
      end
      class_aucs.sum.fdiv(n_classes)
    end
  end

  # Calculate receiver operating characteristic curve.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth binary labels.
  # @param y_score [Numo::DFloat] (shape: [n_samples]) Predicted class probabilities or confidence scores.
  # @param pos_label [Integer] Label to be a positive label when binarizing the given labels.
  #   If nil is given, the method considers the maximum value of the label as a positive label.
  # @raise [ArgumentError] If pos_label is nil and y_true does not hold exactly two distinct labels,
  #   or if pos_label is given but does not occur in y_true.
  # @return [Array] fpr (Numo::DFloat): false positive rates. tpr (Numo::DFloat): true positive rates.
  #   thresholds (Numo::DFloat): thresholds on the decision function used to calculate fpr and tpr.
  def roc_curve(y_true, y_score, pos_label = nil)
    labels = y_true.to_a.uniq
    if pos_label.nil?
      unless labels.size == 2
        raise ArgumentError,
              'y_true must be binary labels or pos_label must be specified if y_true is multi-label'
      end
    elsif !labels.include?(pos_label)
      raise ArgumentError,
            'y_true must have elements whose values are pos_label.'
    end

    false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)

    # Prepend a (0, 0) point unless the curve already starts there.
    starts_at_origin = !true_pos.size.zero? && false_pos[0] == 0 && true_pos[0] == 0
    unless starts_at_origin
      # NOTE: Numo::NArray#insert is not a destructive method.
      # rubocop:disable Style/RedundantSelfAssignment
      true_pos = true_pos.insert(0, 0)
      false_pos = false_pos.insert(0, 0)
      thresholds = thresholds.insert(0, thresholds[0] + 1)
      # rubocop:enable Style/RedundantSelfAssignment
    end

    # Normalize the cumulative counts by their final (total) value.
    true_pos_rate = true_pos / true_pos[-1].to_f
    false_pos_rate = false_pos / false_pos[-1].to_f

    [false_pos_rate, true_pos_rate, thresholds]
  end

  # Calculate area under the curve using the trapezoidal rule.
  #
  # @param x [Numo::Int32/Numo::DFloat] (shape: [n_elements])
  #   x coordinates. These are expected to monotonously increase or decrease.
  # @param y [Numo::Int32/Numo::DFloat] (shape: [n_elements]) y coordinates.
  # @raise [ArgumentError] If fewer than two points are given.
  # @return [Float] area under the curve.
  def auc(x, y)
    n_points = [x.shape[0], y.shape[0]].min
    raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_points < 2

    (0...n_points).each_cons(2).sum do |left, right|
      0.5 * (x[left] - x[right]).abs * (y[left] + y[right])
    end
  end

  private

  # Cumulative false/true positive counts at every distinct score threshold,
  # with samples visited in descending score order.
  def binary_roc_curve(y_true, y_score, pos_label = nil)
    positive = pos_label.nil? ? y_true.to_a.uniq.max : pos_label

    descending = y_score.sort_index.reverse
    sorted_hits = Numo::Int32.cast(y_true.eq(positive)[descending])
    sorted_scores = y_score[descending]

    # Positions where the score changes, plus the very last sample.
    change_points = sorted_scores.diff.ne(0).where.to_a
    threshold_ids = Numo::Int32.cast(change_points.append(sorted_hits.size - 1))

    true_pos = sorted_hits.cumsum[threshold_ids]
    # (index + 1) samples have been seen at each threshold; the rest are false positives.
    false_pos = 1 + threshold_ids - true_pos

    [false_pos, true_pos, sorted_scores[threshold_ids]]
  end
end
125
+ end
126
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+ require 'rumale/pairwise_metric'
5
+
6
+ module Rumale
7
+ module EvaluationMeasure
8
# SilhouetteScore is a class that calculates the Silhouette Coefficient.
#
# @example
#   require 'rumale/evaluation_measure/silhouette_score'
#
#   evaluator = Rumale::EvaluationMeasure::SilhouetteScore.new
#   puts evaluator.score(x, predicted)
#
# *Reference*
# - Rousseuw, P J., "Silhouettes: A graphical aid to the interpretation and validation of cluster analysis," Journal of Computational and Applied Mathematics, Vol. 20, pp. 53--65, 1987.
class SilhouetteScore
  include ::Rumale::Base::Evaluator

  # Create a new evaluator that calculates the silhouette coefficient.
  #
  # @param metric [String] The metric to calculate the silhouette coefficient.
  #   If metric is 'euclidean', Euclidean distance is used for dissimilarity between sample points.
  #   If metric is 'precomputed', the score method expects to be given a distance matrix.
  def initialize(metric: 'euclidean')
    @metric = metric
  end

  # Calculates the silhouette coefficient.
  #
  # Samples in a cluster with a single member, and samples whose coefficient
  # is not a number, contribute 0.0 to the mean.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
  # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
  # @return [Float] The mean of silhouette coefficient.
  def score(x, y)
    dist_mat = if @metric == 'precomputed'
                 x
               else
                 ::Rumale::PairwiseMetric.euclidean_distance(x)
               end
    n_samples = dist_mat.shape[0]

    # One membership mask per cluster, in ascending label order.
    cluster_masks = y.to_a.uniq.sort.map { |label| y.eq(label) }

    # Mean distance to the other members of the own cluster; singleton
    # clusters keep 0 there and are switched off through the validity mask.
    intra_dists = Numo::DFloat.zeros(n_samples)
    valid = Numo::DFloat.ones(n_samples)
    cluster_masks.each do |members|
      cluster_size = members.count
      if cluster_size > 1
        within = dist_mat[members, members].dup
        within[within.diag_indices] = 0.0
        intra_dists[members] = within.sum(axis: 0) / (cluster_size - 1)
      else
        valid[members] = 0
      end
    end

    # Smallest mean distance to the members of any other cluster.
    inter_dists = Numo::DFloat.zeros(n_samples) + Float::INFINITY
    cluster_masks.each_with_index do |members, m|
      cluster_masks.each_with_index do |others, n|
        next if m == n

        to_other = dist_mat[members, others].mean(1)
        inter_dists[members] = Numo::DFloat.minimum(inter_dists[members], to_other)
      end
    end

    silhouettes = valid * ((inter_dists - intra_dists) / Numo::DFloat.maximum(inter_dists, intra_dists))
    silhouettes[silhouettes.isnan] = 0.0

    silhouettes.mean
  end
end
78
+ end
79
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rumale is a machine learning library in Ruby.
4
+ module Rumale
5
+ # This module consists of the classes for model evaluation. VERSION holds the version string of this gem.
6
+ module EvaluationMeasure
7
+ # @!visibility private
8
+ VERSION = '0.24.0'
9
+ end
10
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'numo/narray'
4
+
5
+ require_relative 'evaluation_measure/version'
6
+
7
+ require_relative 'evaluation_measure/accuracy'
8
+ require_relative 'evaluation_measure/adjusted_rand_score'
9
+ require_relative 'evaluation_measure/calinski_harabasz_score'
10
+ require_relative 'evaluation_measure/davies_bouldin_score'
11
+ require_relative 'evaluation_measure/explained_variance_score'
12
+ require_relative 'evaluation_measure/f_score'
13
+ require_relative 'evaluation_measure/function'
14
+ require_relative 'evaluation_measure/log_loss'
15
+ require_relative 'evaluation_measure/mean_absolute_error'
16
+ require_relative 'evaluation_measure/mean_squared_error'
17
+ require_relative 'evaluation_measure/mean_squared_log_error'
18
+ require_relative 'evaluation_measure/median_absolute_error'
19
+ require_relative 'evaluation_measure/mutual_information'
20
+ require_relative 'evaluation_measure/normalized_mutual_information'
21
+ require_relative 'evaluation_measure/precision'
22
+ require_relative 'evaluation_measure/precision_recall'
23
+ require_relative 'evaluation_measure/purity'
24
+ require_relative 'evaluation_measure/r2_score'
25
+ require_relative 'evaluation_measure/recall'
26
+ require_relative 'evaluation_measure/roc_auc'
27
+ require_relative 'evaluation_measure/silhouette_score'