rumale-evaluation_measure 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/evaluation_measure/accuracy.rb +27 -0
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +73 -0
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +54 -0
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +47 -0
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +37 -0
- data/lib/rumale/evaluation_measure/f_score.rb +48 -0
- data/lib/rumale/evaluation_measure/function.rb +143 -0
- data/lib/rumale/evaluation_measure/log_loss.rb +53 -0
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +27 -0
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mutual_information.rb +49 -0
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +53 -0
- data/lib/rumale/evaluation_measure/precision.rb +48 -0
- data/lib/rumale/evaluation_measure/precision_recall.rb +98 -0
- data/lib/rumale/evaluation_measure/purity.rb +40 -0
- data/lib/rumale/evaluation_measure/r2_score.rb +40 -0
- data/lib/rumale/evaluation_measure/recall.rb +48 -0
- data/lib/rumale/evaluation_measure/roc_auc.rb +126 -0
- data/lib/rumale/evaluation_measure/silhouette_score.rb +79 -0
- data/lib/rumale/evaluation_measure/version.rb +10 -0
- data/lib/rumale/evaluation_measure.rb +27 -0
- metadata +104 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MutualInformation is a class that calculates the mutual information.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/mutual_information'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MutualInformation.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance," J. Machine Learning Research, vol. 11, pp. 2837--2854, 2010.
|
17
|
+
class MutualInformation
|
18
|
+
include ::Rumale::Base::Evaluator
|
19
|
+
|
20
|
+
# Calculate mutual information
|
21
|
+
#
|
22
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
23
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
|
24
|
+
# @return [Float] Mutual information.
|
25
|
+
def score(y_true, y_pred)
  # Mutual information: the sum over every (cluster k, class j) pair of
  # p(k, j) * log(p(k, j) / (p(k) * p(j))), with probabilities estimated from counts.
  n_samples = y_pred.size
  true_labels = y_true.to_a
  pred_labels = y_pred.to_a

  # Count class sizes, cluster sizes, and co-occurrences in one pass each instead of
  # re-scanning both label vectors for every (cluster, class) pair.
  # Hashes keep insertion order, so labels are visited in first-occurrence order.
  class_sizes = Hash.new(0)
  cluster_sizes = Hash.new(0)
  joint_sizes = Hash.new(0)
  true_labels.each { |j| class_sizes[j] += 1 }
  pred_labels.each { |k| cluster_sizes[k] += 1 }
  true_labels.zip(pred_labels) { |j, k| joint_sizes[[k, j]] += 1 }

  mutual_information = 0.0
  cluster_sizes.each do |k, n_pr_samples|
    class_sizes.each do |j, n_tr_samples|
      # fetch (rather than []) so that a lookup never inserts a key into the table.
      n_intr_samples = joint_sizes.fetch([k, j], 0)
      # Empty cells contribute nothing and would otherwise evaluate log(0).
      next unless n_intr_samples.positive?

      mutual_information +=
        n_intr_samples.fdiv(n_samples) * Math.log((n_samples * n_intr_samples).fdiv(n_pr_samples * n_tr_samples))
    end
  end
  mutual_information
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/evaluation_measure/mutual_information'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module EvaluationMeasure
|
8
|
+
# NormalizedMutualInformation is a class that calculates the normalized mutual information.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'rumale/evaluation_measure/normalized_mutual_information'
|
12
|
+
#
|
13
|
+
# evaluator = Rumale::EvaluationMeasure::NormalizedMutualInformation.new
|
14
|
+
# puts evaluator.score(ground_truth, predicted)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
18
|
+
# - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance," J. Machine Learning Research, vol. 11, pp. 2837--2854, 2010.
|
19
|
+
class NormalizedMutualInformation
|
20
|
+
include ::Rumale::Base::Evaluator
|
21
|
+
|
22
|
+
# Calculate normalized mutual information
|
23
|
+
#
|
24
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
25
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
|
26
|
+
# @return [Float] Normalized mutual information
|
27
|
+
def score(y_true, y_pred)
  # A labeling with zero entropy (for example a single class or a single cluster)
  # makes the normalizer zero, so the score is reported as 0.0 in that case.
  h_true = entropy(y_true)
  h_pred = entropy(y_pred) unless h_true.zero?
  return 0.0 if h_true.zero? || h_pred.zero?

  # Mutual information divided by the geometric mean of the two entropies.
  MutualInformation.new.score(y_true, y_pred) / Math.sqrt(h_true * h_pred)
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def entropy(y)
  # Entropy (natural log) of the empirical label distribution of y.
  total = y.size
  weighted_logs = y.to_a.uniq.map do |label|
    prob = y.eq(label).count.fdiv(total)
    prob * Math.log(prob)
  end
  -weighted_logs.sum
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/evaluation_measure/precision_recall'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
# This module consists of the classes for model evaluation.
|
8
|
+
module EvaluationMeasure
|
9
|
+
# Precision is a class that calculates the precision of the predicted labels.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/evaluation_measure/precision'
|
13
|
+
#
|
14
|
+
# evaluator = Rumale::EvaluationMeasure::Precision.new
|
15
|
+
# puts evaluator.score(ground_truth, predicted)
|
16
|
+
class Precision
|
17
|
+
include ::Rumale::Base::Evaluator
|
18
|
+
include ::Rumale::EvaluationMeasure::PrecisionRecall
|
19
|
+
|
20
|
+
# Return the average type for calculation of precision.
|
21
|
+
# @return [String] ('binary', 'micro', 'macro')
|
22
|
+
attr_reader :average
|
23
|
+
|
24
|
+
# Create a new evaluation measure calculator for precision score.
|
25
|
+
#
|
26
|
+
# @param average [String] The average type ('binary', 'micro', 'macro')
|
27
|
+
def initialize(average: 'binary')
|
28
|
+
# Keep the requested averaging mode; #score branches on it ('binary', 'micro' or 'macro').
@average = average
|
29
|
+
end
|
30
|
+
|
31
|
+
# Calculate average precision.
|
32
|
+
#
|
33
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
34
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
|
35
|
+
# @return [Float] Average precision
|
36
|
+
def score(y_true, y_pred)
  # Dispatch on the averaging mode chosen at construction time.
  # Any other mode matches no branch, so the method returns nil.
  if @average == 'binary'
    # Per-class precisions are ordered by ascending label; take the one for the largest label.
    precision_each_class(y_true, y_pred).last
  elsif @average == 'micro'
    micro_average_precision(y_true, y_pred)
  elsif @average == 'macro'
    macro_average_precision(y_true, y_pred)
  end
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require 'rumale/base/evaluator'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
# This module consists of the classes for model evaluation.
|
9
|
+
module EvaluationMeasure
|
10
|
+
# @!visibility private
|
11
|
+
module PrecisionRecall
|
12
|
+
module_function
|
13
|
+
|
14
|
+
# @!visibility private
|
15
|
+
def precision_each_class(y_true, y_pred)
  # One precision value per distinct ground-truth label, in ascending label order.
  y_true.sort.to_a.uniq.map do |label|
    predicted_as_label = y_pred.eq(label)
    predictions = y_pred[predicted_as_label]
    if predictions.empty?
      # The label was never predicted: precision is reported as 0.0.
      0.0
    else
      truths = y_true[predicted_as_label]
      n_hits = truths.eq(predictions).count.to_f
      n_misses = truths.ne(predictions).count.to_f
      n_hits / (n_hits + n_misses)
    end
  end
end
|
25
|
+
|
26
|
+
# @!visibility private
|
27
|
+
def recall_each_class(y_true, y_pred)
  # One recall value per distinct ground-truth label, in ascending label order.
  y_true.sort.to_a.uniq.map do |label|
    labelled_as_label = y_true.eq(label)
    predictions = y_pred[labelled_as_label]
    if predictions.empty?
      0.0
    else
      truths = y_true[labelled_as_label]
      n_hits = truths.eq(predictions).count.to_f
      n_misses = truths.ne(predictions).count.to_f
      n_hits / (n_hits + n_misses)
    end
  end
end
|
37
|
+
|
38
|
+
# @!visibility private
|
39
|
+
def f_score_each_class(y_true, y_pred)
  # Harmonic mean of precision and recall for every class, in the same class order.
  precisions = precision_each_class(y_true, y_pred)
  recalls = recall_each_class(y_true, y_pred)
  precisions.zip(recalls).map do |prec, recl|
    # Both zero would divide 0.0 by 0.0; report 0.0 instead.
    prec.zero? && recl.zero? ? 0.0 : (2.0 * prec * recl) / (prec + recl)
  end
end
|
46
|
+
|
47
|
+
# @!visibility private
|
48
|
+
def micro_average_precision(y_true, y_pred)
  # Micro averaging: pool true-positive and predicted-positive counts over every
  # ground-truth label first, then divide once.
  n_true_positives = 0.0
  n_predicted = 0.0
  y_true.sort.to_a.uniq.each do |label|
    target_positions = y_pred.eq(label)
    predictions = y_pred[target_positions]
    # A label that was never predicted adds nothing to either count.
    next if predictions.empty?

    truths = y_true[target_positions]
    n_hits = truths.eq(predictions).count
    n_true_positives += n_hits
    n_predicted += n_hits + truths.ne(predictions).count
  end
  # With empty input, or when no sample is predicted as a ground-truth label, the
  # ratio is undefined (0.0 / 0.0). Report 0.0, as precision_each_class does.
  return 0.0 if n_predicted.zero?

  n_true_positives / n_predicted
end
|
60
|
+
|
61
|
+
# @!visibility private
|
62
|
+
def micro_average_recall(y_true, y_pred)
  # Micro averaging: pool true-positive and actual-positive counts over every
  # ground-truth label first, then divide once.
  n_true_positives = 0.0
  n_actual = 0.0
  y_true.sort.to_a.uniq.each do |label|
    target_positions = y_true.eq(label)
    predictions = y_pred[target_positions]
    # Skip instead of yielding a bare 0.0: the accumulation needs a pair of counts,
    # and an empty selection contributes zero to both.
    next if predictions.empty?

    truths = y_true[target_positions]
    n_hits = truths.eq(predictions).count
    n_true_positives += n_hits
    n_actual += n_hits + truths.ne(predictions).count
  end
  # Empty input leaves both counts at zero; report 0.0 instead of failing.
  return 0.0 if n_actual.zero?

  n_true_positives / n_actual
end
|
74
|
+
|
75
|
+
# @!visibility private
|
76
|
+
def micro_average_f_score(y_true, y_pred)
  # Harmonic mean of the micro-averaged precision and recall.
  prec = micro_average_precision(y_true, y_pred)
  recl = micro_average_recall(y_true, y_pred)
  # Same convention as f_score_each_class: when both terms are zero the formula
  # would evaluate 0.0 / 0.0 (NaN), so report 0.0 instead.
  return 0.0 if prec.zero? && recl.zero?

  (2.0 * prec * recl) / (prec + recl)
end
|
81
|
+
|
82
|
+
# @!visibility private
|
83
|
+
def macro_average_precision(y_true, y_pred)
  # Unweighted mean of the per-class precisions.
  per_class = precision_each_class(y_true, y_pred)
  n_classes = y_true.to_a.uniq.size
  per_class.sum / n_classes
end
|
86
|
+
|
87
|
+
# @!visibility private
|
88
|
+
def macro_average_recall(y_true, y_pred)
  # Unweighted mean of the per-class recalls.
  per_class = recall_each_class(y_true, y_pred)
  n_classes = y_true.to_a.uniq.size
  per_class.sum / n_classes
end
|
91
|
+
|
92
|
+
# @!visibility private
|
93
|
+
def macro_average_f_score(y_true, y_pred)
  # Unweighted mean of the per-class F-scores.
  per_class = f_score_each_class(y_true, y_pred)
  n_classes = y_true.to_a.uniq.size
  per_class.sum / n_classes
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# Purity is a class that calculates the purity of clustering results.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/purity'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::Purity.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
17
|
+
class Purity
|
18
|
+
include ::Rumale::Base::Evaluator
|
19
|
+
|
20
|
+
# Calculate purity
|
21
|
+
#
|
22
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
23
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
|
24
|
+
# @return [Float] Purity
|
25
|
+
def score(y_true, y_pred)
  # Purity: each cluster is credited with the size of its most frequent class,
  # and the credits are summed and divided by the number of samples.
  n_samples = y_pred.size

  # contingency[cluster][klass] is the number of samples with that predicted cluster
  # and that true class. It is built in one pass instead of re-scanning y_true for
  # every (cluster, class) pair. Both label vectors are expected to have equal length.
  contingency = Hash.new { |table, cluster| table[cluster] = Hash.new(0) }
  y_pred.to_a.zip(y_true.to_a) { |cluster, klass| contingency[cluster][klass] += 1 }

  n_majority = contingency.each_value.sum { |class_counts| class_counts.each_value.max }
  n_majority.fdiv(n_samples)
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# R2Score is a class that calculates the coefficient of determination for the predicted values.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/r2_score'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::R2Score.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class R2Score
|
15
|
+
include ::Rumale::Base::Evaluator
|
16
|
+
|
17
|
+
# Create a new evaluation measure calculator for coefficient of determination.
|
18
|
+
# Intentionally empty: the evaluator takes no arguments and sets no instance state.
def initialize; end
|
19
|
+
|
20
|
+
# Calculate the coefficient of determination.
|
21
|
+
#
|
22
|
+
# @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
|
23
|
+
# @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
|
24
|
+
# @return [Float] Coefficient of determination
|
25
|
+
def score(y_true, y_pred)
  # n_outputs is nil when y_true is one-dimensional (a single target variable).
  n_samples, n_outputs = y_true.shape
  residual_ss = ((y_true - y_pred)**2).sum(axis: 0)
  target_mean = y_true.sum(axis: 0) / n_samples
  total_ss = ((y_true - target_mean)**2).sum(axis: 0)

  if n_outputs.nil?
    # A constant target has zero total variance; the score is defined as 0.0 then.
    return 0.0 if total_ss.zero?

    return 1.0 - residual_ss / total_ss
  end

  # Multiple outputs: score each column, zero the constant columns, then average.
  per_output = 1 - residual_ss / total_ss
  per_output[total_ss.eq(0)] = 0.0
  per_output.sum / per_output.size
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/evaluation_measure/precision_recall'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
# This module consists of the classes for model evaluation.
|
8
|
+
module EvaluationMeasure
|
9
|
+
# Recall is a class that calculates the recall of the predicted labels.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/evaluation_measure/recall'
|
13
|
+
#
|
14
|
+
# evaluator = Rumale::EvaluationMeasure::Recall.new
|
15
|
+
# puts evaluator.score(ground_truth, predicted)
|
16
|
+
class Recall
|
17
|
+
include ::Rumale::Base::Evaluator
|
18
|
+
include ::Rumale::EvaluationMeasure::PrecisionRecall
|
19
|
+
|
20
|
+
# Return the average type for calculation of recall.
|
21
|
+
# @return [String] ('binary', 'micro', 'macro')
|
22
|
+
attr_reader :average
|
23
|
+
|
24
|
+
# Create a new evaluation measure calculator for recall score.
|
25
|
+
#
|
26
|
+
# @param average [String] The average type ('binary', 'micro', 'macro')
|
27
|
+
def initialize(average: 'binary')
|
28
|
+
# Keep the requested averaging mode; #score branches on it ('binary', 'micro' or 'macro').
@average = average
|
29
|
+
end
|
30
|
+
|
31
|
+
# Calculate average recall
|
32
|
+
#
|
33
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
34
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
|
35
|
+
# @return [Float] Average recall
|
36
|
+
def score(y_true, y_pred)
  # Dispatch on the averaging mode chosen at construction time.
  # Any other mode matches no branch, so the method returns nil.
  if @average == 'binary'
    # Per-class recalls are ordered by ascending label; take the one for the largest label.
    recall_each_class(y_true, y_pred).last
  elsif @average == 'micro'
    micro_average_recall(y_true, y_pred)
  elsif @average == 'macro'
    macro_average_recall(y_true, y_pred)
  end
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# ROCAUC is a class that calculates the area under the receiver operating characteristic curve from predicted scores.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/preprocessing'
|
11
|
+
# require 'rumale/linear_model'
|
12
|
+
# require 'rumale/evaluation_measure/roc_auc'
|
13
|
+
#
|
14
|
+
# # Encode labels to integer array.
|
15
|
+
# labels = %w[A B B C A A C C C A]
|
16
|
+
# label_encoder = Rumale::Preprocessing::LabelEncoder.new
|
17
|
+
# y = label_encoder.fit_transform(labels)
|
18
|
+
# # Fit classifier.
|
19
|
+
# classifier = Rumale::LinearModel::LogisticRegression.new
|
20
|
+
# classifier.fit(x, y)
|
21
|
+
# # Predict class probabilities.
|
22
|
+
# y_score = classifier.predict_proba(x)
|
23
|
+
# # Encode labels to one-hot vectors.
|
24
|
+
# one_hot_encoder = Rumale::Preprocessing::OneHotEncoder.new
|
25
|
+
# y_onehot = one_hot_encoder.fit_transform(y)
|
26
|
+
# # Calculate ROC AUC.
|
27
|
+
# evaluator = Rumale::EvaluationMeasure::ROCAUC.new
|
28
|
+
# puts evaluator.score(y_onehot, y_score)
|
29
|
+
class ROCAUC
|
30
|
+
include ::Rumale::Base::Evaluator
|
31
|
+
|
32
|
+
# Calculate area under the receiver operating characteristic curve (ROC AUC).
|
33
|
+
#
|
34
|
+
# @param y_true [Numo::Int32] (shape: [n_samples] or [n_samples, n_classes])
|
35
|
+
# Ground truth binary labels or one-hot encoded multi-labels.
|
36
|
+
# @param y_score [Numo::DFloat] (shape: [n_samples] or [n_samples, n_classes])
|
37
|
+
# Predicted class probabilities or confidence scores.
|
38
|
+
# @return [Float] (macro-averaged) ROC AUC.
|
39
|
+
def score(y_true, y_score)
  # shape[1] is nil for one-dimensional scores, i.e. a single binary problem.
  n_classes = y_score.shape[1]
  unless n_classes
    false_rates, true_rates, = roc_curve(y_true, y_score)
    return auc(false_rates, true_rates)
  end

  # One ROC AUC per column (class), then the unweighted mean over the columns.
  per_class = (0...n_classes).map do |c|
    false_rates, true_rates, = roc_curve(y_true[true, c], y_score[true, c])
    auc(false_rates, true_rates)
  end
  per_class.sum.fdiv(n_classes)
end
|
53
|
+
|
54
|
+
# Calculate receiver operating characteristic curve.
|
55
|
+
#
|
56
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth binary labels.
|
57
|
+
# @param y_score [Numo::DFloat] (shape: [n_samples]) Predicted class probabilities or confidence scores.
|
58
|
+
# @param pos_label [Integer] Label to be a positive label when binarizing the given labels.
|
59
|
+
# If nil is given, the method considers the maximum value of the label as a positive label.
|
60
|
+
# @return [Array] fpr (Numo::DFloat): false positive rates. tpr (Numo::DFloat): true positive rates.
|
61
|
+
# thresholds (Numo::DFloat): thresholds on the decision function used to calculate fpr and tpr.
|
62
|
+
def roc_curve(y_true, y_score, pos_label = nil)
  distinct_labels = y_true.to_a.uniq
  # Without an explicit positive label, the labels must be binary.
  if pos_label.nil? && distinct_labels.size != 2
    raise ArgumentError,
          'y_true must be binary labels or pos_label must be specified if y_true is multi-label'
  end
  # An explicit positive label must occur in y_true.
  if !pos_label.nil? && !distinct_labels.include?(pos_label)
    raise ArgumentError,
          'y_true must have elements whose values are pos_label.'
  end

  fp_counts, tp_counts, cutoffs = binary_roc_curve(y_true, y_score, pos_label)

  # Make the curve start at the origin by prepending a (0, 0) point whose threshold
  # lies above the first one.
  starts_at_origin = !tp_counts.size.zero? && fp_counts[0] == 0 && tp_counts[0] == 0
  unless starts_at_origin
    # NOTE: Numo::NArray#insert is not a destructive method.
    # rubocop:disable Style/RedundantSelfAssignment
    tp_counts = tp_counts.insert(0, 0)
    fp_counts = fp_counts.insert(0, 0)
    cutoffs = cutoffs.insert(0, cutoffs[0] + 1)
    # rubocop:enable Style/RedundantSelfAssignment
  end

  # Normalize the cumulative counts by their final values to obtain rates.
  true_rates = tp_counts / tp_counts[-1].to_f
  false_rates = fp_counts / fp_counts[-1].to_f

  [false_rates, true_rates, cutoffs]
end
|
92
|
+
|
93
|
+
# Calculate area under the curve using the trapezoidal rule.
|
94
|
+
#
|
95
|
+
# @param x [Numo::Int32/Numo::DFloat] (shape: [n_elements])
|
96
|
+
# x coordinates. These are expected to monotonically increase or decrease.
|
97
|
+
# @param y [Numo::Int32/Numo::DFloat] (shape: [n_elements]) y coordinates.
|
98
|
+
# @return [Float] area under the curve.
|
99
|
+
def auc(x, y)
  # Only the points present in both coordinate vectors are used.
  n_points = [x.shape[0], y.shape[0]].min
  raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_points < 2

  # Trapezoidal rule over consecutive points; abs makes the direction of x irrelevant.
  (1...n_points).sum do |j|
    i = j - 1
    0.5 * (x[i] - x[j]).abs * (y[i] + y[j])
  end
end
|
105
|
+
|
106
|
+
private
|
107
|
+
|
108
|
+
def binary_roc_curve(y_true, y_score, pos_label = nil)
  # Default positive label: the largest label value present.
  positive = pos_label.nil? ? y_true.to_a.uniq.max : pos_label

  # Order the samples by descending score.
  order = y_score.sort_index.reverse
  sorted_hits = Numo::Int32.cast(y_true.eq(positive)[order])
  sorted_scores = y_score[order]

  # Keep the last index of each run of equal scores; the final sample always closes a run.
  run_ends = sorted_scores.diff.ne(0).where.to_a
  run_ends << (sorted_hits.size - 1)
  cut_ids = Numo::Int32.cast(run_ends)

  # Cumulative positives at each cut; the remaining samples up to the cut are false positives.
  tp_counts = sorted_hits.cumsum[cut_ids]
  fp_counts = 1 + cut_ids - tp_counts

  [fp_counts, tp_counts, sorted_scores[cut_ids]]
end
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/pairwise_metric'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module EvaluationMeasure
|
8
|
+
# SilhouetteScore is a class that calculates the Silhouette Coefficient.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'rumale/evaluation_measure/silhouette_score'
|
12
|
+
#
|
13
|
+
# evaluator = Rumale::EvaluationMeasure::SilhouetteScore.new
|
14
|
+
# puts evaluator.score(x, predicted)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Rousseeuw, P J., "Silhouettes: A graphical aid to the interpretation and validation of cluster analysis," Journal of Computational and Applied Mathematics, Vol. 20, pp. 53--65, 1987.
|
18
|
+
class SilhouetteScore
|
19
|
+
include ::Rumale::Base::Evaluator
|
20
|
+
|
21
|
+
# Create a new evaluator that calculates the silhouette coefficient.
|
22
|
+
#
|
23
|
+
# @param metric [String] The metric to calculate the silhouette coefficient.
|
24
|
+
# If metric is 'euclidean', Euclidean distance is used for dissimilarity between sample points.
|
25
|
+
# If metric is 'precomputed', the score method expects to be given a distance matrix.
|
26
|
+
def initialize(metric: 'euclidean')
|
27
|
+
# Keep the metric name; #score uses x as the distance matrix when it is 'precomputed'
# and otherwise computes Euclidean distances from x.
@metric = metric
|
28
|
+
end
|
29
|
+
|
30
|
+
# Calculates the silhouette coefficient.
|
31
|
+
#
|
32
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
|
33
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
|
34
|
+
# @return [Float] The mean of silhouette coefficient.
|
35
|
+
def score(x, y)
  dist_mat = @metric == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x)

  n_samples = dist_mat.shape[0]
  # One boolean membership mask per cluster, in ascending label order.
  memberships = y.to_a.uniq.sort.map { |label| y.eq(label) }

  # Mean distance from each sample to the other members of its own cluster.
  # Samples in singleton clusters keep 0.
  intra_dists = Numo::DFloat.zeros(n_samples)
  memberships.each do |members|
    cluster_size = members.count
    next unless cluster_size > 1

    within = dist_mat[members, members].dup
    within[within.diag_indices] = 0.0
    intra_dists[members] = within.sum(axis: 0) / (cluster_size - 1)
  end

  # Smallest mean distance from each sample to the members of any other cluster.
  inter_dists = Numo::DFloat.zeros(n_samples) + Float::INFINITY
  memberships.each_with_index do |members, m|
    memberships.each_with_index do |others, n|
      next if m == n

      inter_dists[members] = Numo::DFloat.minimum(
        inter_dists[members], dist_mat[members, others].mean(1)
      )
    end
  end

  # Samples in singleton clusters are masked out (multiplied by zero).
  mask = Numo::DFloat.ones(n_samples)
  memberships.each { |members| mask[members] = 0 unless members.count > 1 }

  silhouettes = mask * ((inter_dists - intra_dists) / Numo::DFloat.maximum(inter_dists, intra_dists))
  # Replace undefined ratios (NaN) with 0.0 before averaging.
  silhouettes[silhouettes.isnan] = 0.0

  silhouettes.mean
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require_relative 'evaluation_measure/version'
|
6
|
+
|
7
|
+
require_relative 'evaluation_measure/accuracy'
|
8
|
+
require_relative 'evaluation_measure/adjusted_rand_score'
|
9
|
+
require_relative 'evaluation_measure/calinski_harabasz_score'
|
10
|
+
require_relative 'evaluation_measure/davies_bouldin_score'
|
11
|
+
require_relative 'evaluation_measure/explained_variance_score'
|
12
|
+
require_relative 'evaluation_measure/f_score'
|
13
|
+
require_relative 'evaluation_measure/function'
|
14
|
+
require_relative 'evaluation_measure/log_loss'
|
15
|
+
require_relative 'evaluation_measure/mean_absolute_error'
|
16
|
+
require_relative 'evaluation_measure/mean_squared_error'
|
17
|
+
require_relative 'evaluation_measure/mean_squared_log_error'
|
18
|
+
require_relative 'evaluation_measure/median_absolute_error'
|
19
|
+
require_relative 'evaluation_measure/mutual_information'
|
20
|
+
require_relative 'evaluation_measure/normalized_mutual_information'
|
21
|
+
require_relative 'evaluation_measure/precision'
|
22
|
+
require_relative 'evaluation_measure/precision_recall'
|
23
|
+
require_relative 'evaluation_measure/purity'
|
24
|
+
require_relative 'evaluation_measure/r2_score'
|
25
|
+
require_relative 'evaluation_measure/recall'
|
26
|
+
require_relative 'evaluation_measure/roc_auc'
|
27
|
+
require_relative 'evaluation_measure/silhouette_score'
|