rumale-evaluation_measure 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/evaluation_measure/accuracy.rb +27 -0
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +73 -0
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +54 -0
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +47 -0
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +37 -0
- data/lib/rumale/evaluation_measure/f_score.rb +48 -0
- data/lib/rumale/evaluation_measure/function.rb +143 -0
- data/lib/rumale/evaluation_measure/log_loss.rb +53 -0
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +27 -0
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mutual_information.rb +49 -0
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +53 -0
- data/lib/rumale/evaluation_measure/precision.rb +48 -0
- data/lib/rumale/evaluation_measure/precision_recall.rb +98 -0
- data/lib/rumale/evaluation_measure/purity.rb +40 -0
- data/lib/rumale/evaluation_measure/r2_score.rb +40 -0
- data/lib/rumale/evaluation_measure/recall.rb +48 -0
- data/lib/rumale/evaluation_measure/roc_auc.rb +126 -0
- data/lib/rumale/evaluation_measure/silhouette_score.rb +79 -0
- data/lib/rumale/evaluation_measure/version.rb +10 -0
- data/lib/rumale/evaluation_measure.rb +27 -0
- metadata +104 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MutualInformation is a class that calculates the mutual information.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/mutual_information'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MutualInformation.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance," J. Machine Learning Research, vol. 11, pp. 2837--2854, 2010.
|
17
|
+
class MutualInformation
  include ::Rumale::Base::Evaluator

  # Calculate mutual information between the ground truth labels and the predicted cluster labels.
  #
  # Every (cluster, class) pair that shares at least one sample contributes
  #   (n_kj / n) * log((n * n_kj) / (n_k * n_j))
  # where n is the number of samples, n_k the size of the cluster, n_j the size of the class,
  # and n_kj the number of samples that belong to both. Pairs without shared samples are skipped.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
  # @return [Float] Mutual information.
  def score(y_true, y_pred)
    true_labels = y_true.to_a
    pred_labels = y_pred.to_a
    n_samples = y_pred.size

    # Count label frequencies in one pass each instead of intersecting index lists for every pair.
    class_sizes = count_labels(true_labels)
    cluster_sizes = count_labels(pred_labels)
    joint_sizes = count_labels(pred_labels.zip(true_labels))

    # Hashes keep first-appearance order, so the terms are accumulated in the same order
    # as iterating over the unique cluster labels and then the unique class labels.
    mutual_information = 0.0
    cluster_sizes.each do |cluster_id, n_pr_samples|
      class_sizes.each do |class_id, n_tr_samples|
        n_intr_samples = joint_sizes[[cluster_id, class_id]]
        next unless n_intr_samples.positive?

        mutual_information +=
          n_intr_samples.fdiv(n_samples) * Math.log((n_samples * n_intr_samples).fdiv(n_pr_samples * n_tr_samples))
      end
    end
    mutual_information
  end

  private

  # Count the occurrences of each element; missing keys read as 0.
  def count_labels(labels)
    labels.each_with_object(Hash.new(0)) { |label, counts| counts[label] += 1 }
  end
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/evaluation_measure/mutual_information'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module EvaluationMeasure
|
8
|
+
# NormalizedMutualInformation is a class that calculates the normalized mutual information.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'rumale/evaluation_measure/normalized_mutual_information'
|
12
|
+
#
|
13
|
+
# evaluator = Rumale::EvaluationMeasure::NormalizedMutualInformation.new
|
14
|
+
# puts evaluator.score(ground_truth, predicted)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
18
|
+
# - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance," J. Machine Learning Research, vol. 11, pp. 2837--2854, 2010.
|
19
|
+
class NormalizedMutualInformation
  include ::Rumale::Base::Evaluator

  # Calculate normalized mutual information.
  #
  # The mutual information is divided by the square root of the product of the two label entropies.
  # When either entropy is zero, 0.0 is returned without evaluating the mutual information.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
  # @return [Float] Normalized mutual information
  def score(y_true, y_pred)
    h_true = entropy(y_true)
    return 0.0 if h_true.zero?

    h_pred = entropy(y_pred)
    return 0.0 if h_pred.zero?

    mutual_information = MutualInformation.new.score(y_true, y_pred)
    mutual_information / Math.sqrt(h_true * h_pred)
  end

  private

  # Entropy of the empirical label distribution (natural logarithm).
  def entropy(y)
    total = y.size
    weighted_logs = y.to_a.uniq.map do |label|
      prob = y.eq(label).count.fdiv(total)
      prob * Math.log(prob)
    end
    -weighted_logs.sum
  end
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/evaluation_measure/precision_recall'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
# This module consists of the classes for model evaluation.
|
8
|
+
module EvaluationMeasure
|
9
|
+
# Precision is a class that calculates the precision of the predicted labels.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/evaluation_measure/precision'
|
13
|
+
#
|
14
|
+
# evaluator = Rumale::EvaluationMeasure::Precision.new
|
15
|
+
# puts evaluator.score(ground_truth, predicted)
|
16
|
+
class Precision
  include ::Rumale::Base::Evaluator
  include ::Rumale::EvaluationMeasure::PrecisionRecall

  # Return the average type for calculation of precision.
  # @return [String] ('binary', 'micro', 'macro')
  attr_reader :average

  # Create a new evaluation measure calculator for precision score.
  #
  # @param average [String] The average type ('binary', 'micro', 'macro')
  def initialize(average: 'binary')
    @average = average
  end

  # Calculate average precision.
  #
  # With 'binary', the last element of the per-class precisions is returned.
  # Any average type other than the three listed yields nil.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Average precision
  def score(y_true, y_pred)
    return precision_each_class(y_true, y_pred).last if @average == 'binary'
    return micro_average_precision(y_true, y_pred) if @average == 'micro'

    macro_average_precision(y_true, y_pred) if @average == 'macro'
  end
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require 'rumale/base/evaluator'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
# This module consists of the classes for model evaluation.
|
9
|
+
module EvaluationMeasure
|
10
|
+
# @!visibility private
|
11
|
+
module PrecisionRecall
  module_function

  # Precision of each class, ordered by ascending label of y_true.
  # A class that is never predicted gets 0.0.
  # @!visibility private
  def precision_each_class(y_true, y_pred)
    y_true.sort.to_a.uniq.map do |label|
      n_true_positives, n_predicted = agreement_counts(y_true, y_pred, y_pred.eq(label))
      n_predicted.zero? ? 0.0 : n_true_positives / n_predicted
    end
  end

  # Recall of each class, ordered by ascending label of y_true.
  # @!visibility private
  def recall_each_class(y_true, y_pred)
    y_true.sort.to_a.uniq.map do |label|
      n_true_positives, n_actual = agreement_counts(y_true, y_pred, y_true.eq(label))
      n_actual.zero? ? 0.0 : n_true_positives / n_actual
    end
  end

  # F-score of each class, i.e. the harmonic mean of its precision and recall.
  # @!visibility private
  def f_score_each_class(y_true, y_pred)
    precision_each_class(y_true, y_pred).zip(recall_each_class(y_true, y_pred)).map do |prec, recl|
      harmonic_mean(prec, recl)
    end
  end

  # Micro-averaged precision: true positives summed over the classes of y_true,
  # divided by the number of samples predicted as one of those classes.
  # Returns 0.0 instead of NaN when no sample is predicted as any of those classes.
  # @!visibility private
  def micro_average_precision(y_true, y_pred)
    micro_ratio(y_true.sort.to_a.uniq.map { |label| agreement_counts(y_true, y_pred, y_pred.eq(label)) })
  end

  # Micro-averaged recall: true positives summed over the classes of y_true,
  # divided by the number of samples that belong to those classes.
  # @!visibility private
  def micro_average_recall(y_true, y_pred)
    micro_ratio(y_true.sort.to_a.uniq.map { |label| agreement_counts(y_true, y_pred, y_true.eq(label)) })
  end

  # Harmonic mean of the micro-averaged precision and recall.
  # Returns 0.0 when both are zero, matching f_score_each_class, instead of NaN.
  # @!visibility private
  def micro_average_f_score(y_true, y_pred)
    harmonic_mean(micro_average_precision(y_true, y_pred), micro_average_recall(y_true, y_pred))
  end

  # Unweighted mean of the per-class precisions.
  # @!visibility private
  def macro_average_precision(y_true, y_pred)
    scores = precision_each_class(y_true, y_pred)
    scores.sum / scores.size
  end

  # Unweighted mean of the per-class recalls.
  # @!visibility private
  def macro_average_recall(y_true, y_pred)
    scores = recall_each_class(y_true, y_pred)
    scores.sum / scores.size
  end

  # Unweighted mean of the per-class F-scores.
  # @!visibility private
  def macro_average_f_score(y_true, y_pred)
    scores = f_score_each_class(y_true, y_pred)
    scores.sum / scores.size
  end

  # Among the samples selected by target_positions, count those whose true and predicted labels agree.
  # Always returns a pair of Floats [n_agreeing, n_selected], which is [0.0, 0.0] when nothing is selected,
  # so that callers can transpose or sum the pairs safely.
  # @!visibility private
  def agreement_counts(y_true, y_pred, target_positions)
    selected_pred = y_pred[target_positions]
    return [0.0, 0.0] if selected_pred.empty?

    # The comparison yields a boolean mask; counting its true elements avoids casting it to integers.
    n_agreeing = y_true[target_positions].eq(selected_pred).count.to_f
    [n_agreeing, selected_pred.size.to_f]
  end

  # Ratio of the summed numerators to the summed denominators of [numerator, denominator] pairs.
  # Returns 0.0 when the summed denominator is zero.
  # @!visibility private
  def micro_ratio(pairs)
    numerator = pairs.sum(0.0, &:first)
    denominator = pairs.sum(0.0, &:last)
    denominator.zero? ? 0.0 : numerator / denominator
  end

  # Harmonic mean of precision and recall, defined as 0.0 when both are zero.
  # @!visibility private
  def harmonic_mean(prec, recl)
    return 0.0 if prec.zero? && recl.zero?

    (2.0 * prec * recl) / (prec + recl)
  end
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# Purity is a class that calculates the purity of clustering results.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/purity'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::Purity.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
17
|
+
class Purity
  include ::Rumale::Base::Evaluator

  # Calculate purity.
  #
  # For every predicted cluster, the size of its largest overlap with a ground truth class is taken;
  # the sum of these sizes divided by the number of samples is the purity.
  # Both label arrays are expected to have the same length.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
  # @return [Float] Purity
  def score(y_true, y_pred)
    n_samples = y_pred.size

    # Build the cluster-by-class contingency counts in a single pass over the samples,
    # instead of intersecting index lists for every (cluster, class) pair.
    class_counts = Hash.new { |hash, cluster_id| hash[cluster_id] = Hash.new(0) }
    y_pred.to_a.zip(y_true.to_a) { |cluster_id, class_id| class_counts[cluster_id][class_id] += 1 }

    # The members of the majority class of each cluster count as correctly assigned.
    n_majority = class_counts.each_value.sum { |counts| counts.each_value.max }
    n_majority.fdiv(n_samples)
  end
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# R2Score is a class that calculates the coefficient of determination for the predicted values.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/r2_score'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::R2Score.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class R2Score
  include ::Rumale::Base::Evaluator

  # Create a new evaluation measure calculator for coefficient of determination.
  def initialize; end

  # Calculate the coefficient of determination.
  #
  # For a single output, 0.0 is returned when the total sum of squares is zero.
  # For multiple outputs, the per-output scores are averaged, and outputs whose
  # total sum of squares is zero contribute 0.0.
  #
  # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
  # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
  # @return [Float] Coefficient of determination
  def score(y_true, y_pred)
    n_samples, n_outputs = y_true.shape
    residual_ss = ((y_true - y_pred)**2).sum(axis: 0)
    mean_true = y_true.sum(axis: 0) / n_samples
    total_ss = ((y_true - mean_true)**2).sum(axis: 0)

    # A one-dimensional y_true has no second shape entry: single-output case.
    if n_outputs.nil?
      return 0.0 if total_ss.zero?

      return 1.0 - residual_ss / total_ss
    end

    per_output = 1 - residual_ss / total_ss
    per_output[total_ss.eq(0)] = 0.0
    per_output.sum / per_output.size
  end
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/evaluation_measure/precision_recall'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
# This module consists of the classes for model evaluation.
|
8
|
+
module EvaluationMeasure
|
9
|
+
# Recall is a class that calculates the recall of the predicted labels.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/evaluation_measure/recall'
|
13
|
+
#
|
14
|
+
# evaluator = Rumale::EvaluationMeasure::Recall.new
|
15
|
+
# puts evaluator.score(ground_truth, predicted)
|
16
|
+
class Recall
  include ::Rumale::Base::Evaluator
  include ::Rumale::EvaluationMeasure::PrecisionRecall

  # Return the average type for calculation of recall.
  # @return [String] ('binary', 'micro', 'macro')
  attr_reader :average

  # Create a new evaluation measure calculator for recall score.
  #
  # @param average [String] The average type ('binary', 'micro', 'macro')
  def initialize(average: 'binary')
    @average = average
  end

  # Calculate average recall.
  #
  # With 'binary', the last element of the per-class recalls is returned.
  # Any average type other than the three listed yields nil.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Average recall
  def score(y_true, y_pred)
    return recall_each_class(y_true, y_pred).last if @average == 'binary'
    return micro_average_recall(y_true, y_pred) if @average == 'micro'

    macro_average_recall(y_true, y_pred) if @average == 'macro'
  end
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# ROCAUC is a class that calculates the area under the receiver operating characteristic curve from predicted scores.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/preprocessing'
|
11
|
+
# require 'rumale/linear_model'
|
12
|
+
# require 'rumale/evaluation_measure/roc_auc'
|
13
|
+
#
|
14
|
+
# # Encode labels to integer array.
|
15
|
+
# labels = %w[A B B C A A C C C A]
|
16
|
+
# label_encoder = Rumale::Preprocessing::LabelEncoder.new
|
17
|
+
# y = label_encoder.fit_transform(labels)
|
18
|
+
# # Fit classifier.
|
19
|
+
# classifier = Rumale::LinearModel::LogisticRegression.new
|
20
|
+
# classifier.fit(x, y)
|
21
|
+
# # Predict class probabilities.
|
22
|
+
# y_score = classifier.predict_proba(x)
|
23
|
+
# # Encode labels to one-hot vectors.
|
24
|
+
# one_hot_encoder = Rumale::Preprocessing::OneHotEncoder.new
|
25
|
+
# y_onehot = one_hot_encoder.fit_transform(y)
|
26
|
+
# # Calculate ROC AUC.
|
27
|
+
# evaluator = Rumale::EvaluationMeasure::ROCAUC.new
|
28
|
+
# puts evaluator.score(y_onehot, y_score)
|
29
|
+
class ROCAUC
  include ::Rumale::Base::Evaluator

  # Calculate area under the receiver operating characteristic curve (ROC AUC).
  #
  # @param y_true [Numo::Int32] (shape: [n_samples] or [n_samples, n_classes])
  #   Ground truth binary labels or one-hot encoded multi-labels.
  # @param y_score [Numo::DFloat] (shape: [n_samples] or [n_samples, n_classes])
  #   Predicted class probabilities or confidence scores.
  # @return [Float] (macro-averaged) ROC AUC.
  def score(y_true, y_score)
    n_classes = y_score.shape[1]

    # One-dimensional scores: a single binary problem.
    if n_classes.nil?
      fpr, tpr, = roc_curve(y_true, y_score)
      return auc(fpr, tpr)
    end

    # Otherwise evaluate every column separately and average the areas.
    per_class = (0...n_classes).map do |c|
      fpr, tpr, = roc_curve(y_true[true, c], y_score[true, c])
      auc(fpr, tpr)
    end
    per_class.sum.fdiv(n_classes)
  end

  # Calculate receiver operating characteristic curve.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth binary labels.
  # @param y_score [Numo::DFloat] (shape: [n_samples]) Predicted class probabilities or confidence scores.
  # @param pos_label [Integer] Label to be a positive label when binarizing the given labels.
  #   If nil is given, the method considers the maximum value of the label as a positive label.
  # @return [Array] fpr (Numo::DFloat): false positive rates. tpr (Numo::DFloat): true positive rates.
  #   thresholds (Numo::DFloat): thresholds on the decision function used to calculate fpr and tpr.
  # @raise [ArgumentError] If pos_label is nil and y_true does not hold exactly two distinct labels,
  #   or if pos_label is given but does not occur in y_true.
  def roc_curve(y_true, y_score, pos_label = nil)
    labels = y_true.to_a.uniq
    if pos_label.nil?
      unless labels.size == 2
        raise ArgumentError,
              'y_true must be binary labels or pos_label must be specified if y_true is multi-label'
      end
    else
      unless labels.include?(pos_label)
        raise ArgumentError,
              'y_true must have elements whose values are pos_label.'
      end
    end

    false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)

    # Make the curve start at (0, 0) by prepending a point when it does not already do so.
    starts_at_origin = !true_pos.size.zero? && false_pos[0] == 0 && true_pos[0] == 0 # rubocop:disable Style/NumericPredicate
    unless starts_at_origin
      # NOTE: Numo::NArray#insert is not a destructive method.
      # rubocop:disable Style/RedundantSelfAssignment
      true_pos = true_pos.insert(0, 0)
      false_pos = false_pos.insert(0, 0)
      thresholds = thresholds.insert(0, thresholds[0] + 1)
      # rubocop:enable Style/RedundantSelfAssignment
    end

    # Normalize the cumulative counts by their final values to obtain rates.
    tpr = true_pos / true_pos[-1].to_f
    fpr = false_pos / false_pos[-1].to_f

    [fpr, tpr, thresholds]
  end

  # Calculate area under the curve using the trapezoidal rule.
  #
  # @param x [Numo::Int32/Numo::DFloat] (shape: [n_elements])
  #   x coordinates. These are expected to monotonously increase or decrease.
  # @param y [Numo::Int32/Numo::DFloat] (shape: [n_elements]) y coordinates.
  # @return [Float] area under the curve.
  # @raise [ArgumentError] If fewer than two points are given.
  def auc(x, y)
    n_samples = [x.shape[0], y.shape[0]].min
    if n_samples < 2
      raise ArgumentError, 'At least two points are required to calculate area under curve.'
    end

    # Sum the trapezoid between every pair of neighbouring points.
    (0...n_samples).each_cons(2).sum do |left, right|
      0.5 * (x[left] - x[right]).abs * (y[left] + y[right])
    end
  end

  private

  # Cumulative false positive and true positive counts at every distinct score,
  # visiting the samples in descending score order.
  # Returns [false_pos, true_pos, thresholds].
  def binary_roc_curve(y_true, y_score, pos_label = nil)
    positive = pos_label.nil? ? y_true.to_a.uniq.max : pos_label

    order = y_score.sort_index.reverse
    sorted_truth = Numo::Int32.cast(y_true.eq(positive)[order])
    sorted_score = y_score[order]

    # Keep the positions where the sorted score changes, plus the final position.
    cut_ids = Numo::Int32.cast(sorted_score.diff.ne(0).where.to_a.append(sorted_truth.size - 1))

    true_pos = sorted_truth.cumsum[cut_ids]
    # Number of samples seen so far (index + 1) minus the positives among them.
    false_pos = 1 + cut_ids - true_pos

    [false_pos, true_pos, sorted_score[cut_ids]]
  end
end
|
125
|
+
end
|
126
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/pairwise_metric'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module EvaluationMeasure
|
8
|
+
# SilhouetteScore is a class that calculates the Silhouette Coefficient.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'rumale/evaluation_measure/silhouette_score'
|
12
|
+
#
|
13
|
+
# evaluator = Rumale::EvaluationMeasure::SilhouetteScore.new
|
14
|
+
# puts evaluator.score(x, predicted)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Rousseeuw, P J., "Silhouettes: A graphical aid to the interpretation and validation of cluster analysis," Journal of Computational and Applied Mathematics, Vol. 20, pp. 53--65, 1987.
|
18
|
+
class SilhouetteScore
  include ::Rumale::Base::Evaluator

  # Create a new evaluator that calculates the silhouette coefficient.
  #
  # @param metric [String] The metric to calculate the silhouette coefficient.
  #   If metric is 'euclidean', Euclidean distance is used for dissimilarity between sample points.
  #   If metric is 'precomputed', the score method expects to be given a distance matrix.
  def initialize(metric: 'euclidean')
    @metric = metric
  end

  # Calculates the silhouette coefficient.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
  # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
  # @return [Float] The mean of silhouette coefficient.
  def score(x, y)
    dist_mat = @metric == 'precomputed' ? x : ::Rumale::PairwiseMetric.euclidean_distance(x)

    labels = y.to_a.uniq.sort
    n_samples = dist_mat.shape[0]
    # One membership mask per cluster, in ascending label order.
    cluster_masks = labels.map { |label| y.eq(label) }

    # Mean distance from each sample to the other members of its own cluster.
    # Clusters with a single member keep an intra-cluster distance of zero.
    intra_dists = Numo::DFloat.zeros(n_samples)
    cluster_masks.each do |cls_pos|
      sz_cluster = cls_pos.count
      next unless sz_cluster > 1

      cls_dist_mat = dist_mat[cls_pos, cls_pos].dup
      cls_dist_mat[cls_dist_mat.diag_indices] = 0.0
      intra_dists[cls_pos] = cls_dist_mat.sum(axis: 0) / (sz_cluster - 1)
    end

    # Smallest mean distance from each sample to the members of any other cluster.
    inter_dists = Numo::DFloat.zeros(n_samples) + Float::INFINITY
    cluster_masks.each_with_index do |cls_pos, m|
      cluster_masks.each_with_index do |not_cls_pos, n|
        next if m == n

        inter_dists[cls_pos] = Numo::DFloat.minimum(
          inter_dists[cls_pos], dist_mat[cls_pos, not_cls_pos].mean(1)
        )
      end
    end

    # Samples in single-member clusters are masked out, so their coefficient is zero.
    mask = Numo::DFloat.ones(n_samples)
    cluster_masks.each do |cls_pos|
      mask[cls_pos] = 0 unless cls_pos.count > 1
    end

    silhouettes = mask * ((inter_dists - intra_dists) / Numo::DFloat.maximum(inter_dists, intra_dists))
    silhouettes[silhouettes.isnan] = 0.0

    silhouettes.mean
  end
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require_relative 'evaluation_measure/version'
|
6
|
+
|
7
|
+
require_relative 'evaluation_measure/accuracy'
|
8
|
+
require_relative 'evaluation_measure/adjusted_rand_score'
|
9
|
+
require_relative 'evaluation_measure/calinski_harabasz_score'
|
10
|
+
require_relative 'evaluation_measure/davies_bouldin_score'
|
11
|
+
require_relative 'evaluation_measure/explained_variance_score'
|
12
|
+
require_relative 'evaluation_measure/f_score'
|
13
|
+
require_relative 'evaluation_measure/function'
|
14
|
+
require_relative 'evaluation_measure/log_loss'
|
15
|
+
require_relative 'evaluation_measure/mean_absolute_error'
|
16
|
+
require_relative 'evaluation_measure/mean_squared_error'
|
17
|
+
require_relative 'evaluation_measure/mean_squared_log_error'
|
18
|
+
require_relative 'evaluation_measure/median_absolute_error'
|
19
|
+
require_relative 'evaluation_measure/mutual_information'
|
20
|
+
require_relative 'evaluation_measure/normalized_mutual_information'
|
21
|
+
require_relative 'evaluation_measure/precision'
|
22
|
+
require_relative 'evaluation_measure/precision_recall'
|
23
|
+
require_relative 'evaluation_measure/purity'
|
24
|
+
require_relative 'evaluation_measure/r2_score'
|
25
|
+
require_relative 'evaluation_measure/recall'
|
26
|
+
require_relative 'evaluation_measure/roc_auc'
|
27
|
+
require_relative 'evaluation_measure/silhouette_score'
|