rumale-evaluation_measure 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/evaluation_measure/accuracy.rb +27 -0
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +73 -0
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +54 -0
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +47 -0
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +37 -0
- data/lib/rumale/evaluation_measure/f_score.rb +48 -0
- data/lib/rumale/evaluation_measure/function.rb +143 -0
- data/lib/rumale/evaluation_measure/log_loss.rb +53 -0
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +27 -0
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mutual_information.rb +49 -0
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +53 -0
- data/lib/rumale/evaluation_measure/precision.rb +48 -0
- data/lib/rumale/evaluation_measure/precision_recall.rb +98 -0
- data/lib/rumale/evaluation_measure/purity.rb +40 -0
- data/lib/rumale/evaluation_measure/r2_score.rb +40 -0
- data/lib/rumale/evaluation_measure/recall.rb +48 -0
- data/lib/rumale/evaluation_measure/roc_auc.rb +126 -0
- data/lib/rumale/evaluation_measure/silhouette_score.rb +79 -0
- data/lib/rumale/evaluation_measure/version.rb +10 -0
- data/lib/rumale/evaluation_measure.rb +27 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d22a96529d4ce3e701d769258ce6de524d06b9716405a9300a0675bcddd28409
|
4
|
+
data.tar.gz: bf7f2f49863313143016aeb0a1b596a1d2bfb405dd3803e0a8fe17d134086896
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 63634dc026752d51849ba6bd64f40c92dd2ae396565cbd183d39c7713b5c3243bbc8b5a2329d013927fbb1de6e542f5fe9cc6889cfcc1fb64260d11a6bc5284f
|
7
|
+
data.tar.gz: 59a9cd733f2d49e1db27443a1d80950f16d073af611e2c3efc39e7842d13240a7105b37136317a7a81adf3445c52f4e8cbada0e3db5ea3c0366995c3c1c5e958
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2022 Atsushi Tatsuma
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright holder nor the names of its
|
15
|
+
contributors may be used to endorse or promote products derived from
|
16
|
+
this software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Rumale::EvaluationMeasure
|
2
|
+
|
3
|
+
[Gem Version](https://badge.fury.io/rb/rumale-evaluation_measure)
|
4
|
+
[BSD 3-Clause License](https://github.com/yoshoku/rumale/blob/main/rumale-evaluation_measure/LICENSE.txt)
|
5
|
+
[Documentation](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure.html)
|
6
|
+
|
7
|
+
Rumale is a machine learning library in Ruby.
|
8
|
+
Rumale::EvaluationMeasure provides evaluation measures,
|
9
|
+
such as accuracy, precision, recall, and f-score,
|
10
|
+
with Rumale interface.
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'rumale-evaluation_measure'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle install
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install rumale-evaluation_measure
|
27
|
+
|
28
|
+
## Documentation
|
29
|
+
|
30
|
+
- [Rumale API Documentation - EvaluationMeasure](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure.html)
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# Accuracy is a class that calculates the accuracy of classifier from the predicted labels.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/accuracy'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::Accuracy.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class Accuracy
|
15
|
+
include ::Rumale::Base::Evaluator
|
16
|
+
|
17
|
+
# Calculate mean accuracy.
|
18
|
+
#
|
19
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
20
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
|
21
|
+
# @return [Float] Mean accuracy
|
22
|
+
def score(y_true, y_pred)
  # Count the positions whose predicted label equals the ground truth label,
  # then divide by the number of ground truth labels.
  n_correct = y_true.to_a.each_with_index.count { |label, n| label == y_pred[n] }
  n_correct.fdiv(y_true.size)
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# AdjustedRandScore is a class that calculates the adjusted rand index.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/adjusted_rand_score'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::AdjustedRandScore.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance", J. Machine Learning Research, Vol. 11, pp.2837--2854, 2010.
|
17
|
+
class AdjustedRandScore
|
18
|
+
include ::Rumale::Base::Evaluator
|
19
|
+
|
20
|
+
# Calculate adjusted rand index.
|
21
|
+
#
|
22
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
23
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
|
24
|
+
# @return [Float] Adjusted rand index.
|
25
|
+
def score(y_true, y_pred)
  # initialize some variables.
  n_samples = y_pred.size
  n_classes = y_true.to_a.uniq.size
  n_clusters = y_pred.to_a.uniq.size

  # trivial partitions are scored as a perfect agreement.
  return 1.0 if special_cases?(n_samples, n_classes, n_clusters)

  # sums the number of sample pairs (k choose 2) over a list of counts.
  pair_count = ->(counts) { counts.to_a.sum { |v| comb_two(v) } }

  # calculate adjusted rand index from the pair counts of the contingency table.
  table = contingency_table(y_true, y_pred)
  pairs_in_classes = pair_count.call(table.sum(axis: 1))
  pairs_in_clusters = pair_count.call(table.sum(axis: 0))
  pairs_in_cells = pair_count.call(table.flatten)
  expected_pairs = (pairs_in_classes * pairs_in_clusters).fdiv(comb_two(n_samples))
  average_pairs = (pairs_in_classes + pairs_in_clusters).fdiv(2)
  (pairs_in_cells - expected_pairs).fdiv(average_pairs - expected_pairs)
end
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
def contingency_table(y_true, y_pred)
  # Rows follow the order in which class labels first appear in y_true,
  # columns the order in which cluster labels first appear in y_pred.
  class_ids = y_true.to_a.uniq
  cluster_ids = y_pred.to_a.uniq
  table = Numo::Int32.zeros(class_ids.size, cluster_ids.size)
  class_ids.each_with_index do |class_id, row|
    in_class = y_true.eq(class_id)
    cluster_ids.each_with_index do |cluster_id, col|
      # number of samples that belong to both the class and the cluster.
      table[row, col] = (in_class & y_pred.eq(cluster_id)).count
    end
  end
  table
end
|
61
|
+
|
62
|
+
def special_cases?(n_samples, n_classes, n_clusters)
  # True when both partitions are empty, both consist of a single group,
  # or both have as many groups as there are samples.
  [0, 1, n_samples].any? { |k| n_classes == k && n_clusters == k }
end
|
67
|
+
|
68
|
+
def comb_two(k)
  # Number of unordered pairs that can be drawn from k items (k choose 2).
  ordered_pairs = k * (k - 1)
  ordered_pairs / 2
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# CalinskiHarabaszScore is a class that calculates the Calinski and Harabasz score.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/calinski_harabasz_score'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::CalinskiHarabaszScore.new
|
13
|
+
# puts evaluator.score(x, predicted)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - Calinski, T., and Harabasz, J., "A dendrite method for cluster analysis," Communications in Statistics, Vol. 3 (1), pp. 1--27, 1974.
|
17
|
+
class CalinskiHarabaszScore
|
18
|
+
include ::Rumale::Base::Evaluator
|
19
|
+
|
20
|
+
# Calculates the Calinski and Harabasz score.
|
21
|
+
#
|
22
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
|
23
|
+
# @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
|
24
|
+
# @return [Float] The Calinski and Harabasz score.
|
25
|
+
def score(x, y)
  labels = y.to_a.uniq.sort
  n_clusters = labels.size
  n_samples, n_dimensions = x.shape

  # one boolean membership mask per cluster, reused by both passes below.
  masks = labels.map { |label| y.eq(label) }
  centroids = Numo::DFloat.zeros(n_clusters, n_dimensions)

  # within-group dispersion: squared deviations of the samples from their cluster centroid.
  within_group = 0.0
  masks.each_with_index do |mask, n|
    members = x[mask, true]
    center = members.mean(0)
    centroids[n, true] = center
    within_group += ((members - center)**2).sum
  end

  return 1.0 if within_group.zero?

  # between-group dispersion: squared deviations of the centroids from the
  # overall mean, weighted by the cluster sizes.
  mean_vec = x.mean(0)
  between_group = 0.0
  masks.each_with_index do |mask, n|
    between_group += mask.count * ((centroids[n, true] - mean_vec)**2).sum
  end

  (between_group / (n_clusters - 1)) / (within_group / (n_samples - n_clusters))
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/pairwise_metric'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module EvaluationMeasure
|
8
|
+
# DaviesBouldinScore is a class that calculates the Davies-Bouldin score.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'rumale/evaluation_measure/davies_bouldin_score'
|
12
|
+
#
|
13
|
+
# evaluator = Rumale::EvaluationMeasure::DaviesBouldinScore.new
|
14
|
+
# puts evaluator.score(x, predicted)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Davies, D L., and Bouldin, D W., "A Cluster Separation Measure," IEEE Trans. Pattern Analysis and Machine Intelligence, Vol. PAMI-1, No. 2, pp. 224--227, 1979.
|
18
|
+
class DaviesBouldinScore
  # Mixed in for consistency with the other evaluation measure classes;
  # the file already requires 'rumale/base/evaluator'.
  include ::Rumale::Base::Evaluator

  # Calculates the Davies-Bouldin score.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
  # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
  # @return [Float] The Davies-Bouldin score.
  def score(x, y)
    labels = y.to_a.uniq.sort
    n_clusters = labels.size
    n_dimensions = x.shape[1]

    dist_cluster = Numo::DFloat.zeros(n_clusters)
    centroids = Numo::DFloat.zeros(n_clusters, n_dimensions)

    # for each cluster: its centroid and the mean distance of its samples to that centroid.
    n_clusters.times do |n|
      cls_samples = x[y.eq(labels[n]), true]
      cls_centroid = cls_samples.mean(0)
      centroids[n, true] = cls_centroid
      dist_cluster[n] = Rumale::PairwiseMetric.euclidean_distance(cls_samples, cls_centroid.expand_dims(0)).mean
    end

    dist_centroid = Rumale::PairwiseMetric.euclidean_distance(centroids)
    # zero centroid distances become infinity so that the ratio below is 0 rather than a division by zero.
    dist_centroid[dist_centroid.eq(0)] = Float::INFINITY
    dist_mat = (dist_cluster.expand_dims(1) + dist_cluster) / dist_centroid
    # the diagonal is set to -infinity before taking the column-wise maximum.
    dist_mat[dist_mat.diag_indices] = -Float::INFINITY
    dist_mat.max(0).mean
  end
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# ExplainedVarianceScore is a class that calculates the explained variance score.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/explained_variance_score'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::ExplainedVarianceScore.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class ExplainedVarianceScore
|
15
|
+
include ::Rumale::Base::Evaluator
|
16
|
+
|
17
|
+
# Calculate explained variance score.
|
18
|
+
#
|
19
|
+
# @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
|
20
|
+
# @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
|
21
|
+
# @return [Float] Explained variance score.
|
22
|
+
def score(y_true, y_pred)
  # variance of the residuals and variance of the ground truth, per output.
  diff = y_true - y_pred
  numerator = ((diff - diff.mean(0))**2).mean(0)
  denominator = ((y_true - y_true.mean(0))**2).mean(0)

  n_outputs = y_true.shape[1]
  if n_outputs.nil?
    # single output: both variances are scalars. Return a Float (0.0, not the
    # Integer 0) so the result type matches the documented return type.
    denominator.zero? ? 0.0 : 1.0 - numerator / denominator
  else
    # multiple outputs: outputs with zero variance contribute nothing to the sum,
    # but the sum is still divided by the total number of outputs.
    valids = denominator.ne(0)
    (1.0 - numerator[valids] / denominator[valids]).sum / n_outputs
  end
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/evaluation_measure/precision_recall'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
# This module consists of the classes for model evaluation.
|
8
|
+
module EvaluationMeasure
|
9
|
+
# FScore is a class that calculates the F1-score of the predicted labels.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/evaluation_measure/f_score'
|
13
|
+
#
|
14
|
+
# evaluator = Rumale::EvaluationMeasure::FScore.new
|
15
|
+
# puts evaluator.score(ground_truth, predicted)
|
16
|
+
class FScore
|
17
|
+
include ::Rumale::Base::Evaluator
|
18
|
+
include ::Rumale::EvaluationMeasure::PrecisionRecall
|
19
|
+
|
20
|
+
# Return the average type for calculation of F1-score.
|
21
|
+
# @return [String] ('binary', 'micro', 'macro')
|
22
|
+
attr_reader :average
|
23
|
+
|
24
|
+
# Create a new evaluation measure calculator for F1-score.
|
25
|
+
#
|
26
|
+
# @param average [String] The average type ('binary', 'micro', 'macro')
|
27
|
+
def initialize(average: 'binary')
  # Keep the requested averaging mode; #score branches on this value.
  @average = average
end
|
30
|
+
|
31
|
+
# Calculate average F1-score
|
32
|
+
#
|
33
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
34
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
|
35
|
+
# @return [Float] Average F1-score
|
36
|
+
def score(y_true, y_pred)
  # Dispatch on the averaging mode; any other value yields nil.
  if @average == 'binary'
    # the score of the last class is reported in binary mode.
    f_score_each_class(y_true, y_pred).last
  elsif @average == 'micro'
    micro_average_f_score(y_true, y_pred)
  elsif @average == 'macro'
    macro_average_f_score(y_true, y_pred)
  end
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require 'rumale/evaluation_measure/accuracy'
|
6
|
+
require 'rumale/evaluation_measure/precision_recall'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
# This module consists of the classes for model evaluation.
|
10
|
+
module EvaluationMeasure
|
11
|
+
module_function
|
12
|
+
|
13
|
+
# Calculate confusion matrix for evaluating classification performance.
|
14
|
+
#
|
15
|
+
# @example
|
16
|
+
# require 'rumale/evaluation_measure/function'
|
17
|
+
#
|
18
|
+
# y_true = Numo::Int32[2, 0, 2, 2, 0, 1]
|
19
|
+
# y_pred = Numo::Int32[0, 0, 2, 2, 0, 2]
|
20
|
+
# p Rumale::EvaluationMeasure.confusion_matrix(y_true, y_pred)
|
21
|
+
#
|
22
|
+
# # Numo::Int32#shape=[3,3]
|
23
|
+
# # [[2, 0, 0],
|
24
|
+
# # [0, 0, 1],
|
25
|
+
# # [1, 0, 2]]
|
26
|
+
#
|
27
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) The ground truth labels.
|
28
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) The predicted labels.
|
29
|
+
# @return [Numo::Int32] (shape: [n_classes, n_classes]) The confusion matrix.
|
30
|
+
def confusion_matrix(y_true, y_pred)
  # Rows and columns are both indexed by the sorted labels found in y_true.
  labels = y_true.to_a.uniq.sort
  conf_mat = Numo::Int32.zeros(labels.size, labels.size)

  labels.each_with_index do |actual, row|
    # predictions made for the samples whose ground truth label is `actual`.
    predicted = y_pred[y_true.eq(actual)]
    counts = labels.map { |candidate| predicted.eq(candidate).count }
    counts.each_with_index { |count, col| conf_mat[row, col] = count }
  end

  conf_mat
end
|
45
|
+
|
46
|
+
# Output a summary of classification performance for each class.
|
47
|
+
#
|
48
|
+
# @example
|
49
|
+
# y_true = Numo::Int32[0, 1, 1, 2, 2, 2, 0]
|
50
|
+
# y_pred = Numo::Int32[1, 1, 1, 0, 0, 2, 0]
|
51
|
+
# puts Rumale::EvaluationMeasure.classification_report(y_true, y_pred)
|
52
|
+
#
|
53
|
+
# # precision recall f1-score support
|
54
|
+
# #
|
55
|
+
# # 0 0.33 0.50 0.40 2
|
56
|
+
# # 1 0.67 1.00 0.80 2
|
57
|
+
# # 2 1.00 0.33 0.50 3
|
58
|
+
# #
|
59
|
+
# # accuracy 0.57 7
|
60
|
+
# # macro avg 0.67 0.61 0.57 7
|
61
|
+
# # weighted avg 0.71 0.57 0.56 7
|
62
|
+
#
|
63
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) The ground truth labels.
|
64
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) The predicted labels.
|
65
|
+
# @param target_name [Nil/Array] The label names.
|
66
|
+
# @param output_hash [Boolean] The flag indicating whether to output with Ruby Hash.
|
67
|
+
# @return [String/Hash] The summary of classification performance.
|
68
|
+
# If output_hash is true, it returns the summary with Ruby Hash.
|
69
|
+
def classification_report(y_true, y_pred, target_name: nil, output_hash: false) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  # calculate each evaluation measure.
  classes = y_true.to_a.uniq.sort
  supports = Numo::Int32.asarray(classes.map { |label| y_true.eq(label).count })
  precisions = Rumale::EvaluationMeasure::PrecisionRecall.precision_each_class(y_true, y_pred)
  recalls = Rumale::EvaluationMeasure::PrecisionRecall.recall_each_class(y_true, y_pred)
  fscores = Rumale::EvaluationMeasure::PrecisionRecall.f_score_each_class(y_true, y_pred)
  macro_precision = Rumale::EvaluationMeasure::PrecisionRecall.macro_average_precision(y_true, y_pred)
  macro_recall = Rumale::EvaluationMeasure::PrecisionRecall.macro_average_recall(y_true, y_pred)
  macro_fscore = Rumale::EvaluationMeasure::PrecisionRecall.macro_average_f_score(y_true, y_pred)
  accuracy = Rumale::EvaluationMeasure::Accuracy.new.score(y_true, y_pred)
  sum_supports = supports.sum
  # weighted averages: each class contributes in proportion to its share of the samples.
  weights = Numo::DFloat.cast(supports) / sum_supports
  weighted_precision = (Numo::DFloat.cast(precisions) * weights).sum
  weighted_recall = (Numo::DFloat.cast(recalls) * weights).sum
  weighted_fscore = (Numo::DFloat.cast(fscores) * weights).sum
  # output results.
  # Build a new array of label strings instead of converting in place, so the
  # caller's target_name array is never modified (and may be frozen).
  target_name = (target_name || classes).map(&:to_s)
  if output_hash
    res = {}
    target_name.each_with_index do |label, n|
      res[label] = {
        precision: precisions[n],
        recall: recalls[n],
        fscore: fscores[n],
        support: supports[n]
      }
    end
    res[:accuracy] = accuracy
    res[:macro_avg] = {
      precision: macro_precision,
      recall: macro_recall,
      fscore: macro_fscore,
      support: sum_supports
    }
    res[:weighted_avg] = {
      precision: weighted_precision,
      recall: weighted_recall,
      fscore: weighted_fscore,
      support: sum_supports
    }
  else
    width = [12, target_name.map(&:size).max].max # 12 is 'weighted avg'.size
    res = +''
    res << "#{' ' * width} precision recall f1-score support\n"
    res << "\n"
    target_name.each_with_index do |label, n|
      label_str = format("%##{width}s", label)
      precision_str = format('%#10s', format('%.2f', precisions[n]))
      recall_str = format('%#10s', format('%.2f', recalls[n]))
      fscore_str = format('%#10s', format('%.2f', fscores[n]))
      supports_str = format('%#10s', supports[n])
      res << "#{label_str} #{precision_str}#{recall_str}#{fscore_str}#{supports_str}\n"
    end
    res << "\n"
    supports_str = format('%#10s', sum_supports)
    accuracy_str = format('%#30s', format('%.2f', accuracy))
    res << format("%##{width}s ", 'accuracy')
    res << "#{accuracy_str}#{supports_str}\n"
    precision_str = format('%#10s', format('%.2f', macro_precision))
    recall_str = format('%#10s', format('%.2f', macro_recall))
    fscore_str = format('%#10s', format('%.2f', macro_fscore))
    res << format("%##{width}s ", 'macro avg')
    res << "#{precision_str}#{recall_str}#{fscore_str}#{supports_str}\n"
    precision_str = format('%#10s', format('%.2f', weighted_precision))
    recall_str = format('%#10s', format('%.2f', weighted_recall))
    fscore_str = format('%#10s', format('%.2f', weighted_fscore))
    res << format("%##{width}s ", 'weighted avg')
    res << "#{precision_str}#{recall_str}#{fscore_str}#{supports_str}\n"
  end
  res
end
|
142
|
+
end
|
143
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# LogLoss is a class that calculates the logarithmic loss of predicted class probability.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/log_loss'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::LogLoss.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class LogLoss
|
15
|
+
include ::Rumale::Base::Evaluator
|
16
|
+
|
17
|
+
# Calculate mean logarithmic loss.
|
18
|
+
# If both y_true and y_pred are 1-D arrays (both shapes are [n_samples]), this method calculates
|
19
|
+
# mean logarithmic loss for binary classification.
|
20
|
+
#
|
21
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
22
|
+
# @param y_pred [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted class probability.
|
23
|
+
# @param eps [Float] A small value close to zero to avoid outputting infinity in logarithmic calculation.
|
24
|
+
# @return [Float] mean logarithmic loss
|
25
|
+
def score(y_true, y_pred, eps = 1e-15)
  n_samples, n_classes = y_pred.shape
  # keep the probabilities inside [eps, 1 - eps] before taking logarithms.
  probs = y_pred.clip(eps, 1 - eps)

  per_sample =
    if n_classes.nil?
      # 1-D y_pred: binary case, where the smallest label in y_true is the negative class.
      positives = Numo::DFloat.cast(y_true.ne(y_true.to_a.uniq.min))
      -(positives * Numo::NMath.log(probs) + (1 - positives) * Numo::NMath.log(1 - probs))
    else
      # 2-D y_pred: renormalize each row after clipping, then keep the term of the true class.
      one_hot = binarize(y_true)
      normalized = probs / probs.sum(axis: 1).expand_dims(1)
      -(one_hot * Numo::NMath.log(normalized)).sum(axis: 1)
    end

  per_sample.sum / n_samples
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def binarize(y)
  # One-hot encode the labels: row n has a 1 in the column of sample n's class,
  # with columns ordered by the sorted unique labels.
  labels = y.to_a
  classes = labels.uniq.sort
  # label => column index, built once so each sample needs a single hash lookup
  # instead of a linear Array#index scan over the classes.
  column_of = classes.each_with_index.to_h
  binarized = Numo::DFloat.zeros(labels.size, classes.size)
  labels.each_with_index { |label, n| binarized[n, column_of[label]] = 1 }
  binarized
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MeanAbsoluteError is a class that calculates the mean absolute error.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/mean_absolute_error'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MeanAbsoluteError.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class MeanAbsoluteError
|
15
|
+
include ::Rumale::Base::Evaluator
|
16
|
+
|
17
|
+
# Calculate mean absolute error.
|
18
|
+
#
|
19
|
+
# @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
|
20
|
+
# @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
|
21
|
+
# @return [Float] Mean absolute error
|
22
|
+
def score(y_true, y_pred)
  # Mean of the absolute residuals.
  residuals = y_true - y_pred
  residuals.abs.mean
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MeanSquaredError is a class that calculates the mean squared error.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/mean_squared_error'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MeanSquaredError.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class MeanSquaredError
|
15
|
+
include ::Rumale::Base::Evaluator
|
16
|
+
|
17
|
+
# Calculate mean squared error.
|
18
|
+
#
|
19
|
+
# @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
|
20
|
+
# @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
|
21
|
+
# @return [Float] Mean squared error
|
22
|
+
def score(y_true, y_pred)
  # Mean of the squared residuals.
  squared_errors = (y_true - y_pred)**2
  squared_errors.mean
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MeanSquaredLogError is a class that calculates the mean squared logarithmic error.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/mean_squared_log_error'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MeanSquaredLogError.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class MeanSquaredLogError
|
15
|
+
include ::Rumale::Base::Evaluator
|
16
|
+
|
17
|
+
# Calculate mean squared logarithmic error.
|
18
|
+
#
|
19
|
+
# @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
|
20
|
+
# @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
|
21
|
+
# @return [Float] Mean squared logarithmic error.
|
22
|
+
def score(y_true, y_pred)
  # Mean of the squared differences between log(y_true + 1) and log(y_pred + 1).
  log_true = Numo::NMath.log(y_true + 1)
  log_pred = Numo::NMath.log(y_pred + 1)
  ((log_true - log_pred)**2).mean
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MedianAbsoluteError is a class that calculates the median absolute error.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/median_absolute_error'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MedianAbsoluteError.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class MedianAbsoluteError
|
15
|
+
include ::Rumale::Base::Evaluator
|
16
|
+
|
17
|
+
# Calculate median absolute error.
|
18
|
+
#
|
19
|
+
# @param y_true [Numo::DFloat] (shape: [n_samples]) Ground truth target values.
|
20
|
+
# @param y_pred [Numo::DFloat] (shape: [n_samples]) Estimated target values.
|
21
|
+
# @return [Float] Median absolute error.
|
22
|
+
def score(y_true, y_pred)
  # Median of the absolute residuals.
  absolute_errors = (y_true - y_pred).abs
  absolute_errors.median
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|