rumale-evaluation_measure 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/evaluation_measure/accuracy.rb +27 -0
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +73 -0
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +54 -0
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +47 -0
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +37 -0
- data/lib/rumale/evaluation_measure/f_score.rb +48 -0
- data/lib/rumale/evaluation_measure/function.rb +143 -0
- data/lib/rumale/evaluation_measure/log_loss.rb +53 -0
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +27 -0
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +27 -0
- data/lib/rumale/evaluation_measure/mutual_information.rb +49 -0
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +53 -0
- data/lib/rumale/evaluation_measure/precision.rb +48 -0
- data/lib/rumale/evaluation_measure/precision_recall.rb +98 -0
- data/lib/rumale/evaluation_measure/purity.rb +40 -0
- data/lib/rumale/evaluation_measure/r2_score.rb +40 -0
- data/lib/rumale/evaluation_measure/recall.rb +48 -0
- data/lib/rumale/evaluation_measure/roc_auc.rb +126 -0
- data/lib/rumale/evaluation_measure/silhouette_score.rb +79 -0
- data/lib/rumale/evaluation_measure/version.rb +10 -0
- data/lib/rumale/evaluation_measure.rb +27 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d22a96529d4ce3e701d769258ce6de524d06b9716405a9300a0675bcddd28409
|
4
|
+
data.tar.gz: bf7f2f49863313143016aeb0a1b596a1d2bfb405dd3803e0a8fe17d134086896
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 63634dc026752d51849ba6bd64f40c92dd2ae396565cbd183d39c7713b5c3243bbc8b5a2329d013927fbb1de6e542f5fe9cc6889cfcc1fb64260d11a6bc5284f
|
7
|
+
data.tar.gz: 59a9cd733f2d49e1db27443a1d80950f16d073af611e2c3efc39e7842d13240a7105b37136317a7a81adf3445c52f4e8cbada0e3db5ea3c0366995c3c1c5e958
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2022 Atsushi Tatsuma
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright holder nor the names of its
|
15
|
+
contributors may be used to endorse or promote products derived from
|
16
|
+
this software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Rumale::EvaluationMeasure
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/rumale-evaluation_measure.svg)](https://badge.fury.io/rb/rumale-evaluation_measure)
|
4
|
+
[![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-evaluation_measure/LICENSE.txt)
|
5
|
+
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure.html)
|
6
|
+
|
7
|
+
Rumale is a machine learning library in Ruby.
|
8
|
+
Rumale::EvaluationMeasure provides evaluation measures,
|
9
|
+
such as accuracy, precision, recall, and f-score,
|
10
|
+
with the Rumale interface.
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'rumale-evaluation_measure'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle install
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install rumale-evaluation_measure
|
27
|
+
|
28
|
+
## Documentation
|
29
|
+
|
30
|
+
- [Rumale API Documentation - EvaluationMeasure](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure.html)
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# Accuracy is a class that calculates the accuracy of classifier from the predicted labels.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/accuracy'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::Accuracy.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class Accuracy
  include ::Rumale::Base::Evaluator

  # Calculate mean accuracy, i.e. the fraction of samples whose predicted
  # label is equal to the ground truth label.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Mean accuracy
  def score(y_true, y_pred)
    # Count the positions where both label sequences agree.
    n_correct = y_true.to_a.each_with_index.count { |label, idx| label == y_pred[idx] }
    n_correct / y_true.size.to_f
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# AdjustedRandScore is a class that calculates the adjusted rand index.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/adjusted_rand_score'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::AdjustedRandScore.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance", J. Machine Learning Research, Vol. 11, pp.2837--2854, 2010.
|
17
|
+
class AdjustedRandScore
  include ::Rumale::Base::Evaluator

  # Calculate adjusted rand index.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
  # @return [Float] Adjusted rand index.
  def score(y_true, y_pred)
    # initialize some variables.
    n_samples = y_pred.size
    n_classes = y_true.to_a.uniq.size
    n_clusters = y_pred.to_a.uniq.size

    # degenerate labelings are scored as a perfect agreement.
    return 1.0 if special_cases?(n_samples, n_classes, n_clusters)

    # count sample pairs per class (rows), per cluster (columns), and per table cell.
    table = contingency_table(y_true, y_pred)
    pairs_per_class = table.sum(axis: 1).to_a.sum { |cnt| comb_two(cnt) }
    pairs_per_cluster = table.sum(axis: 0).to_a.sum { |cnt| comb_two(cnt) }
    pairs_per_cell = table.flatten.to_a.sum { |cnt| comb_two(cnt) }

    # combine the pair counts into the adjusted rand index.
    prod_comb = (pairs_per_class * pairs_per_cluster).fdiv(comb_two(n_samples))
    mean_comb = (pairs_per_class + pairs_per_cluster).fdiv(2)
    (pairs_per_cell - prod_comb).fdiv(mean_comb - prod_comb)
  end

  private

  # Build the class-by-cluster co-occurrence count table.
  # Rows and columns follow the order of first appearance of each label.
  def contingency_table(y_true, y_pred)
    class_ids = y_true.to_a.uniq
    cluster_ids = y_pred.to_a.uniq
    table = Numo::Int32.zeros(class_ids.size, cluster_ids.size)
    class_ids.each_with_index do |class_id, row|
      in_class = y_true.eq(class_id)
      cluster_ids.each_with_index do |cluster_id, col|
        table[row, col] = (in_class & y_pred.eq(cluster_id)).count
      end
    end
    table
  end

  # Return true when there are no labels at all, a single class and a single
  # cluster, or every sample forms its own class and its own cluster.
  def special_cases?(n_samples, n_classes, n_clusters)
    return true if n_classes.zero? && n_clusters.zero?
    return true if n_classes == 1 && n_clusters == 1

    n_classes == n_samples && n_clusters == n_samples
  end

  # Number of unordered pairs that can be drawn from k items.
  def comb_two(k)
    (k * (k - 1)) / 2
  end
end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# CalinskiHarabaszScore is a class that calculates the Calinski and Harabasz score.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/calinski_harabasz_score'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::CalinskiHarabaszScore.new
|
13
|
+
# puts evaluator.score(x, predicted)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - Calinski, T., and Harabasz, J., "A dendrite method for cluster analysis," Communication in Statistics, Vol. 3 (1), pp. 1--27, 1972.
|
17
|
+
class CalinskiHarabaszScore
  include ::Rumale::Base::Evaluator

  # Calculates the Calinski and Harabasz score.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
  # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
  # @return [Float] The Calinski and Harabasz score.
  def score(x, y)
    labels = y.to_a.uniq.sort
    n_clusters = labels.size
    centroids = Numo::DFloat.zeros(n_clusters, x.shape[1])

    # Sum of squared deviations of the samples from their own cluster centroid.
    within_group = 0.0
    labels.each_with_index do |label, n|
      members = x[y.eq(label), true]
      center = members.mean(0)
      centroids[n, true] = center
      within_group += ((members - center)**2).sum
    end

    # No within-cluster dispersion: return a fixed score rather than dividing by zero.
    return 1.0 if within_group.zero?

    # Size-weighted sum of squared deviations of the centroids from the overall mean.
    overall_mean = x.mean(0)
    between_group = 0.0
    labels.each_with_index do |label, n|
      between_group += y.eq(label).count * ((centroids[n, true] - overall_mean)**2).sum
    end

    n_samples = x.shape[0]
    (between_group / (n_clusters - 1)) / (within_group / (n_samples - n_clusters))
  end
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/pairwise_metric'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module EvaluationMeasure
|
8
|
+
# DaviesBouldinScore is a class that calculates the Davies-Bouldin score.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# require 'rumale/evaluation_measure/davies_bouldin_score'
|
12
|
+
#
|
13
|
+
# evaluator = Rumale::EvaluationMeasure::DaviesBouldinScore.new
|
14
|
+
# puts evaluator.score(x, predicted)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Davies, D L., and Bouldin, D W., "A Cluster Separation Measure," IEEE Trans. Pattern Analysis and Machine Intelligence, Vol. PAMI-1, No. 2, pp. 224--227, 1979.
|
18
|
+
class DaviesBouldinScore
  # Mixed in for consistency with the other evaluators in this namespace;
  # this file already requires 'rumale/base/evaluator'.
  include ::Rumale::Base::Evaluator

  # Calculates the Davies-Bouldin score.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
  # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
  # @return [Float] The Davies-Bouldin score.
  def score(x, y)
    labels = y.to_a.uniq.sort
    n_clusters = labels.size
    n_dimensions = x.shape[1]

    dist_cluster = Numo::DFloat.zeros(n_clusters)
    centroids = Numo::DFloat.zeros(n_clusters, n_dimensions)

    # For each cluster, store its centroid and the mean distance of its members to that centroid.
    n_clusters.times do |n|
      cls_samples = x[y.eq(labels[n]), true]
      cls_centroid = cls_samples.mean(0)
      centroids[n, true] = cls_centroid
      dist_cluster[n] = Rumale::PairwiseMetric.euclidean_distance(cls_samples, cls_centroid.expand_dims(0)).mean
    end

    # NOTE(review): presumably the pairwise distance matrix between centroids — confirm against PairwiseMetric.
    dist_centroid = Rumale::PairwiseMetric.euclidean_distance(centroids)
    # Zero distances become infinity so the ratio below is 0 instead of a division by zero.
    dist_centroid[dist_centroid.eq(0)] = Float::INFINITY
    dist_mat = (dist_cluster.expand_dims(1) + dist_cluster) / dist_centroid
    # Exclude the comparison of a cluster with itself from the maximum.
    dist_mat[dist_mat.diag_indices] = -Float::INFINITY
    dist_mat.max(0).mean
  end
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# ExplainedVarianceScore is a class that calculates the explained variance score.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/explained_variance_score'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::ExplainedVarianceScore.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class ExplainedVarianceScore
  include ::Rumale::Base::Evaluator

  # Calculate explained variance score.
  #
  # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
  # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
  # @return [Float] Explained variance score.
  def score(y_true, y_pred)
    diff = y_true - y_pred
    # Variance of the residuals and of the ground truth, taken along the sample axis.
    numerator = ((diff - diff.mean(0))**2).mean(0)
    denominator = ((y_true - y_true.mean(0))**2).mean(0)

    n_outputs = y_true.shape[1]
    if n_outputs.nil?
      # One-dimensional targets: both variances are scalars.
      # Return 0.0 (not Integer 0) so the result is always a Float as documented.
      denominator.zero? ? 0.0 : 1.0 - numerator / denominator
    else
      # Outputs with zero variance are left out of the sum (they contribute 0),
      # but the average is still taken over all outputs.
      valids = denominator.ne(0)
      (1.0 - numerator[valids] / denominator[valids]).sum / n_outputs
    end
  end
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
require 'rumale/evaluation_measure/precision_recall'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
# This module consists of the classes for model evaluation.
|
8
|
+
module EvaluationMeasure
|
9
|
+
# FScore is a class that calculates the F1-score of the predicted labels.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/evaluation_measure/f_score'
|
13
|
+
#
|
14
|
+
# evaluator = Rumale::EvaluationMeasure::FScore.new
|
15
|
+
# puts evaluator.score(ground_truth, predicted)
|
16
|
+
class FScore
  include ::Rumale::Base::Evaluator
  include ::Rumale::EvaluationMeasure::PrecisionRecall

  # Return the average type for calculation of F1-score.
  # @return [String] ('binary', 'micro', 'macro')
  attr_reader :average

  # Create a new evaluation measure calculator for F1-score.
  #
  # @param average [String] The average type ('binary', 'micro', 'macro')
  def initialize(average: 'binary')
    @average = average
  end

  # Calculate average F1-score
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Average F1-score
  def score(y_true, y_pred)
    # Any average type other than the three below falls through and yields nil.
    case @average
    when 'binary' then f_score_each_class(y_true, y_pred).last
    when 'micro' then micro_average_f_score(y_true, y_pred)
    when 'macro' then macro_average_f_score(y_true, y_pred)
    end
  end
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require 'rumale/evaluation_measure/accuracy'
|
6
|
+
require 'rumale/evaluation_measure/precision_recall'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
# This module consists of the classes for model evaluation.
|
10
|
+
module EvaluationMeasure
|
11
|
+
module_function
|
12
|
+
|
13
|
+
# Calculate confusion matrix for evaluating classification performance.
|
14
|
+
#
|
15
|
+
# @example
|
16
|
+
# require 'rumale/evaluation_measure/function'
|
17
|
+
#
|
18
|
+
# y_true = Numo::Int32[2, 0, 2, 2, 0, 1]
|
19
|
+
# y_pred = Numo::Int32[0, 0, 2, 2, 0, 2]
|
20
|
+
# p Rumale::EvaluationMeasure.confusion_matrix(y_true, y_pred)
|
21
|
+
#
|
22
|
+
# # Numo::Int32#shape=[3,3]
|
23
|
+
# # [[2, 0, 0],
|
24
|
+
# # [0, 0, 1],
|
25
|
+
# # [1, 0, 2]]
|
26
|
+
#
|
27
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) The ground truth labels.
|
28
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) The predicted labels.
|
29
|
+
# @return [Numo::Int32] (shape: [n_classes, n_classes]) The confusion matrix.
|
30
|
+
def confusion_matrix(y_true, y_pred)
  # Rows and columns are both indexed by the sorted labels found in y_true.
  labels = y_true.to_a.uniq.sort
  conf_mat = Numo::Int32.zeros(labels.size, labels.size)

  labels.each_with_index do |actual, row|
    # Predictions made for the samples whose ground truth label is `actual`.
    preds_for_actual = y_pred[y_true.eq(actual)]
    labels.each_with_index do |predicted, col|
      conf_mat[row, col] = preds_for_actual.eq(predicted).count
    end
  end

  conf_mat
end
|
45
|
+
|
46
|
+
# Output a summary of classification performance for each class.
|
47
|
+
#
|
48
|
+
# @example
|
49
|
+
# y_true = Numo::Int32[0, 1, 1, 2, 2, 2, 0]
|
50
|
+
# y_pred = Numo::Int32[1, 1, 1, 0, 0, 2, 0]
|
51
|
+
# puts Rumale::EvaluationMeasure.classification_report(y_true, y_pred)
|
52
|
+
#
|
53
|
+
# # precision recall f1-score support
|
54
|
+
# #
|
55
|
+
# # 0 0.33 0.50 0.40 2
|
56
|
+
# # 1 0.67 1.00 0.80 2
|
57
|
+
# # 2 1.00 0.33 0.50 3
|
58
|
+
# #
|
59
|
+
# # accuracy 0.57 7
|
60
|
+
# # macro avg 0.67 0.61 0.57 7
|
61
|
+
# # weighted avg 0.71 0.57 0.56 7
|
62
|
+
#
|
63
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) The ground truth labels.
|
64
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) The predicted labels.
|
65
|
+
# @param target_name [Nil/Array] The label names.
|
66
|
+
# @param output_hash [Boolean] The flag indicating whether to output with Ruby Hash.
|
67
|
+
# @return [String/Hash] The summary of classification performance.
|
68
|
+
# If output_hash is true, it returns the summary with Ruby Hash.
|
69
|
+
def classification_report(y_true, y_pred, target_name: nil, output_hash: false) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  # calculate each evaluation measure.
  classes = y_true.to_a.uniq.sort
  supports = Numo::Int32.asarray(classes.map { |label| y_true.eq(label).count })
  precisions = Rumale::EvaluationMeasure::PrecisionRecall.precision_each_class(y_true, y_pred)
  recalls = Rumale::EvaluationMeasure::PrecisionRecall.recall_each_class(y_true, y_pred)
  fscores = Rumale::EvaluationMeasure::PrecisionRecall.f_score_each_class(y_true, y_pred)
  macro_precision = Rumale::EvaluationMeasure::PrecisionRecall.macro_average_precision(y_true, y_pred)
  macro_recall = Rumale::EvaluationMeasure::PrecisionRecall.macro_average_recall(y_true, y_pred)
  macro_fscore = Rumale::EvaluationMeasure::PrecisionRecall.macro_average_f_score(y_true, y_pred)
  accuracy = Rumale::EvaluationMeasure::Accuracy.new.score(y_true, y_pred)
  sum_supports = supports.sum
  # Weight each class by its share of the samples for the weighted averages.
  weights = Numo::DFloat.cast(supports) / sum_supports
  weighted_precision = (Numo::DFloat.cast(precisions) * weights).sum
  weighted_recall = (Numo::DFloat.cast(recalls) * weights).sum
  weighted_fscore = (Numo::DFloat.cast(fscores) * weights).sum
  # output results.
  # Build a new array of label strings instead of using map!, so the caller's
  # target_name array is never modified (and frozen arrays are accepted).
  target_name = (target_name || classes).map(&:to_s)
  if output_hash
    res = {}
    target_name.each_with_index do |label, n|
      res[label] = {
        precision: precisions[n],
        recall: recalls[n],
        fscore: fscores[n],
        support: supports[n]
      }
    end
    res[:accuracy] = accuracy
    res[:macro_avg] = {
      precision: macro_precision,
      recall: macro_recall,
      fscore: macro_fscore,
      support: sum_supports
    }
    res[:weighted_avg] = {
      precision: weighted_precision,
      recall: weighted_recall,
      fscore: weighted_fscore,
      support: sum_supports
    }
  else
    width = [12, target_name.map(&:size).max].max # 12 is 'weighted avg'.size
    res = +''
    res << "#{' ' * width} precision recall f1-score support\n"
    res << "\n"
    # One row per class.
    target_name.each_with_index do |label, n|
      label_str = format("%##{width}s", label)
      precision_str = format('%#10s', format('%.2f', precisions[n]))
      recall_str = format('%#10s', format('%.2f', recalls[n]))
      fscore_str = format('%#10s', format('%.2f', fscores[n]))
      supports_str = format('%#10s', supports[n])
      res << "#{label_str} #{precision_str}#{recall_str}#{fscore_str}#{supports_str}\n"
    end
    res << "\n"
    # Summary rows: accuracy, macro average, weighted average.
    supports_str = format('%#10s', sum_supports)
    accuracy_str = format('%#30s', format('%.2f', accuracy))
    res << format("%##{width}s ", 'accuracy')
    res << "#{accuracy_str}#{supports_str}\n"
    precision_str = format('%#10s', format('%.2f', macro_precision))
    recall_str = format('%#10s', format('%.2f', macro_recall))
    fscore_str = format('%#10s', format('%.2f', macro_fscore))
    res << format("%##{width}s ", 'macro avg')
    res << "#{precision_str}#{recall_str}#{fscore_str}#{supports_str}\n"
    precision_str = format('%#10s', format('%.2f', weighted_precision))
    recall_str = format('%#10s', format('%.2f', weighted_recall))
    fscore_str = format('%#10s', format('%.2f', weighted_fscore))
    res << format("%##{width}s ", 'weighted avg')
    res << "#{precision_str}#{recall_str}#{fscore_str}#{supports_str}\n"
  end
  res
end
|
142
|
+
end
|
143
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# LogLoss is a class that calculates the logarithmic loss of predicted class probability.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/log_loss'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::LogLoss.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class LogLoss
  include ::Rumale::Base::Evaluator

  # Calculate mean logarithmic loss.
  # If both y_true and y_pred are array (both shapes are [n_samples]), this method calculates
  # mean logarithmic loss for binary classification.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted class probability.
  # @param eps [Float] A small value close to zero to avoid outputting infinity in logarithmic calculation.
  # @return [Float] mean logarithmic loss
  def score(y_true, y_pred, eps = 1e-15)
    n_samples, n_classes = y_pred.shape
    clipped_p = y_pred.clip(eps, 1 - eps)

    if n_classes.nil?
      # Binary case: the smallest label is the negative class, every other label is positive.
      negative_label = y_true.to_a.uniq.min
      bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
      log_loss = -(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
    else
      # Multi-class case: one-hot encode the labels and renormalize each row of probabilities.
      one_hot = binarize(y_true)
      clipped_p /= clipped_p.sum(axis: 1).expand_dims(1)
      log_loss = -(one_hot * Numo::NMath.log(clipped_p)).sum(axis: 1)
    end

    log_loss.sum / n_samples
  end

  private

  # One-hot encode the labels; columns follow the sorted unique labels.
  def binarize(y)
    classes = y.to_a.uniq.sort
    binarized = Numo::DFloat.zeros(y.size, classes.size)
    y.to_a.each_with_index { |label, n| binarized[n, classes.index(label)] = 1 }
    binarized
  end
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MeanAbsoluteError is a class that calculates the mean absolute error.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/mean_absolute_error'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MeanAbsoluteError.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class MeanAbsoluteError
  include ::Rumale::Base::Evaluator

  # Calculate mean absolute error.
  #
  # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
  # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
  # @return [Float] Mean absolute error
  def score(y_true, y_pred)
    residuals = y_true - y_pred
    residuals.abs.mean
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MeanSquaredError is a class that calculates the mean squared error.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/mean_squared_error'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MeanSquaredError.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class MeanSquaredError
  include ::Rumale::Base::Evaluator

  # Calculate mean squared error.
  #
  # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
  # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
  # @return [Float] Mean squared error
  def score(y_true, y_pred)
    residuals = y_true - y_pred
    (residuals**2).mean
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MeanSquaredLogError is a class that calculates the mean squared logarithmic error.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/mean_squared_log_error'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MeanSquaredLogError.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class MeanSquaredLogError
  include ::Rumale::Base::Evaluator

  # Calculate mean squared logarithmic error.
  #
  # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
  # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
  # @return [Float] Mean squared logarithmic error.
  def score(y_true, y_pred)
    # Both sides are shifted by one before taking the logarithm.
    log_true = Numo::NMath.log(y_true + 1)
    log_pred = Numo::NMath.log(y_pred + 1)
    ((log_true - log_pred)**2).mean
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# MedianAbsoluteError is a class that calculates the median absolute error.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# require 'rumale/evaluation_measure/median_absolute_error'
|
11
|
+
#
|
12
|
+
# evaluator = Rumale::EvaluationMeasure::MedianAbsoluteError.new
|
13
|
+
# puts evaluator.score(ground_truth, predicted)
|
14
|
+
class MedianAbsoluteError
  include ::Rumale::Base::Evaluator

  # Calculate median absolute error.
  #
  # @param y_true [Numo::DFloat] (shape: [n_samples]) Ground truth target values.
  # @param y_pred [Numo::DFloat] (shape: [n_samples]) Estimated target values.
  # @return [Float] Median absolute error.
  def score(y_true, y_pred)
    residuals = y_true - y_pred
    residuals.abs.median
  end
end
|
26
|
+
end
|
27
|
+
end
|