rumale 0.23.3 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +5 -1
  3. data/README.md +3 -288
  4. data/lib/rumale/version.rb +1 -1
  5. data/lib/rumale.rb +20 -131
  6. metadata +252 -150
  7. data/CHANGELOG.md +0 -643
  8. data/CODE_OF_CONDUCT.md +0 -74
  9. data/ext/rumale/extconf.rb +0 -37
  10. data/ext/rumale/rumaleext.c +0 -545
  11. data/ext/rumale/rumaleext.h +0 -12
  12. data/lib/rumale/base/base_estimator.rb +0 -49
  13. data/lib/rumale/base/classifier.rb +0 -36
  14. data/lib/rumale/base/cluster_analyzer.rb +0 -31
  15. data/lib/rumale/base/evaluator.rb +0 -17
  16. data/lib/rumale/base/regressor.rb +0 -36
  17. data/lib/rumale/base/splitter.rb +0 -21
  18. data/lib/rumale/base/transformer.rb +0 -22
  19. data/lib/rumale/clustering/dbscan.rb +0 -123
  20. data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
  21. data/lib/rumale/clustering/hdbscan.rb +0 -291
  22. data/lib/rumale/clustering/k_means.rb +0 -122
  23. data/lib/rumale/clustering/k_medoids.rb +0 -141
  24. data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
  25. data/lib/rumale/clustering/power_iteration.rb +0 -127
  26. data/lib/rumale/clustering/single_linkage.rb +0 -203
  27. data/lib/rumale/clustering/snn.rb +0 -76
  28. data/lib/rumale/clustering/spectral_clustering.rb +0 -115
  29. data/lib/rumale/dataset.rb +0 -246
  30. data/lib/rumale/decomposition/factor_analysis.rb +0 -150
  31. data/lib/rumale/decomposition/fast_ica.rb +0 -188
  32. data/lib/rumale/decomposition/nmf.rb +0 -124
  33. data/lib/rumale/decomposition/pca.rb +0 -159
  34. data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
  35. data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
  36. data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
  37. data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
  38. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
  39. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
  40. data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
  41. data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
  42. data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
  43. data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
  44. data/lib/rumale/ensemble/voting_classifier.rb +0 -126
  45. data/lib/rumale/ensemble/voting_regressor.rb +0 -82
  46. data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
  47. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
  48. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
  49. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
  50. data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
  51. data/lib/rumale/evaluation_measure/f_score.rb +0 -50
  52. data/lib/rumale/evaluation_measure/function.rb +0 -147
  53. data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
  54. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
  55. data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
  56. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
  57. data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
  58. data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
  59. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
  60. data/lib/rumale/evaluation_measure/precision.rb +0 -50
  61. data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
  62. data/lib/rumale/evaluation_measure/purity.rb +0 -40
  63. data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
  64. data/lib/rumale/evaluation_measure/recall.rb +0 -50
  65. data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
  66. data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
  67. data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
  68. data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
  69. data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
  70. data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
  71. data/lib/rumale/kernel_approximation/rbf.rb +0 -102
  72. data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
  73. data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
  74. data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
  75. data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
  76. data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
  77. data/lib/rumale/linear_model/base_sgd.rb +0 -285
  78. data/lib/rumale/linear_model/elastic_net.rb +0 -119
  79. data/lib/rumale/linear_model/lasso.rb +0 -115
  80. data/lib/rumale/linear_model/linear_regression.rb +0 -201
  81. data/lib/rumale/linear_model/logistic_regression.rb +0 -275
  82. data/lib/rumale/linear_model/nnls.rb +0 -137
  83. data/lib/rumale/linear_model/ridge.rb +0 -209
  84. data/lib/rumale/linear_model/svc.rb +0 -213
  85. data/lib/rumale/linear_model/svr.rb +0 -132
  86. data/lib/rumale/manifold/mds.rb +0 -155
  87. data/lib/rumale/manifold/tsne.rb +0 -222
  88. data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
  89. data/lib/rumale/metric_learning/mlkr.rb +0 -161
  90. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
  91. data/lib/rumale/model_selection/cross_validation.rb +0 -125
  92. data/lib/rumale/model_selection/function.rb +0 -42
  93. data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
  94. data/lib/rumale/model_selection/group_k_fold.rb +0 -93
  95. data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
  96. data/lib/rumale/model_selection/k_fold.rb +0 -81
  97. data/lib/rumale/model_selection/shuffle_split.rb +0 -90
  98. data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
  99. data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
  100. data/lib/rumale/model_selection/time_series_split.rb +0 -91
  101. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
  102. data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
  103. data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
  104. data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
  105. data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
  106. data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
  107. data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
  108. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
  109. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
  110. data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
  111. data/lib/rumale/neural_network/adam.rb +0 -56
  112. data/lib/rumale/neural_network/base_mlp.rb +0 -248
  113. data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
  114. data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
  115. data/lib/rumale/pairwise_metric.rb +0 -152
  116. data/lib/rumale/pipeline/feature_union.rb +0 -69
  117. data/lib/rumale/pipeline/pipeline.rb +0 -175
  118. data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
  119. data/lib/rumale/preprocessing/binarizer.rb +0 -60
  120. data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
  121. data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
  122. data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
  123. data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
  124. data/lib/rumale/preprocessing/label_encoder.rb +0 -79
  125. data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
  126. data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
  127. data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
  128. data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
  129. data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
  130. data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
  131. data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
  132. data/lib/rumale/probabilistic_output.rb +0 -114
  133. data/lib/rumale/tree/base_decision_tree.rb +0 -150
  134. data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
  135. data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
  136. data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
  137. data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
  138. data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
  139. data/lib/rumale/tree/node.rb +0 -39
  140. data/lib/rumale/utils.rb +0 -42
  141. data/lib/rumale/validation.rb +0 -128
  142. data/lib/rumale/values.rb +0 -13
@@ -1,56 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
-
5
- module Rumale
6
- module EvaluationMeasure
7
- # CalinskiHarabaszScore is a class that calculates the Calinski and Harabasz score.
8
- #
9
- # @example
10
- # evaluator = Rumale::EvaluationMeasure::CalinskiHarabaszScore.new
11
- # puts evaluator.score(x, predicted)
12
- #
13
- # *Reference*
14
- # - Calinski, T., and Harabasz, J., "A dendrite method for cluster analysis," Communications in Statistics, Vol. 3 (1), pp. 1--27, 1974.
15
- class CalinskiHarabaszScore
16
- include Base::Evaluator
17
-
18
- # Calculates the Calinski and Harabasz score.
19
- #
20
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
21
- # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
22
- # @return [Float] The Calinski and Harabasz score.
23
- def score(x, y)
24
- x = check_convert_sample_array(x)
25
- y = check_convert_label_array(y)
26
- check_sample_label_size(x, y)
27
-
28
- labels = y.to_a.uniq.sort
29
- n_clusters = labels.size
30
- n_dimensions = x.shape[1]
31
-
32
- centroids = Numo::DFloat.zeros(n_clusters, n_dimensions)
33
-
34
- within_group = 0.0
35
- n_clusters.times do |n|
36
- cls_samples = x[y.eq(labels[n]), true]
37
- cls_centroid = cls_samples.mean(0)
38
- centroids[n, true] = cls_centroid
39
- within_group += ((cls_samples - cls_centroid)**2).sum
40
- end
41
-
42
- return 1.0 if within_group.zero?
43
-
44
- mean_vec = x.mean(0)
45
- between_group = 0.0
46
- n_clusters.times do |n|
47
- sz_cluster = y.eq(labels[n]).count
48
- between_group += sz_cluster * ((centroids[n, true] - mean_vec)**2).sum
49
- end
50
-
51
- n_samples = x.shape[0]
52
- (between_group / (n_clusters - 1)) / (within_group / (n_samples - n_clusters))
53
- end
54
- end
55
- end
56
- end
@@ -1,53 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
- require 'rumale/pairwise_metric'
5
-
6
- module Rumale
7
- module EvaluationMeasure
8
- # DaviesBouldinScore is a class that calculates the Davies-Bouldin score.
9
- #
10
- # @example
11
- # evaluator = Rumale::EvaluationMeasure::DaviesBouldinScore.new
12
- # puts evaluator.score(x, predicted)
13
- #
14
- # *Reference*
15
- # - Davies, D L., and Bouldin, D W., "A Cluster Separation Measure," IEEE Trans. Pattern Analysis and Machine Intelligence, Vol. PAMI-1, No. 2, pp. 224--227, 1979.
16
- class DaviesBouldinScore
17
- include Base::Evaluator
18
-
19
- # Calculates the Davies-Bouldin score.
20
- #
21
- # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for calculating score.
22
- # @param y [Numo::Int32] (shape: [n_samples]) The predicted labels for each sample.
23
- # @return [Float] The Davies-Bouldin score.
24
- def score(x, y)
25
- x = check_convert_sample_array(x)
26
- y = check_convert_label_array(y)
27
- check_sample_label_size(x, y)
28
-
29
- labels = y.to_a.uniq.sort
30
- n_clusters = labels.size
31
- n_dimensions = x.shape[1]
32
-
33
- dist_cluster = Numo::DFloat.zeros(n_clusters)
34
- centroids = Numo::DFloat.zeros(n_clusters, n_dimensions)
35
-
36
- n_clusters.times do |n|
37
- cls_samples = x[y.eq(labels[n]), true]
38
- cls_centroid = cls_samples.mean(0)
39
- centroids[n, true] = cls_centroid
40
- dist_cluster[n] = Rumale::PairwiseMetric.euclidean_distance(cls_samples, cls_centroid.expand_dims(0)).mean
41
- end
42
-
43
- dist_centroid = Rumale::PairwiseMetric.euclidean_distance(centroids)
44
- # p dist_cluster
45
- # p dist_centroid
46
- dist_centroid[dist_centroid.eq(0)] = Float::INFINITY
47
- dist_mat = (dist_cluster.expand_dims(1) + dist_cluster) / dist_centroid
48
- dist_mat[dist_mat.diag_indices] = -Float::INFINITY
49
- dist_mat.max(0).mean
50
- end
51
- end
52
- end
53
- end
@@ -1,39 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
-
5
- module Rumale
6
- module EvaluationMeasure
7
- # ExplainedVarianceScore is a class that calculates the explained variance score.
8
- #
9
- # @example
10
- # evaluator = Rumale::EvaluationMeasure::ExplainedVarianceScore.new
11
- # puts evaluator.score(ground_truth, predicted)
12
- class ExplainedVarianceScore
13
- include Base::Evaluator
14
-
15
- # Calculate explained variance score.
16
- #
17
- # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
18
- # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
19
- # @return [Float] Explained variance score.
20
- def score(y_true, y_pred)
21
- y_true = check_convert_tvalue_array(y_true)
22
- y_pred = check_convert_tvalue_array(y_pred)
23
- raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
24
-
25
- diff = y_true - y_pred
26
- numerator = ((diff - diff.mean(0))**2).mean(0)
27
- denominator = ((y_true - y_true.mean(0))**2).mean(0)
28
-
29
- n_outputs = y_true.shape[1]
30
- if n_outputs.nil?
31
- denominator.zero? ? 0 : 1.0 - numerator / denominator
32
- else
33
- valids = denominator.ne(0)
34
- (1.0 - numerator[valids] / denominator[valids]).sum / n_outputs
35
- end
36
- end
37
- end
38
- end
39
- end
@@ -1,50 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
- require 'rumale/evaluation_measure/precision_recall'
5
-
6
- module Rumale
7
- # This module consists of the classes for model evaluation.
8
- module EvaluationMeasure
9
- # FScore is a class that calculates the F1-score of the predicted labels.
10
- #
11
- # @example
12
- # evaluator = Rumale::EvaluationMeasure::FScore.new
13
- # puts evaluator.score(ground_truth, predicted)
14
- class FScore
15
- include Base::Evaluator
16
- include EvaluationMeasure::PrecisionRecall
17
-
18
- # Return the average type for calculation of F1-score.
19
- # @return [String] ('binary', 'micro', 'macro')
20
- attr_reader :average
21
-
22
- # Create a new evaluation measure calculater for F1-score.
23
- #
24
- # @param average [String] The average type ('binary', 'micro', 'macro')
25
- def initialize(average: 'binary')
26
- check_params_string(average: average)
27
- @average = average
28
- end
29
-
30
- # Calculate average F1-score
31
- #
32
- # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
33
- # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
34
- # @return [Float] Average F1-score
35
- def score(y_true, y_pred)
36
- y_true = check_convert_label_array(y_true)
37
- y_pred = check_convert_label_array(y_pred)
38
-
39
- case @average
40
- when 'binary'
41
- f_score_each_class(y_true, y_pred).last
42
- when 'micro'
43
- micro_average_f_score(y_true, y_pred)
44
- when 'macro'
45
- macro_average_f_score(y_true, y_pred)
46
- end
47
- end
48
- end
49
- end
50
- end
@@ -1,147 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/validation'
4
- require 'rumale/evaluation_measure/accuracy'
5
- require 'rumale/evaluation_measure/precision_recall'
6
-
7
- module Rumale
8
- module EvaluationMeasure
9
- module_function
10
-
11
- # Calculate confusion matrix for evaluating classification performance.
12
- #
13
- # @example
14
- # y_true = Numo::Int32[2, 0, 2, 2, 0, 1]
15
- # y_pred = Numo::Int32[0, 0, 2, 2, 0, 2]
16
- # p Rumale::EvaluationMeasure.confusion_matrix(y_true, y_pred)
17
- #
18
- # # Numo::Int32#shape=[3,3]
19
- # # [[2, 0, 0],
20
- # # [0, 0, 1],
21
- # # [1, 0, 2]]
22
- #
23
- # @param y_true [Numo::Int32] (shape: [n_samples]) The ground truth labels.
24
- # @param y_pred [Numo::Int32] (shape: [n_samples]) The predicted labels.
25
- # @return [Numo::Int32] (shape: [n_classes, n_classes]) The confusion matrix.
26
- def confusion_matrix(y_true, y_pred)
27
- y_true = Rumale::Validation.check_convert_label_array(y_true)
28
- y_pred = Rumale::Validation.check_convert_label_array(y_pred)
29
-
30
- labels = y_true.to_a.uniq.sort
31
- n_labels = labels.size
32
-
33
- conf_mat = Numo::Int32.zeros(n_labels, n_labels)
34
-
35
- labels.each_with_index do |lbl_a, i|
36
- y_p = y_pred[y_true.eq(lbl_a)]
37
- labels.each_with_index do |lbl_b, j|
38
- conf_mat[i, j] = y_p.eq(lbl_b).count
39
- end
40
- end
41
-
42
- conf_mat
43
- end
44
-
45
- # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
46
-
47
- # Output a summary of classification performance for each class.
48
- #
49
- # @example
50
- # y_true = Numo::Int32[0, 1, 1, 2, 2, 2, 0]
51
- # y_pred = Numo::Int32[1, 1, 1, 0, 0, 2, 0]
52
- # puts Rumale::EvaluationMeasure.classification_report(y_true, y_pred)
53
- #
54
- # # precision recall f1-score support
55
- # #
56
- # # 0 0.33 0.50 0.40 2
57
- # # 1 0.67 1.00 0.80 2
58
- # # 2 1.00 0.33 0.50 3
59
- # #
60
- # # accuracy 0.57 7
61
- # # macro avg 0.67 0.61 0.57 7
62
- # # weighted avg 0.71 0.57 0.56 7
63
- #
64
- # @param y_true [Numo::Int32] (shape: [n_samples]) The ground truth labels.
65
- # @param y_pred [Numo::Int32] (shape: [n_samples]) The predicted labels.
66
- # @param target_name [Nil/Array] The label names.
67
- # @param output_hash [Boolean] The flag indicating whether to output with Ruby Hash.
68
- # @return [String/Hash] The summary of classification performance.
69
- # If output_hash is true, it returns the summary with Ruby Hash.
70
- def classification_report(y_true, y_pred, target_name: nil, output_hash: false)
71
- y_true = Rumale::Validation.check_convert_label_array(y_true)
72
- y_pred = Rumale::Validation.check_convert_label_array(y_pred)
73
- # calculate each evaluation measure.
74
- classes = y_true.to_a.uniq.sort
75
- supports = Numo::Int32.asarray(classes.map { |label| y_true.eq(label).count })
76
- precisions = Rumale::EvaluationMeasure::PrecisionRecall.precision_each_class(y_true, y_pred)
77
- recalls = Rumale::EvaluationMeasure::PrecisionRecall.recall_each_class(y_true, y_pred)
78
- fscores = Rumale::EvaluationMeasure::PrecisionRecall.f_score_each_class(y_true, y_pred)
79
- macro_precision = Rumale::EvaluationMeasure::PrecisionRecall.macro_average_precision(y_true, y_pred)
80
- macro_recall = Rumale::EvaluationMeasure::PrecisionRecall.macro_average_recall(y_true, y_pred)
81
- macro_fscore = Rumale::EvaluationMeasure::PrecisionRecall.macro_average_f_score(y_true, y_pred)
82
- accuracy = Rumale::EvaluationMeasure::Accuracy.new.score(y_true, y_pred)
83
- sum_supports = supports.sum
84
- weights = Numo::DFloat.cast(supports) / sum_supports
85
- weighted_precision = (Numo::DFloat.cast(precisions) * weights).sum
86
- weighted_recall = (Numo::DFloat.cast(recalls) * weights).sum
87
- weighted_fscore = (Numo::DFloat.cast(fscores) * weights).sum
88
- # output reults.
89
- target_name ||= classes
90
- target_name.map!(&:to_s)
91
- if output_hash
92
- res = {}
93
- target_name.each_with_index do |label, n|
94
- res[label] = {
95
- precision: precisions[n],
96
- recall: recalls[n],
97
- fscore: fscores[n],
98
- support: supports[n]
99
- }
100
- end
101
- res[:accuracy] = accuracy
102
- res[:macro_avg] = {
103
- precision: macro_precision,
104
- recall: macro_recall,
105
- fscore: macro_fscore,
106
- support: sum_supports
107
- }
108
- res[:weighted_avg] = {
109
- precision: weighted_precision,
110
- recall: weighted_recall,
111
- fscore: weighted_fscore,
112
- support: sum_supports
113
- }
114
- else
115
- width = [12, target_name.map(&:size).max].max # 12 is 'weighted avg'.size
116
- res = +''
117
- res << "#{' ' * width} precision recall f1-score support\n"
118
- res << "\n"
119
- target_name.each_with_index do |label, n|
120
- label_str = format("%##{width}s", label)
121
- precision_str = format('%#10s', format('%.2f', precisions[n]))
122
- recall_str = format('%#10s', format('%.2f', recalls[n]))
123
- fscore_str = format('%#10s', format('%.2f', fscores[n]))
124
- supports_str = format('%#10s', supports[n])
125
- res << "#{label_str} #{precision_str}#{recall_str}#{fscore_str}#{supports_str}\n"
126
- end
127
- res << "\n"
128
- supports_str = format('%#10s', sum_supports)
129
- accuracy_str = format('%#30s', format('%.2f', accuracy))
130
- res << format("%##{width}s ", 'accuracy')
131
- res << "#{accuracy_str}#{supports_str}\n"
132
- precision_str = format('%#10s', format('%.2f', macro_precision))
133
- recall_str = format('%#10s', format('%.2f', macro_recall))
134
- fscore_str = format('%#10s', format('%.2f', macro_fscore))
135
- res << format("%##{width}s ", 'macro avg')
136
- res << "#{precision_str}#{recall_str}#{fscore_str}#{supports_str}\n"
137
- precision_str = format('%#10s', format('%.2f', weighted_precision))
138
- recall_str = format('%#10s', format('%.2f', weighted_recall))
139
- fscore_str = format('%#10s', format('%.2f', weighted_fscore))
140
- res << format("%##{width}s ", 'weighted avg')
141
- res << "#{precision_str}#{recall_str}#{fscore_str}#{supports_str}\n"
142
- end
143
- res
144
- end
145
- # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
146
- end
147
- end
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
- require 'rumale/preprocessing/label_binarizer'
5
-
6
- module Rumale
7
- module EvaluationMeasure
8
- # LogLoss is a class that calculates the logarithmic loss of predicted class probability.
9
- #
10
- # @example
11
- # evaluator = Rumale::EvaluationMeasure::LogLoss.new
12
- # puts evaluator.score(ground_truth, predicted)
13
- class LogLoss
14
- include Base::Evaluator
15
-
16
- # Calculate mean logarithmic loss.
17
- # If both y_true and y_pred are array (both shapes are [n_samples]), this method calculates
18
- # mean logarithmic loss for binary classification.
19
- #
20
- # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
21
- # @param y_pred [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted class probability.
22
- # @param eps [Float] A small value close to zero to avoid outputting infinity in logarithmic calcuation.
23
- # @return [Float] mean logarithmic loss
24
- def score(y_true, y_pred, eps = 1e-15)
25
- y_true = check_convert_label_array(y_true)
26
- y_pred = check_convert_tvalue_array(y_pred)
27
-
28
- n_samples, n_classes = y_pred.shape
29
- clipped_p = y_pred.clip(eps, 1 - eps)
30
-
31
- log_loss = if n_classes.nil?
32
- negative_label = y_true.to_a.uniq.min
33
- bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
34
- -(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
35
- else
36
- encoder = Rumale::Preprocessing::LabelBinarizer.new
37
- encoded_y_true = Numo::DFloat.cast(encoder.fit_transform(y_true))
38
- clipped_p /= clipped_p.sum(1).expand_dims(1)
39
- -(encoded_y_true * Numo::NMath.log(clipped_p)).sum(1)
40
- end
41
- log_loss.sum / n_samples
42
- end
43
- end
44
- end
45
- end
@@ -1,29 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
-
5
- module Rumale
6
- module EvaluationMeasure
7
- # MeanAbsoluteError is a class that calculates the mean absolute error.
8
- #
9
- # @example
10
- # evaluator = Rumale::EvaluationMeasure::MeanAbsoluteError.new
11
- # puts evaluator.score(ground_truth, predicted)
12
- class MeanAbsoluteError
13
- include Base::Evaluator
14
-
15
- # Calculate mean absolute error.
16
- #
17
- # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
18
- # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
19
- # @return [Float] Mean absolute error
20
- def score(y_true, y_pred)
21
- y_true = check_convert_tvalue_array(y_true)
22
- y_pred = check_convert_tvalue_array(y_pred)
23
- raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
24
-
25
- (y_true - y_pred).abs.mean
26
- end
27
- end
28
- end
29
- end
@@ -1,29 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
-
5
- module Rumale
6
- module EvaluationMeasure
7
- # MeanSquaredError is a class that calculates the mean squared error.
8
- #
9
- # @example
10
- # evaluator = Rumale::EvaluationMeasure::MeanSquaredError.new
11
- # puts evaluator.score(ground_truth, predicted)
12
- class MeanSquaredError
13
- include Base::Evaluator
14
-
15
- # Calculate mean squared error.
16
- #
17
- # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
18
- # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
19
- # @return [Float] Mean squared error
20
- def score(y_true, y_pred)
21
- y_true = check_convert_tvalue_array(y_true)
22
- y_pred = check_convert_tvalue_array(y_pred)
23
- raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
24
-
25
- ((y_true - y_pred)**2).mean
26
- end
27
- end
28
- end
29
- end
@@ -1,29 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
-
5
- module Rumale
6
- module EvaluationMeasure
7
- # MeanSquaredLogError is a class that calculates the mean squared logarithmic error.
8
- #
9
- # @example
10
- # evaluator = Rumale::EvaluationMeasure::MeanSquaredError.new
11
- # puts evaluator.score(ground_truth, predicted)
12
- class MeanSquaredLogError
13
- include Base::Evaluator
14
-
15
- # Calculate mean squared logarithmic error.
16
- #
17
- # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
18
- # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
19
- # @return [Float] Mean squared logarithmic error.
20
- def score(y_true, y_pred)
21
- y_true = check_convert_tvalue_array(y_true)
22
- y_pred = check_convert_tvalue_array(y_pred)
23
- raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
24
-
25
- ((Numo::NMath.log(y_true + 1) - Numo::NMath.log(y_pred + 1))**2).mean
26
- end
27
- end
28
- end
29
- end
@@ -1,30 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
-
5
- module Rumale
6
- module EvaluationMeasure
7
- # MedianAbsoluteError is a class that calculates the median absolute error.
8
- #
9
- # @example
10
- # evaluator = Rumale::EvaluationMeasure::MedianAbsoluteError.new
11
- # puts evaluator.score(ground_truth, predicted)
12
- class MedianAbsoluteError
13
- include Base::Evaluator
14
-
15
- # Calculate median absolute error.
16
- #
17
- # @param y_true [Numo::DFloat] (shape: [n_samples]) Ground truth target values.
18
- # @param y_pred [Numo::DFloat] (shape: [n_samples]) Estimated target values.
19
- # @return [Float] Median absolute error.
20
- def score(y_true, y_pred)
21
- y_true = check_convert_tvalue_array(y_true)
22
- y_pred = check_convert_tvalue_array(y_pred)
23
- raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
24
- raise ArgumentError, 'Expect target values to be 1-D arrray' if [y_true.shape.size, y_pred.shape.size].max > 1
25
-
26
- (y_true - y_pred).abs.median
27
- end
28
- end
29
- end
30
- end
@@ -1,49 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
-
5
- module Rumale
6
- module EvaluationMeasure
7
- # MutualInformation is a class that calculates the mutual information.
8
- #
9
- # @example
10
- # evaluator = Rumale::EvaluationMeasure::MutualInformation.new
11
- # puts evaluator.score(ground_truth, predicted)
12
- #
13
- # *Reference*
14
- # - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance," J. Machine Learning Research, vol. 11, pp. 2837--1854, 2010.
15
- class MutualInformation
16
- include Base::Evaluator
17
-
18
- # Calculate mutual information
19
- #
20
- # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
21
- # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
22
- # @return [Float] Mutual information.
23
- def score(y_true, y_pred)
24
- y_true = check_convert_label_array(y_true)
25
- y_pred = check_convert_label_array(y_pred)
26
- # initiazlie some variables.
27
- mutual_information = 0.0
28
- n_samples = y_pred.size
29
- class_ids = y_true.to_a.uniq
30
- cluster_ids = y_pred.to_a.uniq
31
- # calculate mutual information.
32
- cluster_ids.map do |k|
33
- pr_sample_ids = y_pred.eq(k).where.to_a
34
- n_pr_samples = pr_sample_ids.size
35
- class_ids.map do |j|
36
- tr_sample_ids = y_true.eq(j).where.to_a
37
- n_tr_samples = tr_sample_ids.size
38
- n_intr_samples = (pr_sample_ids & tr_sample_ids).size
39
- if n_intr_samples.positive?
40
- mutual_information +=
41
- n_intr_samples.fdiv(n_samples) * Math.log((n_samples * n_intr_samples).fdiv(n_pr_samples * n_tr_samples))
42
- end
43
- end
44
- end
45
- mutual_information
46
- end
47
- end
48
- end
49
- end
@@ -1,53 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
- require 'rumale/evaluation_measure/mutual_information'
5
-
6
- module Rumale
7
- module EvaluationMeasure
8
- # NormalizedMutualInformation is a class that calculates the normalized mutual information.
9
- #
10
- # @example
11
- # evaluator = Rumale::EvaluationMeasure::NormalizedMutualInformation.new
12
- # puts evaluator.score(ground_truth, predicted)
13
- #
14
- # *Reference*
15
- # - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
16
- # - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance," J. Machine Learning Research, vol. 11, pp. 2837--1854, 2010.
17
- class NormalizedMutualInformation
18
- include Base::Evaluator
19
-
20
- # Calculate noramlzied mutual information
21
- #
22
- # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
23
- # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
24
- # @return [Float] Normalized mutual information
25
- def score(y_true, y_pred)
26
- y_true = check_convert_label_array(y_true)
27
- y_pred = check_convert_label_array(y_pred)
28
- # calculate entropies.
29
- class_entropy = entropy(y_true)
30
- return 0.0 if class_entropy.zero?
31
-
32
- cluster_entropy = entropy(y_pred)
33
- return 0.0 if cluster_entropy.zero?
34
-
35
- # calculate mutual information.
36
- mi = MutualInformation.new
37
- mi.score(y_true, y_pred) / Math.sqrt(class_entropy * cluster_entropy)
38
- end
39
-
40
- private
41
-
42
- def entropy(y)
43
- n_samples = y.size
44
- indices = y.to_a.uniq
45
- sum_log = indices.map do |k|
46
- ratio = y.eq(k).count.fdiv(n_samples)
47
- ratio * Math.log(ratio)
48
- end.reduce(:+)
49
- -sum_log
50
- end
51
- end
52
- end
53
- end
@@ -1,50 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/evaluator'
4
- require 'rumale/evaluation_measure/precision_recall'
5
-
6
- module Rumale
7
- # This module consists of the classes for model evaluation.
8
- module EvaluationMeasure
9
- # Precision is a class that calculates the preicision of the predicted labels.
10
- #
11
- # @example
12
- # evaluator = Rumale::EvaluationMeasure::Precision.new
13
- # puts evaluator.score(ground_truth, predicted)
14
- class Precision
15
- include Base::Evaluator
16
- include EvaluationMeasure::PrecisionRecall
17
-
18
- # Return the average type for calculation of precision.
19
- # @return [String] ('binary', 'micro', 'macro')
20
- attr_reader :average
21
-
22
- # Create a new evaluation measure calculater for precision score.
23
- #
24
- # @param average [String] The average type ('binary', 'micro', 'macro')
25
- def initialize(average: 'binary')
26
- check_params_string(average: average)
27
- @average = average
28
- end
29
-
30
- # Calculate average precision.
31
- #
32
- # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
33
- # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
34
- # @return [Float] Average precision
35
- def score(y_true, y_pred)
36
- y_true = check_convert_label_array(y_true)
37
- y_pred = check_convert_label_array(y_pred)
38
-
39
- case @average
40
- when 'binary'
41
- precision_each_class(y_true, y_pred).last
42
- when 'micro'
43
- micro_average_precision(y_true, y_pred)
44
- when 'macro'
45
- macro_average_precision(y_true, y_pred)
46
- end
47
- end
48
- end
49
- end
50
- end