rumale 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/rumale.rb +3 -0
- data/lib/rumale/evaluation_measure/roc_auc.rb +122 -0
- data/lib/rumale/optimizer/ada_grad.rb +59 -0
- data/lib/rumale/preprocessing/max_abs_scaler.rb +76 -0
- data/lib/rumale/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 38e459906ed25e84791a9d872e9536d633d9db2b
|
4
|
+
data.tar.gz: ea80195eeacbaf3ed7fccaf828e3e809baa862a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 639d266a1045d9ee1fbf37f770bf6171ff8afc5a8183441fbaf0283af7a17dc2e6e05ee44cb601c8b9ea301d5f46d914fc77601fd60345b904aff509f07e5277
|
7
|
+
data.tar.gz: 7010cbf3f11f0139a0dda334cfce4f3fd8ffe059eb776f93e7aba32f91c3f517c8fe4723975ef2036e9981131d741345bab4158ce8354431a8a8b57000dacc2f
|
data/CHANGELOG.md
CHANGED
data/lib/rumale.rb
CHANGED
@@ -17,6 +17,7 @@ require 'rumale/base/transformer'
|
|
17
17
|
require 'rumale/base/splitter'
|
18
18
|
require 'rumale/base/evaluator'
|
19
19
|
require 'rumale/optimizer/sgd'
|
20
|
+
require 'rumale/optimizer/ada_grad'
|
20
21
|
require 'rumale/optimizer/rmsprop'
|
21
22
|
require 'rumale/optimizer/adam'
|
22
23
|
require 'rumale/optimizer/nadam'
|
@@ -52,6 +53,7 @@ require 'rumale/decomposition/pca'
|
|
52
53
|
require 'rumale/decomposition/nmf'
|
53
54
|
require 'rumale/preprocessing/l2_normalizer'
|
54
55
|
require 'rumale/preprocessing/min_max_scaler'
|
56
|
+
require 'rumale/preprocessing/max_abs_scaler'
|
55
57
|
require 'rumale/preprocessing/standard_scaler'
|
56
58
|
require 'rumale/preprocessing/label_encoder'
|
57
59
|
require 'rumale/preprocessing/one_hot_encoder'
|
@@ -65,6 +67,7 @@ require 'rumale/evaluation_measure/accuracy'
|
|
65
67
|
require 'rumale/evaluation_measure/precision'
|
66
68
|
require 'rumale/evaluation_measure/recall'
|
67
69
|
require 'rumale/evaluation_measure/f_score'
|
70
|
+
require 'rumale/evaluation_measure/roc_auc'
|
68
71
|
require 'rumale/evaluation_measure/log_loss'
|
69
72
|
require 'rumale/evaluation_measure/r2_score'
|
70
73
|
require 'rumale/evaluation_measure/explained_variance_score'
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# ROCAUC is a class that calculates the area under the receiver operating characteristic curve from predicted scores.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# # Encode labels to integer array.
|
11
|
+
# labels = %w[A B B C A A C C C A]
|
12
|
+
# label_encoder = Rumale::Preprocessing::LabelEncoder.new
|
13
|
+
# y = label_encoder.fit_transform(labels)
|
14
|
+
# # Fit classifier.
|
15
|
+
# classifier = Rumale::LinearModel::LogisticRegression.new
|
16
|
+
# classifier.fit(x, y)
|
17
|
+
# # Predict class probabilities.
|
18
|
+
# y_score = classifier.predict_proba(x)
|
19
|
+
# # Encode labels to one-hot vectors.
|
20
|
+
# one_hot_encoder = Rumale::Preprocessing::OneHotEncoder.new
|
21
|
+
# y_onehot = one_hot_encoder.fit_transform(y)
|
22
|
+
# # Calculate ROC AUC.
|
23
|
+
# evaluator = Rumale::EvaluationMeasure::ROCAUC.new
|
24
|
+
# puts evaluator.score(y_onehot, y_score)
|
25
|
+
class ROCAUC
|
26
|
+
include Base::Evaluator
|
27
|
+
|
28
|
+
# Calculate the area under the receiver operating characteristic curve (ROC AUC).
|
29
|
+
#
|
30
|
+
# @param y_true [Numo::Int32] (shape: [n_samples] or [n_samples, n_classes])
|
31
|
+
# Ground truth binary labels or one-hot encoded multi-labels.
|
32
|
+
# @param y_score [Numo::DFloat] (shape: [n_samples] or [n_samples, n_classes])
|
33
|
+
# Predicted class probabilities or confidence scores.
|
34
|
+
# @return [Float] (macro-averaged) ROC AUC.
|
35
|
+
def score(y_true, y_score)
|
36
|
+
check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
|
37
|
+
raise ArgumentError, 'Expect to have the same shape for y_true and y_score.' unless y_true.shape == y_score.shape
|
38
|
+
|
39
|
+
n_classes = y_score.shape[1]
|
40
|
+
if n_classes.nil?
|
41
|
+
fpr, tpr, = roc_curve(y_true, y_score)
|
42
|
+
return auc(fpr, tpr)
|
43
|
+
end
|
44
|
+
|
45
|
+
scores = Array.new(n_classes) do |c|
|
46
|
+
fpr, tpr, = roc_curve(y_true[true, c], y_score[true, c])
|
47
|
+
auc(fpr, tpr)
|
48
|
+
end
|
49
|
+
|
50
|
+
scores.reduce(&:+).fdiv(n_classes)
|
51
|
+
end
|
52
|
+
|
53
|
+
# Calculate the receiver operating characteristic curve.
|
54
|
+
#
|
55
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth binary labels.
|
56
|
+
# @param y_score [Numo::DFloat] (shape: [n_samples]) Predicted class probabilities or confidence scores.
|
57
|
+
# @param pos_label [Integer] Label to be a positive label when binarizing the given labels.
|
58
|
+
# If nil is given, the method considers the maximum value of the label as a positive label.
|
59
|
+
# @return [Array] fpr (Numo::DFloat): false positive rates. tpr (Numo::DFloat): true positive rates.
|
60
|
+
# thresholds (Numo::DFloat): thresholds on the decision function used to calculate fpr and tpr.
|
61
|
+
def roc_curve(y_true, y_score, pos_label = nil)
|
62
|
+
check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
|
63
|
+
raise ArgumentError, 'Expect y_true to be 1-D arrray.' unless y_true.shape[1].nil?
|
64
|
+
raise ArgumentError, 'Expect y_score to be 1-D arrray.' unless y_score.shape[1].nil?
|
65
|
+
labels = y_true.to_a.uniq
|
66
|
+
if pos_label.nil?
|
67
|
+
raise ArgumentError, 'y_true must be binary labels or pos_label must be specified if y_true is multi-label' unless labels.size == 2
|
68
|
+
else
|
69
|
+
raise ArgumentError, 'y_true must have elements whose values are pos_label.' unless y_true.to_a.uniq.include?(pos_label)
|
70
|
+
end
|
71
|
+
|
72
|
+
false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)
|
73
|
+
|
74
|
+
if true_pos.size.zero? || false_pos[0] != 0 || true_pos[0] != 0
|
75
|
+
true_pos = true_pos.insert(0, 0)
|
76
|
+
false_pos = false_pos.insert(0, 0)
|
77
|
+
thresholds = thresholds.insert(0, thresholds[0] + 1)
|
78
|
+
end
|
79
|
+
|
80
|
+
tpr = true_pos / true_pos[-1].to_f
|
81
|
+
fpr = false_pos / false_pos[-1].to_f
|
82
|
+
|
83
|
+
[fpr, tpr, thresholds]
|
84
|
+
end
|
85
|
+
|
86
|
+
# Calculate area under the curve using the trapezoidal rule.
|
87
|
+
#
|
88
|
+
# @param x [Numo::Int32/Numo::DFloat] (shape: [n_elements])
|
89
|
+
#   x coordinates. These are expected to be monotonically increasing or decreasing.
|
90
|
+
# @param y [Numo::Int32/Numo::DFloat] (shape: [n_elements]) y coordinates.
|
91
|
+
# @return [Float] area under the curve.
|
92
|
+
def auc(x, y)
|
93
|
+
check_params_type(Numo::NArray, x: x, y: y)
|
94
|
+
raise ArgumentError, 'Expect x to be 1-D arrray.' unless x.shape[1].nil?
|
95
|
+
raise ArgumentError, 'Expect y to be 1-D arrray.' unless y.shape[1].nil?
|
96
|
+
n_samples = [x.shape[0], y.shape[0]].min
|
97
|
+
raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_samples < 2
|
98
|
+
(0...n_samples).to_a.each_cons(2).map { |i, j| 0.5 * (x[i] - x[j]).abs * (y[i] + y[j]) }.reduce(&:+)
|
99
|
+
end
|
100
|
+
|
101
|
+
private
|
102
|
+
|
103
|
+
def binary_roc_curve(y_true, y_score, pos_label = nil)
|
104
|
+
pos_label = y_true.to_a.uniq.max if pos_label.nil?
|
105
|
+
|
106
|
+
bin_y_true = y_true.eq(pos_label)
|
107
|
+
desc_pred_ids = y_score.sort_index.reverse
|
108
|
+
|
109
|
+
desc_y_true = Numo::Int32.cast(bin_y_true[desc_pred_ids])
|
110
|
+
desc_y_score = y_score[desc_pred_ids]
|
111
|
+
|
112
|
+
dist_value_ids = desc_y_score.diff.ne(0).where
|
113
|
+
threshold_ids = dist_value_ids.append(desc_y_true.size - 1)
|
114
|
+
|
115
|
+
true_pos = desc_y_true.cumsum[threshold_ids]
|
116
|
+
false_pos = 1 + threshold_ids - true_pos
|
117
|
+
|
118
|
+
[false_pos, true_pos, desc_y_score[threshold_ids]]
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/validation'
|
4
|
+
require 'rumale/base/base_estimator'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Optimizer
|
8
|
+
# AdaGrad is a class that implements AdaGrad optimizer.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
#   optimizer = Rumale::Optimizer::AdaGrad.new(learning_rate: 0.01)
|
12
|
+
# estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
|
13
|
+
# estimator.fit(samples, values)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - J. Duchi, E. Hazan, and Y. Singer, "Adaptive Subgradient Methods for Online Learning and Stochastic Optimization," J. Machine Learning Research, vol. 12, pp. 2121--2159, 2011.
|
17
|
+
class AdaGrad
|
18
|
+
include Base::BaseEstimator
|
19
|
+
include Validation
|
20
|
+
|
21
|
+
# Create a new optimizer with AdaGrad.
|
22
|
+
#
|
23
|
+
# @param learning_rate [Float] The initial value of learning rate.
|
24
|
+
def initialize(learning_rate: 0.01)
|
25
|
+
check_params_float(learning_rate: learning_rate)
|
26
|
+
check_params_positive(learning_rate: learning_rate)
|
27
|
+
@params = {}
|
28
|
+
@params[:learning_rate] = learning_rate
|
29
|
+
@moment = nil
|
30
|
+
end
|
31
|
+
|
32
|
+
# Calculate the updated weight with AdaGrad adaptive learning rate.
|
33
|
+
#
|
34
|
+
# @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
|
35
|
+
# @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
|
36
|
+
# @return [Numo::DFloat] (shape: [n_features]) The updated weight.
|
37
|
+
def call(weight, gradient)
|
38
|
+
@moment ||= Numo::DFloat.zeros(weight.shape[0])
|
39
|
+
@moment += gradient**2
|
40
|
+
weight - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
|
41
|
+
end
|
42
|
+
|
43
|
+
# Dump marshal data.
|
44
|
+
# @return [Hash] The marshal data.
|
45
|
+
def marshal_dump
|
46
|
+
{ params: @params,
|
47
|
+
moment: @moment }
|
48
|
+
end
|
49
|
+
|
50
|
+
# Load marshal data.
|
51
|
+
# @return [nil]
|
52
|
+
def marshal_load(obj)
|
53
|
+
@params = obj[:params]
|
54
|
+
@moment = obj[:moment]
|
55
|
+
nil
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Preprocessing
|
8
|
+
# Normalize samples by scaling each feature with its maximum absolute value.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# normalizer = Rumale::Preprocessing::MaxAbsScaler.new
|
12
|
+
# new_training_samples = normalizer.fit_transform(training_samples)
|
13
|
+
# new_testing_samples = normalizer.transform(testing_samples)
|
14
|
+
class MaxAbsScaler
|
15
|
+
include Base::BaseEstimator
|
16
|
+
include Base::Transformer
|
17
|
+
|
18
|
+
# Return the vector consisting of the maximum absolute value for each feature.
|
19
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
20
|
+
attr_reader :max_abs_vec
|
21
|
+
|
22
|
+
# Creates a new normalizer for scaling each feature with its maximum absolute value.
|
23
|
+
def initialize
|
24
|
+
@params = {}
|
25
|
+
@max_abs_vec = nil
|
26
|
+
end
|
27
|
+
|
28
|
+
# Calculate the maximum absolute value of each feature for scaling.
|
29
|
+
#
|
30
|
+
# @overload fit(x) -> MaxAbsScaler
|
31
|
+
#
|
32
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
|
33
|
+
# @return [MaxAbsScaler]
|
34
|
+
def fit(x, _y = nil)
|
35
|
+
check_sample_array(x)
|
36
|
+
@max_abs_vec = x.abs.max(0)
|
37
|
+
self
|
38
|
+
end
|
39
|
+
|
40
|
+
# Calculate the maximum absolute value for each feature, and then normalize samples.
|
41
|
+
#
|
42
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
43
|
+
#
|
44
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
|
45
|
+
# @return [Numo::DFloat] The scaled samples.
|
46
|
+
def fit_transform(x, _y = nil)
|
47
|
+
check_sample_array(x)
|
48
|
+
fit(x).transform(x)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Scale the given samples by the maximum absolute value of each feature.
|
52
|
+
#
|
53
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
|
54
|
+
# @return [Numo::DFloat] The scaled samples.
|
55
|
+
def transform(x)
|
56
|
+
check_sample_array(x)
|
57
|
+
x / @max_abs_vec
|
58
|
+
end
|
59
|
+
|
60
|
+
# Dump marshal data.
|
61
|
+
# @return [Hash] The marshal data about MaxAbsScaler.
|
62
|
+
def marshal_dump
|
63
|
+
{ params: @params,
|
64
|
+
max_abs_vec: @max_abs_vec }
|
65
|
+
end
|
66
|
+
|
67
|
+
# Load marshal data.
|
68
|
+
# @return [nil]
|
69
|
+
def marshal_load(obj)
|
70
|
+
@params = obj[:params]
|
71
|
+
@max_abs_vec = obj[:max_abs_vec]
|
72
|
+
nil
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.2
|
4
|
+
version: 0.8.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -140,6 +140,7 @@ files:
|
|
140
140
|
- lib/rumale/evaluation_measure/purity.rb
|
141
141
|
- lib/rumale/evaluation_measure/r2_score.rb
|
142
142
|
- lib/rumale/evaluation_measure/recall.rb
|
143
|
+
- lib/rumale/evaluation_measure/roc_auc.rb
|
143
144
|
- lib/rumale/kernel_approximation/rbf.rb
|
144
145
|
- lib/rumale/kernel_machine/kernel_svc.rb
|
145
146
|
- lib/rumale/linear_model/base_linear_model.rb
|
@@ -159,6 +160,7 @@ files:
|
|
159
160
|
- lib/rumale/naive_bayes/naive_bayes.rb
|
160
161
|
- lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
|
161
162
|
- lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
|
163
|
+
- lib/rumale/optimizer/ada_grad.rb
|
162
164
|
- lib/rumale/optimizer/adam.rb
|
163
165
|
- lib/rumale/optimizer/nadam.rb
|
164
166
|
- lib/rumale/optimizer/rmsprop.rb
|
@@ -171,6 +173,7 @@ files:
|
|
171
173
|
- lib/rumale/polynomial_model/factorization_machine_regressor.rb
|
172
174
|
- lib/rumale/preprocessing/l2_normalizer.rb
|
173
175
|
- lib/rumale/preprocessing/label_encoder.rb
|
176
|
+
- lib/rumale/preprocessing/max_abs_scaler.rb
|
174
177
|
- lib/rumale/preprocessing/min_max_scaler.rb
|
175
178
|
- lib/rumale/preprocessing/one_hot_encoder.rb
|
176
179
|
- lib/rumale/preprocessing/standard_scaler.rb
|