rumale 0.8.2 → 0.8.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/rumale.rb +3 -0
- data/lib/rumale/evaluation_measure/roc_auc.rb +122 -0
- data/lib/rumale/optimizer/ada_grad.rb +59 -0
- data/lib/rumale/preprocessing/max_abs_scaler.rb +76 -0
- data/lib/rumale/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 38e459906ed25e84791a9d872e9536d633d9db2b
|
4
|
+
data.tar.gz: ea80195eeacbaf3ed7fccaf828e3e809baa862a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 639d266a1045d9ee1fbf37f770bf6171ff8afc5a8183441fbaf0283af7a17dc2e6e05ee44cb601c8b9ea301d5f46d914fc77601fd60345b904aff509f07e5277
|
7
|
+
data.tar.gz: 7010cbf3f11f0139a0dda334cfce4f3fd8ffe059eb776f93e7aba32f91c3f517c8fe4723975ef2036e9981131d741345bab4158ce8354431a8a8b57000dacc2f
|
data/CHANGELOG.md
CHANGED
data/lib/rumale.rb
CHANGED
@@ -17,6 +17,7 @@ require 'rumale/base/transformer'
|
|
17
17
|
require 'rumale/base/splitter'
|
18
18
|
require 'rumale/base/evaluator'
|
19
19
|
require 'rumale/optimizer/sgd'
|
20
|
+
require 'rumale/optimizer/ada_grad'
|
20
21
|
require 'rumale/optimizer/rmsprop'
|
21
22
|
require 'rumale/optimizer/adam'
|
22
23
|
require 'rumale/optimizer/nadam'
|
@@ -52,6 +53,7 @@ require 'rumale/decomposition/pca'
|
|
52
53
|
require 'rumale/decomposition/nmf'
|
53
54
|
require 'rumale/preprocessing/l2_normalizer'
|
54
55
|
require 'rumale/preprocessing/min_max_scaler'
|
56
|
+
require 'rumale/preprocessing/max_abs_scaler'
|
55
57
|
require 'rumale/preprocessing/standard_scaler'
|
56
58
|
require 'rumale/preprocessing/label_encoder'
|
57
59
|
require 'rumale/preprocessing/one_hot_encoder'
|
@@ -65,6 +67,7 @@ require 'rumale/evaluation_measure/accuracy'
|
|
65
67
|
require 'rumale/evaluation_measure/precision'
|
66
68
|
require 'rumale/evaluation_measure/recall'
|
67
69
|
require 'rumale/evaluation_measure/f_score'
|
70
|
+
require 'rumale/evaluation_measure/roc_auc'
|
68
71
|
require 'rumale/evaluation_measure/log_loss'
|
69
72
|
require 'rumale/evaluation_measure/r2_score'
|
70
73
|
require 'rumale/evaluation_measure/explained_variance_score'
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/evaluator'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module EvaluationMeasure
|
7
|
+
# ROCAUC is a class that calculates the area under the receiver operating characteristic curve from predicted scores.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# # Encode labels to integer array.
|
11
|
+
# labels = %w[A B B C A A C C C A]
|
12
|
+
# label_encoder = Rumale::Preprocessing::LabelEncoder.new
|
13
|
+
# y = label_encoder.fit_transform(labels)
|
14
|
+
# # Fit classifier.
|
15
|
+
# classifier = Rumale::LinearModel::LogisticRegression.new
|
16
|
+
# classifier.fit(x, y)
|
17
|
+
# # Predict class probabilities.
|
18
|
+
# y_score = classifier.predict_proba(x)
|
19
|
+
# # Encode labels to one-hot vectors.
|
20
|
+
# one_hot_encoder = Rumale::Preprocessing::OneHotEncoder.new
|
21
|
+
# y_onehot = one_hot_encoder.fit_transform(y)
|
22
|
+
# # Calculate ROC AUC.
|
23
|
+
# evaluator = Rumale::EvaluationMeasure::ROCAUC.new
|
24
|
+
# puts evaluator.score(y_onehot, y_score)
|
25
|
+
class ROCAUC
|
26
|
+
include Base::Evaluator
|
27
|
+
|
28
|
+
# Calculate the area under the receiver operating characteristic curve (ROC AUC).
|
29
|
+
#
|
30
|
+
# @param y_true [Numo::Int32] (shape: [n_samples] or [n_samples, n_classes])
|
31
|
+
# Ground truth binary labels or one-hot encoded multi-labels.
|
32
|
+
# @param y_score [Numo::DFloat] (shape: [n_samples] or [n_samples, n_classes])
|
33
|
+
# Predicted class probabilities or confidence scores.
|
34
|
+
# @return [Float] (macro-averaged) ROC AUC.
|
35
|
+
def score(y_true, y_score)
|
36
|
+
check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
|
37
|
+
raise ArgumentError, 'Expect to have the same shape for y_true and y_score.' unless y_true.shape == y_score.shape
|
38
|
+
|
39
|
+
n_classes = y_score.shape[1]
|
40
|
+
if n_classes.nil?
|
41
|
+
fpr, tpr, = roc_curve(y_true, y_score)
|
42
|
+
return auc(fpr, tpr)
|
43
|
+
end
|
44
|
+
|
45
|
+
scores = Array.new(n_classes) do |c|
|
46
|
+
fpr, tpr, = roc_curve(y_true[true, c], y_score[true, c])
|
47
|
+
auc(fpr, tpr)
|
48
|
+
end
|
49
|
+
|
50
|
+
scores.reduce(&:+).fdiv(n_classes)
|
51
|
+
end
|
52
|
+
|
53
|
+
# Calculate the receiver operating characteristic curve.
|
54
|
+
#
|
55
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth binary labels.
|
56
|
+
# @param y_score [Numo::DFloat] (shape: [n_samples]) Predicted class probabilities or confidence scores.
|
57
|
+
# @param pos_label [Integer] Label to be a positive label when binarizing the given labels.
|
58
|
+
# If nil is given, the method considers the maximum value of the label as a positive label.
|
59
|
+
# @return [Array] fpr (Numo::DFloat): false positive rates. tpr (Numo::DFloat): true positive rates.
|
60
|
+
# thresholds (Numo::DFloat): thresholds on the decision function used to calculate fpr and tpr.
|
61
|
+
def roc_curve(y_true, y_score, pos_label = nil)
|
62
|
+
check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
|
63
|
+
raise ArgumentError, 'Expect y_true to be 1-D arrray.' unless y_true.shape[1].nil?
|
64
|
+
raise ArgumentError, 'Expect y_score to be 1-D arrray.' unless y_score.shape[1].nil?
|
65
|
+
labels = y_true.to_a.uniq
|
66
|
+
if pos_label.nil?
|
67
|
+
raise ArgumentError, 'y_true must be binary labels or pos_label must be specified if y_true is multi-label' unless labels.size == 2
|
68
|
+
else
|
69
|
+
raise ArgumentError, 'y_true must have elements whose values are pos_label.' unless y_true.to_a.uniq.include?(pos_label)
|
70
|
+
end
|
71
|
+
|
72
|
+
false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)
|
73
|
+
|
74
|
+
if true_pos.size.zero? || false_pos[0] != 0 || true_pos[0] != 0
|
75
|
+
true_pos = true_pos.insert(0, 0)
|
76
|
+
false_pos = false_pos.insert(0, 0)
|
77
|
+
thresholds = thresholds.insert(0, thresholds[0] + 1)
|
78
|
+
end
|
79
|
+
|
80
|
+
tpr = true_pos / true_pos[-1].to_f
|
81
|
+
fpr = false_pos / false_pos[-1].to_f
|
82
|
+
|
83
|
+
[fpr, tpr, thresholds]
|
84
|
+
end
|
85
|
+
|
86
|
+
# Calculate area under the curve using the trapezoidal rule.
|
87
|
+
#
|
88
|
+
# @param x [Numo::Int32/Numo::DFloat] (shape: [n_elements])
|
89
|
+
# x coordinates. These are expected to increase or decrease monotonically.
|
90
|
+
# @param y [Numo::Int32/Numo::DFloat] (shape: [n_elements]) y coordinates.
|
91
|
+
# @return [Float] area under the curve.
|
92
|
+
def auc(x, y)
|
93
|
+
check_params_type(Numo::NArray, x: x, y: y)
|
94
|
+
raise ArgumentError, 'Expect x to be 1-D arrray.' unless x.shape[1].nil?
|
95
|
+
raise ArgumentError, 'Expect y to be 1-D arrray.' unless y.shape[1].nil?
|
96
|
+
n_samples = [x.shape[0], y.shape[0]].min
|
97
|
+
raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_samples < 2
|
98
|
+
(0...n_samples).to_a.each_cons(2).map { |i, j| 0.5 * (x[i] - x[j]).abs * (y[i] + y[j]) }.reduce(&:+)
|
99
|
+
end
|
100
|
+
|
101
|
+
private
|
102
|
+
|
103
|
+
def binary_roc_curve(y_true, y_score, pos_label = nil)
|
104
|
+
pos_label = y_true.to_a.uniq.max if pos_label.nil?
|
105
|
+
|
106
|
+
bin_y_true = y_true.eq(pos_label)
|
107
|
+
desc_pred_ids = y_score.sort_index.reverse
|
108
|
+
|
109
|
+
desc_y_true = Numo::Int32.cast(bin_y_true[desc_pred_ids])
|
110
|
+
desc_y_score = y_score[desc_pred_ids]
|
111
|
+
|
112
|
+
dist_value_ids = desc_y_score.diff.ne(0).where
|
113
|
+
threshold_ids = dist_value_ids.append(desc_y_true.size - 1)
|
114
|
+
|
115
|
+
true_pos = desc_y_true.cumsum[threshold_ids]
|
116
|
+
false_pos = 1 + threshold_ids - true_pos
|
117
|
+
|
118
|
+
[false_pos, true_pos, desc_y_score[threshold_ids]]
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/validation'
|
4
|
+
require 'rumale/base/base_estimator'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Optimizer
|
8
|
+
# AdaGrad is a class that implements AdaGrad optimizer.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# optimizer = Rumale::Optimizer::AdaGrad.new(learning_rate: 0.01)
|
12
|
+
# estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
|
13
|
+
# estimator.fit(samples, values)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - J. Duchi, E Hazan, and Y. Singer, "Adaptive Subgradient Methods for Online Learning and Stochastic Optimization," J. Machine Learning Research, vol. 12, pp. 2121--2159, 2011.
|
17
|
+
class AdaGrad
|
18
|
+
include Base::BaseEstimator
|
19
|
+
include Validation
|
20
|
+
|
21
|
+
# Create a new optimizer with AdaGrad.
|
22
|
+
#
|
23
|
+
# @param learning_rate [Float] The initial value of learning rate.
|
24
|
+
def initialize(learning_rate: 0.01)
|
25
|
+
check_params_float(learning_rate: learning_rate)
|
26
|
+
check_params_positive(learning_rate: learning_rate)
|
27
|
+
@params = {}
|
28
|
+
@params[:learning_rate] = learning_rate
|
29
|
+
@moment = nil
|
30
|
+
end
|
31
|
+
|
32
|
+
# Calculate the updated weight with AdaGrad adaptive learning rate.
|
33
|
+
#
|
34
|
+
# @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
|
35
|
+
# @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
|
36
|
+
# @return [Numo::DFloat] (shape: [n_features]) The updated weight.
|
37
|
+
def call(weight, gradient)
|
38
|
+
@moment ||= Numo::DFloat.zeros(weight.shape[0])
|
39
|
+
@moment += gradient**2
|
40
|
+
weight - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
|
41
|
+
end
|
42
|
+
|
43
|
+
# Dump marshal data.
|
44
|
+
# @return [Hash] The marshal data.
|
45
|
+
def marshal_dump
|
46
|
+
{ params: @params,
|
47
|
+
moment: @moment }
|
48
|
+
end
|
49
|
+
|
50
|
+
# Load marshal data.
|
51
|
+
# @return [nil]
|
52
|
+
def marshal_load(obj)
|
53
|
+
@params = obj[:params]
|
54
|
+
@moment = obj[:moment]
|
55
|
+
nil
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Preprocessing
|
8
|
+
# Normalize samples by scaling each feature with its maximum absolute value.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# normalizer = Rumale::Preprocessing::MaxAbsScaler.new
|
12
|
+
# new_training_samples = normalizer.fit_transform(training_samples)
|
13
|
+
# new_testing_samples = normalizer.transform(testing_samples)
|
14
|
+
class MaxAbsScaler
|
15
|
+
include Base::BaseEstimator
|
16
|
+
include Base::Transformer
|
17
|
+
|
18
|
+
# Return the vector consisting of the maximum absolute value of each feature.
|
19
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
20
|
+
attr_reader :max_abs_vec
|
21
|
+
|
22
|
+
# Creates a new normalizer for scaling each feature with its maximum absolute value.
|
23
|
+
def initialize
|
24
|
+
@params = {}
|
25
|
+
@max_abs_vec = nil
|
26
|
+
end
|
27
|
+
|
28
|
+
# Calculate the maximum absolute value of each feature for scaling.
|
29
|
+
#
|
30
|
+
# @overload fit(x) -> MaxAbsScaler
|
31
|
+
#
|
32
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
|
33
|
+
# @return [MaxAbsScaler]
|
34
|
+
def fit(x, _y = nil)
|
35
|
+
check_sample_array(x)
|
36
|
+
@max_abs_vec = x.abs.max(0)
|
37
|
+
self
|
38
|
+
end
|
39
|
+
|
40
|
+
# Calculate the maximum absolute value for each feature, and then normalize samples.
|
41
|
+
#
|
42
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
43
|
+
#
|
44
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
|
45
|
+
# @return [Numo::DFloat] The scaled samples.
|
46
|
+
def fit_transform(x, _y = nil)
|
47
|
+
check_sample_array(x)
|
48
|
+
fit(x).transform(x)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Scale the given samples by the maximum absolute value of each feature.
|
52
|
+
#
|
53
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
|
54
|
+
# @return [Numo::DFloat] The scaled samples.
|
55
|
+
def transform(x)
|
56
|
+
check_sample_array(x)
|
57
|
+
x / @max_abs_vec
|
58
|
+
end
|
59
|
+
|
60
|
+
# Dump marshal data.
|
61
|
+
# @return [Hash] The marshal data about MaxAbsScaler.
|
62
|
+
def marshal_dump
|
63
|
+
{ params: @params,
|
64
|
+
max_abs_vec: @max_abs_vec }
|
65
|
+
end
|
66
|
+
|
67
|
+
# Load marshal data.
|
68
|
+
# @return [nil]
|
69
|
+
def marshal_load(obj)
|
70
|
+
@params = obj[:params]
|
71
|
+
@max_abs_vec = obj[:max_abs_vec]
|
72
|
+
nil
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.2
|
4
|
+
version: 0.8.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -140,6 +140,7 @@ files:
|
|
140
140
|
- lib/rumale/evaluation_measure/purity.rb
|
141
141
|
- lib/rumale/evaluation_measure/r2_score.rb
|
142
142
|
- lib/rumale/evaluation_measure/recall.rb
|
143
|
+
- lib/rumale/evaluation_measure/roc_auc.rb
|
143
144
|
- lib/rumale/kernel_approximation/rbf.rb
|
144
145
|
- lib/rumale/kernel_machine/kernel_svc.rb
|
145
146
|
- lib/rumale/linear_model/base_linear_model.rb
|
@@ -159,6 +160,7 @@ files:
|
|
159
160
|
- lib/rumale/naive_bayes/naive_bayes.rb
|
160
161
|
- lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
|
161
162
|
- lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
|
163
|
+
- lib/rumale/optimizer/ada_grad.rb
|
162
164
|
- lib/rumale/optimizer/adam.rb
|
163
165
|
- lib/rumale/optimizer/nadam.rb
|
164
166
|
- lib/rumale/optimizer/rmsprop.rb
|
@@ -171,6 +173,7 @@ files:
|
|
171
173
|
- lib/rumale/polynomial_model/factorization_machine_regressor.rb
|
172
174
|
- lib/rumale/preprocessing/l2_normalizer.rb
|
173
175
|
- lib/rumale/preprocessing/label_encoder.rb
|
176
|
+
- lib/rumale/preprocessing/max_abs_scaler.rb
|
174
177
|
- lib/rumale/preprocessing/min_max_scaler.rb
|
175
178
|
- lib/rumale/preprocessing/one_hot_encoder.rb
|
176
179
|
- lib/rumale/preprocessing/standard_scaler.rb
|