rumale 0.8.2 → 0.8.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: dba389e77a984b46e5352a2b4aae15f8eec2362d
-   data.tar.gz: 2eab0f18fc0e4b16af317bfa7b81db8203c62a20
+   metadata.gz: 38e459906ed25e84791a9d872e9536d633d9db2b
+   data.tar.gz: ea80195eeacbaf3ed7fccaf828e3e809baa862a7
  SHA512:
-   metadata.gz: 034b0fc6f79ed66af3a50d025e66f17a3815c0c0e0634bd3eccec19546d585b17f158e376700aafa3ae89d52a895efefd79ff048d61b9f87dabe51f72393b75f
-   data.tar.gz: 4124f95f72392af658b342d7c21526717417de246ce99f4ee857cc4d26e799dc4a7ea0bbb59aa45c6e8621178ee84a4a4ead426aa79f09e62bf903e273cdf05b
+   metadata.gz: 639d266a1045d9ee1fbf37f770bf6171ff8afc5a8183441fbaf0283af7a17dc2e6e05ee44cb601c8b9ea301d5f46d914fc77601fd60345b904aff509f07e5277
+   data.tar.gz: 7010cbf3f11f0139a0dda334cfce4f3fd8ffe059eb776f93e7aba32f91c3f517c8fe4723975ef2036e9981131d741345bab4158ce8354431a8a8b57000dacc2f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
+ # 0.8.3
+ - Add optimizer class for AdaGrad.
+ - Add evaluator class for ROC AUC.
+ - Add class for scaling with maximum absolute value.
+
  # 0.8.2
  - Add class for Adam optimizer.
  - Add data splitter classes for random permutation cross validation.
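Taken together, the three additions slot into the usual Rumale workflow. A minimal sketch of how they might be combined — not taken from the gem's docs; `samples`, `labels`, and `one_hot_labels` are placeholder variables, and the new classes are shown in full further down this diff:

    require 'rumale'

    # Scale each feature by its maximum absolute value (new in 0.8.3).
    scaler = Rumale::Preprocessing::MaxAbsScaler.new
    scaled = scaler.fit_transform(samples)

    # Train a linear classifier with the new AdaGrad optimizer.
    optimizer = Rumale::Optimizer::AdaGrad.new(learning_rate: 0.01)
    classifier = Rumale::LinearModel::LogisticRegression.new(optimizer: optimizer, random_seed: 1)
    classifier.fit(scaled, labels)

    # Score with the new ROC AUC evaluator (one-hot labels vs. predicted probabilities).
    evaluator = Rumale::EvaluationMeasure::ROCAUC.new
    puts evaluator.score(one_hot_labels, classifier.predict_proba(scaled))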
data/lib/rumale.rb CHANGED
@@ -17,6 +17,7 @@ require 'rumale/base/transformer'
  require 'rumale/base/splitter'
  require 'rumale/base/evaluator'
  require 'rumale/optimizer/sgd'
+ require 'rumale/optimizer/ada_grad'
  require 'rumale/optimizer/rmsprop'
  require 'rumale/optimizer/adam'
  require 'rumale/optimizer/nadam'
@@ -52,6 +53,7 @@ require 'rumale/decomposition/pca'
  require 'rumale/decomposition/nmf'
  require 'rumale/preprocessing/l2_normalizer'
  require 'rumale/preprocessing/min_max_scaler'
+ require 'rumale/preprocessing/max_abs_scaler'
  require 'rumale/preprocessing/standard_scaler'
  require 'rumale/preprocessing/label_encoder'
  require 'rumale/preprocessing/one_hot_encoder'
@@ -65,6 +67,7 @@ require 'rumale/evaluation_measure/accuracy'
  require 'rumale/evaluation_measure/precision'
  require 'rumale/evaluation_measure/recall'
  require 'rumale/evaluation_measure/f_score'
+ require 'rumale/evaluation_measure/roc_auc'
  require 'rumale/evaluation_measure/log_loss'
  require 'rumale/evaluation_measure/r2_score'
  require 'rumale/evaluation_measure/explained_variance_score'
data/lib/rumale/evaluation_measure/roc_auc.rb ADDED
@@ -0,0 +1,122 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/evaluator'
+
+ module Rumale
+   module EvaluationMeasure
+     # ROCAUC is a class that calculates the area under the receiver operating characteristic curve from predicted scores.
+     #
+     # @example
+     #   # Encode labels to integer array.
+     #   labels = %w[A B B C A A C C C A]
+     #   label_encoder = Rumale::Preprocessing::LabelEncoder.new
+     #   y = label_encoder.fit_transform(labels)
+     #   # Fit classifier.
+     #   classifier = Rumale::LinearModel::LogisticRegression.new
+     #   classifier.fit(x, y)
+     #   # Predict class probabilities.
+     #   y_score = classifier.predict_proba(x)
+     #   # Encode labels to one-hot vectors.
+     #   one_hot_encoder = Rumale::Preprocessing::OneHotEncoder.new
+     #   y_onehot = one_hot_encoder.fit_transform(y)
+     #   # Calculate ROC AUC.
+     #   evaluator = Rumale::EvaluationMeasure::ROCAUC.new
+     #   puts evaluator.score(y_onehot, y_score)
+     class ROCAUC
+       include Base::Evaluator
+
+       # Calculate the area under the receiver operating characteristic curve (ROC AUC).
+       #
+       # @param y_true [Numo::Int32] (shape: [n_samples] or [n_samples, n_classes])
+       #   Ground truth binary labels or one-hot encoded multi-labels.
+       # @param y_score [Numo::DFloat] (shape: [n_samples] or [n_samples, n_classes])
+       #   Predicted class probabilities or confidence scores.
+       # @return [Float] (macro-averaged) ROC AUC.
+       def score(y_true, y_score)
+         check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
+         raise ArgumentError, 'Expect to have the same shape for y_true and y_score.' unless y_true.shape == y_score.shape
+
+         n_classes = y_score.shape[1]
+         if n_classes.nil?
+           fpr, tpr, = roc_curve(y_true, y_score)
+           return auc(fpr, tpr)
+         end
+
+         scores = Array.new(n_classes) do |c|
+           fpr, tpr, = roc_curve(y_true[true, c], y_score[true, c])
+           auc(fpr, tpr)
+         end
+
+         scores.reduce(&:+).fdiv(n_classes)
+       end
+
+       # Calculate the receiver operating characteristic curve.
+       #
+       # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth binary labels.
+       # @param y_score [Numo::DFloat] (shape: [n_samples]) Predicted class probabilities or confidence scores.
+       # @param pos_label [Integer] Label to be treated as positive when binarizing the given labels.
+       #   If nil is given, the method considers the maximum value of the labels as the positive label.
+       # @return [Array] fpr (Numo::DFloat): false positive rates. tpr (Numo::DFloat): true positive rates.
+       #   thresholds (Numo::DFloat): thresholds on the decision function used to calculate fpr and tpr.
+       def roc_curve(y_true, y_score, pos_label = nil)
+         check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
+         raise ArgumentError, 'Expect y_true to be 1-D array.' unless y_true.shape[1].nil?
+         raise ArgumentError, 'Expect y_score to be 1-D array.' unless y_score.shape[1].nil?
+         labels = y_true.to_a.uniq
+         if pos_label.nil?
+           raise ArgumentError, 'y_true must be binary labels or pos_label must be specified if y_true is multi-label' unless labels.size == 2
+         else
+           raise ArgumentError, 'y_true must have elements whose values are pos_label.' unless y_true.to_a.uniq.include?(pos_label)
+         end
+
+         false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)
+
+         if true_pos.size.zero? || false_pos[0] != 0 || true_pos[0] != 0
+           true_pos = true_pos.insert(0, 0)
+           false_pos = false_pos.insert(0, 0)
+           thresholds = thresholds.insert(0, thresholds[0] + 1)
+         end
+
+         tpr = true_pos / true_pos[-1].to_f
+         fpr = false_pos / false_pos[-1].to_f
+
+         [fpr, tpr, thresholds]
+       end
+
+       # Calculate the area under a curve using the trapezoidal rule.
+       #
+       # @param x [Numo::Int32/Numo::DFloat] (shape: [n_elements])
+       #   x coordinates. These are expected to increase or decrease monotonically.
+       # @param y [Numo::Int32/Numo::DFloat] (shape: [n_elements]) y coordinates.
+       # @return [Float] area under the curve.
+       def auc(x, y)
+         check_params_type(Numo::NArray, x: x, y: y)
+         raise ArgumentError, 'Expect x to be 1-D array.' unless x.shape[1].nil?
+         raise ArgumentError, 'Expect y to be 1-D array.' unless y.shape[1].nil?
+         n_samples = [x.shape[0], y.shape[0]].min
+         raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_samples < 2
+         (0...n_samples).to_a.each_cons(2).map { |i, j| 0.5 * (x[i] - x[j]).abs * (y[i] + y[j]) }.reduce(&:+)
+       end
+
+       private
+
+       def binary_roc_curve(y_true, y_score, pos_label = nil)
+         pos_label = y_true.to_a.uniq.max if pos_label.nil?
+
+         bin_y_true = y_true.eq(pos_label)
+         desc_pred_ids = y_score.sort_index.reverse
+
+         desc_y_true = Numo::Int32.cast(bin_y_true[desc_pred_ids])
+         desc_y_score = y_score[desc_pred_ids]
+
+         dist_value_ids = desc_y_score.diff.ne(0).where
+         threshold_ids = dist_value_ids.append(desc_y_true.size - 1)
+
+         true_pos = desc_y_true.cumsum[threshold_ids]
+         false_pos = 1 + threshold_ids - true_pos
+
+         [false_pos, true_pos, desc_y_score[threshold_ids]]
+       end
+     end
+   end
+ end
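The class's own @example exercises score; roc_curve and auc are public as well and can be called directly. A small hand-checkable sketch with made-up scores — both positives rank above both negatives, so the AUC comes out as 1.0:

    require 'rumale'
    require 'numo/narray'

    evaluator = Rumale::EvaluationMeasure::ROCAUC.new

    # Two negatives with low scores, two positives with high scores.
    y_true  = Numo::Int32[0, 0, 1, 1]
    y_score = Numo::DFloat[0.1, 0.4, 0.7, 0.9]

    fpr, tpr, = evaluator.roc_curve(y_true, y_score)
    puts evaluator.auc(fpr, tpr) # => 1.0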
data/lib/rumale/optimizer/ada_grad.rb ADDED
@@ -0,0 +1,59 @@
+ # frozen_string_literal: true
+
+ require 'rumale/validation'
+ require 'rumale/base/base_estimator'
+
+ module Rumale
+   module Optimizer
+     # AdaGrad is a class that implements the AdaGrad optimizer.
+     #
+     # @example
+     #   optimizer = Rumale::Optimizer::AdaGrad.new(learning_rate: 0.01)
+     #   estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
+     #   estimator.fit(samples, values)
+     #
+     # *Reference*
+     # - J. Duchi, E. Hazan, and Y. Singer, "Adaptive Subgradient Methods for Online Learning and Stochastic Optimization," J. Machine Learning Research, vol. 12, pp. 2121--2159, 2011.
+     class AdaGrad
+       include Base::BaseEstimator
+       include Validation
+
+       # Create a new optimizer with AdaGrad.
+       #
+       # @param learning_rate [Float] The initial value of the learning rate.
+       def initialize(learning_rate: 0.01)
+         check_params_float(learning_rate: learning_rate)
+         check_params_positive(learning_rate: learning_rate)
+         @params = {}
+         @params[:learning_rate] = learning_rate
+         @moment = nil
+       end
+
+       # Calculate the updated weight with the AdaGrad adaptive learning rate.
+       #
+       # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
+       # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
+       # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
+       def call(weight, gradient)
+         @moment ||= Numo::DFloat.zeros(weight.shape[0])
+         @moment += gradient**2
+         weight - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data.
+       def marshal_dump
+         { params: @params,
+           moment: @moment }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @moment = obj[:moment]
+         nil
+       end
+     end
+   end
+ end
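The call method applies the standard AdaGrad update: squared gradients accumulate in @moment, and each weight moves by learning_rate / (sqrt(@moment) + 1e-8) times its gradient. A stand-alone sketch with made-up values — in practice the optimizer is passed to an estimator, as in the @example above:

    require 'rumale'
    require 'numo/narray'

    optimizer = Rumale::Optimizer::AdaGrad.new(learning_rate: 0.1)

    weight   = Numo::DFloat[1.0, 1.0]
    gradient = Numo::DFloat[0.5, 2.0]

    # With a constant gradient the accumulated moment grows on every call,
    # so the effective step shrinks roughly like learning_rate / sqrt(t).
    3.times do
      weight = optimizer.call(weight, gradient)
      p weight
    end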
data/lib/rumale/preprocessing/max_abs_scaler.rb ADDED
@@ -0,0 +1,76 @@
+ # frozen_string_literal: true
+
+ require 'rumale/base/base_estimator'
+ require 'rumale/base/transformer'
+
+ module Rumale
+   module Preprocessing
+     # Normalize samples by scaling each feature with its maximum absolute value.
+     #
+     # @example
+     #   normalizer = Rumale::Preprocessing::MaxAbsScaler.new
+     #   new_training_samples = normalizer.fit_transform(training_samples)
+     #   new_testing_samples = normalizer.transform(testing_samples)
+     class MaxAbsScaler
+       include Base::BaseEstimator
+       include Base::Transformer
+
+       # Return the vector consisting of the maximum absolute value of each feature.
+       # @return [Numo::DFloat] (shape: [n_features])
+       attr_reader :max_abs_vec
+
+       # Create a new normalizer for scaling each feature with its maximum absolute value.
+       def initialize
+         @params = {}
+         @max_abs_vec = nil
+       end
+
+       # Calculate the maximum absolute value of each feature for scaling.
+       #
+       # @overload fit(x) -> MaxAbsScaler
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples used to calculate the maximum absolute value of each feature.
+       # @return [MaxAbsScaler]
+       def fit(x, _y = nil)
+         check_sample_array(x)
+         @max_abs_vec = x.abs.max(0)
+         self
+       end
+
+       # Calculate the maximum absolute value of each feature, and then normalize the samples.
+       #
+       # @overload fit_transform(x) -> Numo::DFloat
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples used to calculate the maximum absolute value of each feature.
+       # @return [Numo::DFloat] The scaled samples.
+       def fit_transform(x, _y = nil)
+         check_sample_array(x)
+         fit(x).transform(x)
+       end
+
+       # Scale the given samples with the maximum absolute value of each feature.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
+       # @return [Numo::DFloat] The scaled samples.
+       def transform(x)
+         check_sample_array(x)
+         x / @max_abs_vec
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about MaxAbsScaler.
+       def marshal_dump
+         { params: @params,
+           max_abs_vec: @max_abs_vec }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @max_abs_vec = obj[:max_abs_vec]
+         nil
+       end
+     end
+   end
+ end
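A small numeric sketch of the scaling rule with made-up values: fit records the per-column maximum absolute value, and transform divides by it, so the output stays within [-1, 1] and signs are preserved:

    require 'rumale'
    require 'numo/narray'

    scaler = Rumale::Preprocessing::MaxAbsScaler.new
    x = Numo::DFloat[[1.0, -10.0],
                     [2.0,   5.0],
                     [-4.0,  2.5]]

    p scaler.fit_transform(x)
    # Column maxima of |x| are [4.0, 10.0], so the result is
    # [[0.25, -1.0], [0.5, 0.5], [-1.0, 0.25]].
    p scaler.max_abs_vec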
data/lib/rumale/version.rb CHANGED
@@ -3,5 +3,5 @@
  # Rumale is a machine learning library in Ruby.
  module Rumale
    # The version of Rumale you are using.
-   VERSION = '0.8.2'
+   VERSION = '0.8.3'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: rumale
  version: !ruby/object:Gem::Version
-   version: 0.8.2
+   version: 0.8.3
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2019-03-19 00:00:00.000000000 Z
+ date: 2019-04-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: numo-narray
@@ -140,6 +140,7 @@ files:
  - lib/rumale/evaluation_measure/purity.rb
  - lib/rumale/evaluation_measure/r2_score.rb
  - lib/rumale/evaluation_measure/recall.rb
+ - lib/rumale/evaluation_measure/roc_auc.rb
  - lib/rumale/kernel_approximation/rbf.rb
  - lib/rumale/kernel_machine/kernel_svc.rb
  - lib/rumale/linear_model/base_linear_model.rb
@@ -159,6 +160,7 @@ files:
  - lib/rumale/naive_bayes/naive_bayes.rb
  - lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
  - lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
+ - lib/rumale/optimizer/ada_grad.rb
  - lib/rumale/optimizer/adam.rb
  - lib/rumale/optimizer/nadam.rb
  - lib/rumale/optimizer/rmsprop.rb
@@ -171,6 +173,7 @@ files:
  - lib/rumale/polynomial_model/factorization_machine_regressor.rb
  - lib/rumale/preprocessing/l2_normalizer.rb
  - lib/rumale/preprocessing/label_encoder.rb
+ - lib/rumale/preprocessing/max_abs_scaler.rb
  - lib/rumale/preprocessing/min_max_scaler.rb
  - lib/rumale/preprocessing/one_hot_encoder.rb
  - lib/rumale/preprocessing/standard_scaler.rb