rumale 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dba389e77a984b46e5352a2b4aae15f8eec2362d
4
- data.tar.gz: 2eab0f18fc0e4b16af317bfa7b81db8203c62a20
3
+ metadata.gz: 38e459906ed25e84791a9d872e9536d633d9db2b
4
+ data.tar.gz: ea80195eeacbaf3ed7fccaf828e3e809baa862a7
5
5
  SHA512:
6
- metadata.gz: 034b0fc6f79ed66af3a50d025e66f17a3815c0c0e0634bd3eccec19546d585b17f158e376700aafa3ae89d52a895efefd79ff048d61b9f87dabe51f72393b75f
7
- data.tar.gz: 4124f95f72392af658b342d7c21526717417de246ce99f4ee857cc4d26e799dc4a7ea0bbb59aa45c6e8621178ee84a4a4ead426aa79f09e62bf903e273cdf05b
6
+ metadata.gz: 639d266a1045d9ee1fbf37f770bf6171ff8afc5a8183441fbaf0283af7a17dc2e6e05ee44cb601c8b9ea301d5f46d914fc77601fd60345b904aff509f07e5277
7
+ data.tar.gz: 7010cbf3f11f0139a0dda334cfce4f3fd8ffe059eb776f93e7aba32f91c3f517c8fe4723975ef2036e9981131d741345bab4158ce8354431a8a8b57000dacc2f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ # 0.8.3
2
+ - Add optimizer class for AdaGrad.
3
+ - Add evaluator class for ROC AUC.
4
+ - Add class for scaling with maximum absolute value.
5
+
1
6
  # 0.8.2
2
7
  - Add class for Adam optimizer.
3
8
  - Add data splitter classes for random permutation cross validation.
data/lib/rumale.rb CHANGED
@@ -17,6 +17,7 @@ require 'rumale/base/transformer'
17
17
  require 'rumale/base/splitter'
18
18
  require 'rumale/base/evaluator'
19
19
  require 'rumale/optimizer/sgd'
20
+ require 'rumale/optimizer/ada_grad'
20
21
  require 'rumale/optimizer/rmsprop'
21
22
  require 'rumale/optimizer/adam'
22
23
  require 'rumale/optimizer/nadam'
@@ -52,6 +53,7 @@ require 'rumale/decomposition/pca'
52
53
  require 'rumale/decomposition/nmf'
53
54
  require 'rumale/preprocessing/l2_normalizer'
54
55
  require 'rumale/preprocessing/min_max_scaler'
56
+ require 'rumale/preprocessing/max_abs_scaler'
55
57
  require 'rumale/preprocessing/standard_scaler'
56
58
  require 'rumale/preprocessing/label_encoder'
57
59
  require 'rumale/preprocessing/one_hot_encoder'
@@ -65,6 +67,7 @@ require 'rumale/evaluation_measure/accuracy'
65
67
  require 'rumale/evaluation_measure/precision'
66
68
  require 'rumale/evaluation_measure/recall'
67
69
  require 'rumale/evaluation_measure/f_score'
70
+ require 'rumale/evaluation_measure/roc_auc'
68
71
  require 'rumale/evaluation_measure/log_loss'
69
72
  require 'rumale/evaluation_measure/r2_score'
70
73
  require 'rumale/evaluation_measure/explained_variance_score'
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/evaluator'
4
+
5
+ module Rumale
6
+ module EvaluationMeasure
7
# ROCAUC is a class that calculates the area under the receiver operating characteristic curve
# from predicted scores.
#
# @example
#   # Encode labels to integer array.
#   labels = %w[A B B C A A C C C A]
#   label_encoder = Rumale::Preprocessing::LabelEncoder.new
#   y = label_encoder.fit_transform(labels)
#   # Fit classifier.
#   classifier = Rumale::LinearModel::LogisticRegression.new
#   classifier.fit(x, y)
#   # Predict class probabilities.
#   y_score = classifier.predict_proba(x)
#   # Encode labels to one-hot vectors.
#   one_hot_encoder = Rumale::Preprocessing::OneHotEncoder.new
#   y_onehot = one_hot_encoder.fit_transform(y)
#   # Calculate ROC AUC.
#   evaluator = Rumale::EvaluationMeasure::ROCAUC.new
#   puts evaluator.score(y_onehot, y_score)
class ROCAUC
  include Base::Evaluator

  # Calculate area under the receiver operating characteristic curve (ROC AUC).
  # A 1-D input yields a single AUC; a 2-D input yields the mean of the per-column AUCs.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples] or [n_samples, n_classes])
  #   Ground truth binary labels or one-hot encoded multi-labels.
  # @param y_score [Numo::DFloat] (shape: [n_samples] or [n_samples, n_classes])
  #   Predicted class probabilities or confidence scores.
  # @return [Float] (macro-averaged) ROC AUC.
  # @raise [ArgumentError] If y_true and y_score do not have the same shape.
  def score(y_true, y_score)
    check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
    raise ArgumentError, 'Expect to have the same shape for y_true and y_score.' unless y_true.shape == y_score.shape

    n_classes = y_score.shape[1]
    # A missing second dimension means plain binary labels: one curve, one area.
    return area_under_roc(y_true, y_score) if n_classes.nil?

    per_class = (0...n_classes).map { |c| area_under_roc(y_true[true, c], y_score[true, c]) }
    per_class.reduce(&:+).fdiv(n_classes)
  end

  # Calculate receiver operating characteristic curve.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth binary labels.
  # @param y_score [Numo::DFloat] (shape: [n_samples]) Predicted class probabilities or confidence scores.
  # @param pos_label [Integer] Label to be a positive label when binarizing the given labels.
  #   If nil is given, the method considers the maximum value of the label as a positive label.
  # @return [Array] fpr (Numo::DFloat): false positive rates. tpr (Numo::DFloat): true positive rates.
  #   thresholds (Numo::DFloat): thresholds on the decision function used to calculate fpr and tpr.
  # @raise [ArgumentError] If an input is not 1-D, if pos_label is nil and y_true does not hold
  #   exactly two distinct labels, or if pos_label is given but absent from y_true.
  def roc_curve(y_true, y_score, pos_label = nil)
    check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
    raise ArgumentError, 'Expect y_true to be 1-D arrray.' unless y_true.shape[1].nil?
    raise ArgumentError, 'Expect y_score to be 1-D arrray.' unless y_score.shape[1].nil?

    labels = y_true.to_a.uniq
    if pos_label.nil?
      raise ArgumentError, 'y_true must be binary labels or pos_label must be specified if y_true is multi-label' unless labels.size == 2
    elsif !labels.include?(pos_label)
      raise ArgumentError, 'y_true must have elements whose values are pos_label.'
    end

    false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)

    # Make sure the curve begins at the origin (0, 0) by prepending an extra point
    # whose threshold lies above the largest observed score.
    starts_at_origin = !true_pos.size.zero? && false_pos[0] == 0 && true_pos[0] == 0
    unless starts_at_origin
      true_pos = true_pos.insert(0, 0)
      false_pos = false_pos.insert(0, 0)
      thresholds = thresholds.insert(0, thresholds[0] + 1)
    end

    # Normalize the cumulative counts by their final (total) value to obtain rates.
    tpr = true_pos / true_pos[-1].to_f
    fpr = false_pos / false_pos[-1].to_f

    [fpr, tpr, thresholds]
  end

  # Calculate area under the curve using the trapezoidal rule.
  #
  # @param x [Numo::Int32/Numo::DFloat] (shape: [n_elements])
  #   x coordinates. These are expected to monotonously increase or decrease.
  # @param y [Numo::Int32/Numo::DFloat] (shape: [n_elements]) y coordinates.
  # @return [Float] area under the curve.
  # @raise [ArgumentError] If an input is not 1-D or fewer than two points are available.
  def auc(x, y)
    check_params_type(Numo::NArray, x: x, y: y)
    raise ArgumentError, 'Expect x to be 1-D arrray.' unless x.shape[1].nil?
    raise ArgumentError, 'Expect y to be 1-D arrray.' unless y.shape[1].nil?

    n_points = [x.shape[0], y.shape[0]].min
    raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_points < 2

    # One trapezoid per pair of neighbouring points; abs makes the x direction irrelevant.
    trapezoids = (1...n_points).map do |k|
      0.5 * (x[k - 1] - x[k]).abs * (y[k - 1] + y[k])
    end
    trapezoids.reduce(&:+)
  end

  private

  # Build the ROC curve for one label/score pair and return the area under it.
  def area_under_roc(y_true, y_score)
    fpr, tpr, = roc_curve(y_true, y_score)
    auc(fpr, tpr)
  end

  # Count cumulative false/true positives at every distinct score, scanning scores in descending order.
  # Returns [false_pos, true_pos, thresholds].
  def binary_roc_curve(y_true, y_score, pos_label = nil)
    positive = pos_label.nil? ? y_true.to_a.uniq.max : pos_label

    order = y_score.sort_index.reverse
    sorted_hits = Numo::Int32.cast(y_true.eq(positive)[order])
    sorted_scores = y_score[order]

    # Indices where the sorted score changes, plus the final index, delimit the thresholds.
    change_points = sorted_scores.diff.ne(0).where
    cut_ids = change_points.append(sorted_hits.size - 1)

    true_pos = sorted_hits.cumsum[cut_ids]
    # Samples seen so far (index + 1) minus the true positives among them.
    false_pos = 1 + cut_ids - true_pos

    [false_pos, true_pos, sorted_scores[cut_ids]]
  end
end
121
+ end
122
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/base_estimator'
5
+
6
+ module Rumale
7
+ module Optimizer
8
# AdaGrad is a class that implements AdaGrad optimizer.
#
# @example
#   optimizer = Rumale::Optimizer::AdaGrad.new(learning_rate: 0.01)
#   estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
#   estimator.fit(samples, values)
#
# *Reference*
# - J. Duchi, E Hazan, and Y. Singer, "Adaptive Subgradient Methods for Online Learning and Stochastic Optimization," J. Machine Learning Research, vol. 12, pp. 2121--2159, 2011.
class AdaGrad
  include Base::BaseEstimator
  include Validation

  # Create a new optimizer with AdaGrad.
  #
  # @param learning_rate [Float] The initial value of learning rate.
  #   Checked with check_params_float and check_params_positive.
  def initialize(learning_rate: 0.01)
    check_params_float(learning_rate: learning_rate)
    check_params_positive(learning_rate: learning_rate)
    @params = {}
    @params[:learning_rate] = learning_rate
    @moment = nil
  end

  # Calculate the updated weight with AdaGrad adaptive learning rate.
  # The squared gradients are accumulated across calls in an internal buffer,
  # so the effective step size of each element shrinks as its gradients add up.
  #
  # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
  # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
  # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
  def call(weight, gradient)
    # Allocate the accumulator lazily with the full shape of the weight, so that
    # weights with more than one dimension get an element-wise accumulator as well.
    @moment ||= Numo::DFloat.zeros(*weight.shape)
    @moment += gradient**2
    # The small constant keeps the denominator non-zero while the accumulator is still zero.
    weight - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
  end

  # Dump marshal data.
  # @return [Hash] The marshal data.
  def marshal_dump
    { params: @params,
      moment: @moment }
  end

  # Load marshal data.
  # @param obj [Hash] The hash produced by marshal_dump (keys :params and :moment).
  # @return [nil]
  def marshal_load(obj)
    @params = obj[:params]
    @moment = obj[:moment]
    nil
  end
end
58
+ end
59
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
# Normalize samples by scaling each feature with its maximum absolute value.
#
# @example
#   normalizer = Rumale::Preprocessing::MaxAbsScaler.new
#   new_training_samples = normalizer.fit_transform(training_samples)
#   new_testing_samples = normalizer.transform(testing_samples)
class MaxAbsScaler
  include Base::BaseEstimator
  include Base::Transformer

  # Return the vector consists of the maximum absolute value for each feature.
  # @return [Numo::DFloat] (shape: [n_features])
  attr_reader :max_abs_vec

  # Creates a new normalizer for scaling each feature with its maximum absolute value.
  def initialize
    @params = {}
    @max_abs_vec = nil
  end

  # Calculate the maximum absolute value of each feature for scaling.
  #
  # @overload fit(x) -> MaxAbsScaler
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
  # @return [MaxAbsScaler]
  def fit(x, _y = nil)
    check_sample_array(x)
    @max_abs_vec = x.abs.max(0)
    self
  end

  # Calculate the maximum absolute value for each feature, and then normalize samples.
  #
  # @overload fit_transform(x) -> Numo::DFloat
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
  # @return [Numo::DFloat] The scaled samples.
  def fit_transform(x, _y = nil)
    # fit and transform each validate x themselves, so no extra check is needed here.
    fit(x).transform(x)
  end

  # Perform scaling the given samples with maximum absolute value for each feature.
  # A feature whose fitted maximum absolute value is zero is divided by one instead,
  # so that it does not turn into NaN or Infinity.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
  # @return [Numo::DFloat] The scaled samples.
  def transform(x)
    check_sample_array(x)
    # Work on a copy so that the fitted vector exposed by max_abs_vec stays unchanged.
    scale_vec = @max_abs_vec.dup
    scale_vec[scale_vec.eq(0)] = 1.0
    x / scale_vec
  end

  # Dump marshal data.
  # @return [Hash] The marshal data about MaxAbsScaler.
  def marshal_dump
    { params: @params,
      max_abs_vec: @max_abs_vec }
  end

  # Load marshal data.
  # @param obj [Hash] The hash produced by marshal_dump (keys :params and :max_abs_vec).
  # @return [nil]
  def marshal_load(obj)
    @params = obj[:params]
    @max_abs_vec = obj[:max_abs_vec]
    nil
  end
end
75
+ end
76
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.8.2'
6
+ VERSION = '0.8.3'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.2
4
+ version: 0.8.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-19 00:00:00.000000000 Z
11
+ date: 2019-04-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -140,6 +140,7 @@ files:
140
140
  - lib/rumale/evaluation_measure/purity.rb
141
141
  - lib/rumale/evaluation_measure/r2_score.rb
142
142
  - lib/rumale/evaluation_measure/recall.rb
143
+ - lib/rumale/evaluation_measure/roc_auc.rb
143
144
  - lib/rumale/kernel_approximation/rbf.rb
144
145
  - lib/rumale/kernel_machine/kernel_svc.rb
145
146
  - lib/rumale/linear_model/base_linear_model.rb
@@ -159,6 +160,7 @@ files:
159
160
  - lib/rumale/naive_bayes/naive_bayes.rb
160
161
  - lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
161
162
  - lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
163
+ - lib/rumale/optimizer/ada_grad.rb
162
164
  - lib/rumale/optimizer/adam.rb
163
165
  - lib/rumale/optimizer/nadam.rb
164
166
  - lib/rumale/optimizer/rmsprop.rb
@@ -171,6 +173,7 @@ files:
171
173
  - lib/rumale/polynomial_model/factorization_machine_regressor.rb
172
174
  - lib/rumale/preprocessing/l2_normalizer.rb
173
175
  - lib/rumale/preprocessing/label_encoder.rb
176
+ - lib/rumale/preprocessing/max_abs_scaler.rb
174
177
  - lib/rumale/preprocessing/min_max_scaler.rb
175
178
  - lib/rumale/preprocessing/one_hot_encoder.rb
176
179
  - lib/rumale/preprocessing/standard_scaler.rb