RubyGems - rumale - Versions diffs - 0.8.2 → 0.8.3 - Mend

rumale 0.8.2 → 0.8.3

Files changed (8) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/lib/rumale.rb +3 -0
data/lib/rumale/evaluation_measure/roc_auc.rb +122 -0
data/lib/rumale/optimizer/ada_grad.rb +59 -0
data/lib/rumale/preprocessing/max_abs_scaler.rb +76 -0
data/lib/rumale/version.rb +1 -1
metadata +5 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: dba389e77a984b46e5352a2b4aae15f8eec2362d
-  data.tar.gz: 2eab0f18fc0e4b16af317bfa7b81db8203c62a20
+  metadata.gz: 38e459906ed25e84791a9d872e9536d633d9db2b
+  data.tar.gz: ea80195eeacbaf3ed7fccaf828e3e809baa862a7
 SHA512:
-  metadata.gz: 034b0fc6f79ed66af3a50d025e66f17a3815c0c0e0634bd3eccec19546d585b17f158e376700aafa3ae89d52a895efefd79ff048d61b9f87dabe51f72393b75f
-  data.tar.gz: 4124f95f72392af658b342d7c21526717417de246ce99f4ee857cc4d26e799dc4a7ea0bbb59aa45c6e8621178ee84a4a4ead426aa79f09e62bf903e273cdf05b
+  metadata.gz: 639d266a1045d9ee1fbf37f770bf6171ff8afc5a8183441fbaf0283af7a17dc2e6e05ee44cb601c8b9ea301d5f46d914fc77601fd60345b904aff509f07e5277
+  data.tar.gz: 7010cbf3f11f0139a0dda334cfce4f3fd8ffe059eb776f93e7aba32f91c3f517c8fe4723975ef2036e9981131d741345bab4158ce8354431a8a8b57000dacc2f

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,8 @@
+# 0.8.3
+- Add optimizer class for AdaGrad.
+- Add evaluator class for ROC AUC.
+- Add class for scaling with maximum absolute value.
 # 0.8.2
 - Add class for Adam optimizer.
 - Add data splitter classes for random permutation cross validation.

data/lib/rumale.rb CHANGED Viewed

@@ -17,6 +17,7 @@ require 'rumale/base/transformer'
 require 'rumale/base/splitter'
 require 'rumale/base/evaluator'
 require 'rumale/optimizer/sgd'
+require 'rumale/optimizer/ada_grad'
 require 'rumale/optimizer/rmsprop'
 require 'rumale/optimizer/adam'
 require 'rumale/optimizer/nadam'
@@ -52,6 +53,7 @@ require 'rumale/decomposition/pca'
 require 'rumale/decomposition/nmf'
 require 'rumale/preprocessing/l2_normalizer'
 require 'rumale/preprocessing/min_max_scaler'
+require 'rumale/preprocessing/max_abs_scaler'
 require 'rumale/preprocessing/standard_scaler'
 require 'rumale/preprocessing/label_encoder'
 require 'rumale/preprocessing/one_hot_encoder'
@@ -65,6 +67,7 @@ require 'rumale/evaluation_measure/accuracy'
 require 'rumale/evaluation_measure/precision'
 require 'rumale/evaluation_measure/recall'
 require 'rumale/evaluation_measure/f_score'
+require 'rumale/evaluation_measure/roc_auc'
 require 'rumale/evaluation_measure/log_loss'
 require 'rumale/evaluation_measure/r2_score'
 require 'rumale/evaluation_measure/explained_variance_score'

data/lib/rumale/evaluation_measure/roc_auc.rb ADDED Viewed

@@ -0,0 +1,122 @@
+# frozen_string_literal: true
+require 'rumale/base/evaluator'
+module Rumale
+  module EvaluationMeasure
+    # ROCAUC is a class that calculate area under the receiver operation characteristic curve from predicted scores.
+    #
+    # @example
+    #   # Encode labels to integer array.
+    #   labels = %w[A B B C A A C C C A]
+    #   label_encoder = Rumale::Preprocessing::LabelEncoder.new
+    #   y = label_encoder.fit_transform(labels)
+    #   # Fit classifier.
+    #   classifier = Rumale::LinearModel::LogisticRegression.new
+    #   classifier.fit(x, y)
+    #   # Predict class probabilities.
+    #   y_score = classifier.predict_proba(x)
+    #   # Encode labels to one-hot vectors.
+    #   one_hot_encoder = Rumale::Preprocessing::OneHotEncoder.new
+    #   y_onehot = one_hot_encoder.fit_transform(y)
+    #   # Calculate ROC AUC.
+    #   evaluator = Rumale::EvaluationMeasure::ROCAUC.new
+    #   puts evaluator.score(y_onehot, y_score)
+    class ROCAUC
+      include Base::Evaluator
+      # Calculate area under the receiver operation characteristic curve (ROC AUC).
+      #
+      # @param y_true [Numo::Int32] (shape: [n_samples] or [n_samples, n_classes])
+      #   Ground truth binary labels or one-hot encoded multi-labels.
+      # @param y_score [Numo::DFloat] (shape: [n_samples] or [n_samples, n_classes])
+      #   Predicted class probabilities or confidence scores.
+      # @return [Float] (macro-averaged) ROC AUC.
+      def score(y_true, y_score)
+        check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
+        raise ArgumentError, 'Expect to have the same shape for y_true and y_score.' unless y_true.shape == y_score.shape
+        n_classes = y_score.shape[1]
+        if n_classes.nil?
+          fpr, tpr, = roc_curve(y_true, y_score)
+          return auc(fpr, tpr)
+        end
+        scores = Array.new(n_classes) do |c|
+          fpr, tpr, = roc_curve(y_true[true, c], y_score[true, c])
+          auc(fpr, tpr)
+        end
+        scores.reduce(&:+).fdiv(n_classes)
+      end
+      # Calculate receiver operation characteristic curve.
+      #
+      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth binary labels.
+      # @param y_score [Numo::DFloat] (shape: [n_samples]) Predicted class probabilities or confidence scores.
+      # @param pos_label [Integer] Label to be a positive label when binarizing the given labels.
+      #   If nil is given, the method considers the maximum value of the label as a positive label.
+      # @return [Array] fpr (Numo::DFloat): false positive rates. tpr (Numo::DFloat): true positive rates.
+      #   thresholds (Numo::DFloat): thresholds on the decision function used to calculate fpr and tpr.
+      def roc_curve(y_true, y_score, pos_label = nil)
+        check_params_type(Numo::NArray, y_true: y_true, y_score: y_score)
+        raise ArgumentError, 'Expect y_true to be 1-D arrray.' unless y_true.shape[1].nil?
+        raise ArgumentError, 'Expect y_score to be 1-D arrray.' unless y_score.shape[1].nil?
+        labels = y_true.to_a.uniq
+        if pos_label.nil?
+          raise ArgumentError, 'y_true must be binary labels or pos_label must be specified if y_true is multi-label' unless labels.size == 2
+        else
+          raise ArgumentError, 'y_true must have elements whose values are pos_label.' unless y_true.to_a.uniq.include?(pos_label)
+        end
+        false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)
+        if true_pos.size.zero? || false_pos[0] != 0 || true_pos[0] != 0
+          true_pos = true_pos.insert(0, 0)
+          false_pos = false_pos.insert(0, 0)
+          thresholds = thresholds.insert(0, thresholds[0] + 1)
+        end
+        tpr = true_pos / true_pos[-1].to_f
+        fpr = false_pos / false_pos[-1].to_f
+        [fpr, tpr, thresholds]
+      end
+      # Calculate area under the curve using the trapezoidal rule.
+      #
+      # @param x [Numo::Int32/Numo::DFloat] (shape: [n_elements])
+      #   x coordinates. These are expected to monotonously increase or decrease.
+      # @param y [Numo::Int32/Numo::DFloat] (shape: [n_elements]) y coordinates.
+      # @return [Float] area under the curve.
+      def auc(x, y)
+        check_params_type(Numo::NArray, x: x, y: y)
+        raise ArgumentError, 'Expect x to be 1-D arrray.' unless x.shape[1].nil?
+        raise ArgumentError, 'Expect y to be 1-D arrray.' unless y.shape[1].nil?
+        n_samples = [x.shape[0], y.shape[0]].min
+        raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_samples < 2
+        (0...n_samples).to_a.each_cons(2).map { |i, j| 0.5 * (x[i] - x[j]).abs * (y[i] + y[j]) }.reduce(&:+)
+      end
+      private
+      def binary_roc_curve(y_true, y_score, pos_label = nil)
+        pos_label = y_true.to_a.uniq.max if pos_label.nil?
+        bin_y_true = y_true.eq(pos_label)
+        desc_pred_ids = y_score.sort_index.reverse
+        desc_y_true = Numo::Int32.cast(bin_y_true[desc_pred_ids])
+        desc_y_score = y_score[desc_pred_ids]
+        dist_value_ids = desc_y_score.diff.ne(0).where
+        threshold_ids = dist_value_ids.append(desc_y_true.size - 1)
+        true_pos = desc_y_true.cumsum[threshold_ids]
+        false_pos = 1 + threshold_ids - true_pos
+        [false_pos, true_pos, desc_y_score[threshold_ids]]
+      end
+    end
+  end
+end

data/lib/rumale/optimizer/ada_grad.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+require 'rumale/validation'
+require 'rumale/base/base_estimator'
+module Rumale
+  module Optimizer
+    # AdaGrad is a class that implements AdaGrad optimizer.
+    #
+    # @example
+    #   optimizer = Rumale::Optimizer::AdaGrad.new(learning_rate: 0.01)
+    #   estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
+    #   estimator.fit(samples, values)
+    #
+    # *Reference*
+    # - J. Duchi, E Hazan, and Y. Singer, "Adaptive Subgradient Methods for Online Learning and Stochastic Optimization," J. Machine Learning Research, vol. 12, pp. 2121--2159, 2011.
+    class AdaGrad
+      include Base::BaseEstimator
+      include Validation
+      # Create a new optimizer with AdaGrad.
+      #
+      # @param learning_rate [Float] The initial value of learning rate.
+      def initialize(learning_rate: 0.01)
+        check_params_float(learning_rate: learning_rate)
+        check_params_positive(learning_rate: learning_rate)
+        @params = {}
+        @params[:learning_rate] = learning_rate
+        # Accumulated squared gradients; allocated on the first call to #call.
+        @moment = nil
+      end
+      # Calculate the updated weight with AdaGrad adaptive learning rate.
+      #
+      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
+      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
+      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
+      def call(weight, gradient)
+        # Start the accumulator at zero, sized from the first weight vector seen.
+        @moment ||= Numo::DFloat.zeros(weight.shape[0])
+        @moment += gradient**2
+        # Per-element step: learning rate divided by the root of the accumulated squared gradients.
+        # The added 1.0e-8 keeps the denominator above zero when an accumulator entry is still zero.
+        weight - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
+      end
+      # Dump marshal data.
+      # @return [Hash] The marshal data.
+      def marshal_dump
+        { params: @params,
+          moment: @moment }
+      end
+      # Load marshal data.
+      # @param obj [Hash] A hash with :params and :moment entries, as produced by marshal_dump.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @moment = obj[:moment]
+        nil
+      end
+    end
+  end
+end

data/lib/rumale/preprocessing/max_abs_scaler.rb ADDED Viewed

@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+module Rumale
+  module Preprocessing
+    # Normalize samples by scaling each feature with its maximum absolute value.
+    #
+    # @example
+    #   normalizer = Rumale::Preprocessing::MaxAbsScaler.new
+    #   new_training_samples = normalizer.fit_transform(training_samples)
+    #   new_testing_samples = normalizer.transform(testing_samples)
+    class MaxAbsScaler
+      include Base::BaseEstimator
+      include Base::Transformer
+      # Return the vector consisting of the maximum absolute value for each feature.
+      # @return [Numo::DFloat] (shape: [n_features])
+      attr_reader :max_abs_vec
+      # Creates a new normalizer for scaling each feature with its maximum absolute value.
+      def initialize
+        @params = {}
+        @max_abs_vec = nil
+      end
+      # Calculate the maximum absolute value of each feature for scaling.
+      #
+      # @overload fit(x) -> MaxAbsScaler
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
+      # @return [MaxAbsScaler]
+      def fit(x, _y = nil)
+        check_sample_array(x)
+        # Maximum of the absolute values taken along axis 0 (one value per feature).
+        @max_abs_vec = x.abs.max(0)
+        self
+      end
+      # Calculate the maximum absolute value for each feature, and then normalize samples.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum absolute value for each feature.
+      # @return [Numo::DFloat] The scaled samples.
+      def fit_transform(x, _y = nil)
+        check_sample_array(x)
+        fit(x).transform(x)
+      end
+      # Perform scaling the given samples with maximum absolute value for each feature.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
+      # @return [Numo::DFloat] The scaled samples.
+      def transform(x)
+        check_sample_array(x)
+        # NOTE(review): no guard here for a feature whose stored maximum absolute value is 0
+        # (an all-zero column at fit time); the division is then by zero - confirm intended handling.
+        x / @max_abs_vec
+      end
+      # Dump marshal data.
+      # @return [Hash] The marshal data about MaxAbsScaler.
+      def marshal_dump
+        { params: @params,
+          max_abs_vec: @max_abs_vec }
+      end
+      # Load marshal data.
+      # @param obj [Hash] A hash with :params and :max_abs_vec entries, as produced by marshal_dump.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @max_abs_vec = obj[:max_abs_vec]
+        nil
+      end
+    end
+  end
+end

data/lib/rumale/version.rb CHANGED Viewed

@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.8.2'
+  VERSION = '0.8.3'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.8.2
+  version: 0.8.3
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-03-19 00:00:00.000000000 Z
+date: 2019-04-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -140,6 +140,7 @@ files:
 - lib/rumale/evaluation_measure/purity.rb
 - lib/rumale/evaluation_measure/r2_score.rb
 - lib/rumale/evaluation_measure/recall.rb
+- lib/rumale/evaluation_measure/roc_auc.rb
 - lib/rumale/kernel_approximation/rbf.rb
 - lib/rumale/kernel_machine/kernel_svc.rb
 - lib/rumale/linear_model/base_linear_model.rb
@@ -159,6 +160,7 @@ files:
 - lib/rumale/naive_bayes/naive_bayes.rb
 - lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
 - lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
+- lib/rumale/optimizer/ada_grad.rb
 - lib/rumale/optimizer/adam.rb
 - lib/rumale/optimizer/nadam.rb
 - lib/rumale/optimizer/rmsprop.rb
@@ -171,6 +173,7 @@ files:
 - lib/rumale/polynomial_model/factorization_machine_regressor.rb
 - lib/rumale/preprocessing/l2_normalizer.rb
 - lib/rumale/preprocessing/label_encoder.rb
+- lib/rumale/preprocessing/max_abs_scaler.rb
 - lib/rumale/preprocessing/min_max_scaler.rb
 - lib/rumale/preprocessing/one_hot_encoder.rb
 - lib/rumale/preprocessing/standard_scaler.rb