RubyGems - rumale - Versions diffs - 0.8.1 → 0.8.2 - Mend

rumale 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/.rubocop.yml +5 -0
data/CHANGELOG.md +6 -0
data/README.md +7 -0
data/lib/rumale.rb +3 -0
data/lib/rumale/model_selection/k_fold.rb +4 -0
data/lib/rumale/model_selection/shuffle_split.rb +91 -0
data/lib/rumale/model_selection/stratified_k_fold.rb +4 -0
data/lib/rumale/model_selection/stratified_shuffle_split.rb +115 -0
data/lib/rumale/optimizer/adam.rb +77 -0
data/lib/rumale/version.rb +2 -1
metadata +5 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: dab9c67aa39f19e73859d41013363b4f3811142e
-  data.tar.gz: 49b1d14b9261f2ede4dc97b4353efdf9032872d2
+  metadata.gz: dba389e77a984b46e5352a2b4aae15f8eec2362d
+  data.tar.gz: 2eab0f18fc0e4b16af317bfa7b81db8203c62a20
 SHA512:
-  metadata.gz: 1d2b62e0660586f4ace811f06bdd73e9ff5adb877682a9ae53e29c76188b6fe1215d1305953d292668dcc4f0b5fba71399ad746e8976572a19bbd6a4c1153829
-  data.tar.gz: ce075327208560af72f0b54d7113b39a23c98c1828a3f5c3f5b28b3d987be3d81af465aae943aa27f2883cc671337b1952b4f8d8981a970c3727e0c94affbef5
+  metadata.gz: 034b0fc6f79ed66af3a50d025e66f17a3815c0c0e0634bd3eccec19546d585b17f158e376700aafa3ae89d52a895efefd79ff048d61b9f87dabe51f72393b75f
+  data.tar.gz: 4124f95f72392af658b342d7c21526717417de246ce99f4ee857cc4d26e799dc4a7ea0bbb59aa45c6e8621178ee84a4a4ead426aa79f09e62bf903e273cdf05b

data/.rubocop.yml CHANGED

@@ -4,6 +4,11 @@ AllCops:
   TargetRubyVersion: 2.3
   DisplayCopNames: true
   DisplayStyleGuide: true
+  Exclude:
+    - 'bin/*'
+    - 'rumale.gemspec'
+    - 'Rakefile'
+    - 'Gemfile'
 Documentation:
   Enabled: false

data/CHANGELOG.md CHANGED

@@ -1,3 +1,9 @@
+# 0.8.2
+- Add class for Adam optimizer.
+- Add data splitter classes for random permutation cross validation.
+- Add accessor method for number of splits to K-fold splitter classes.
+- Add execution result of example script on README ([#3](https://github.com/yoshoku/rumale/pull/3)).
 # 0.8.1
 - Add some evaluator classes.
   - MeanSquaredLogError

data/README.md CHANGED

@@ -121,6 +121,13 @@ mean_logloss = report[:test_score].inject(:+) / kf.n_splits
 puts("5-CV mean log-loss: %.3f" % mean_logloss)
 ```
+Execution of the above script results in the following.
+```bash
+$ ruby cross_validation.rb
+5-CV mean log-loss: 0.476
+```
 ### Example 3. Pipeline
 ```ruby

data/lib/rumale.rb CHANGED

@@ -18,6 +18,7 @@ require 'rumale/base/splitter'
 require 'rumale/base/evaluator'
 require 'rumale/optimizer/sgd'
 require 'rumale/optimizer/rmsprop'
+require 'rumale/optimizer/adam'
 require 'rumale/optimizer/nadam'
 require 'rumale/optimizer/yellow_fin'
 require 'rumale/pipeline/pipeline'
@@ -56,6 +57,8 @@ require 'rumale/preprocessing/label_encoder'
 require 'rumale/preprocessing/one_hot_encoder'
 require 'rumale/model_selection/k_fold'
 require 'rumale/model_selection/stratified_k_fold'
+require 'rumale/model_selection/shuffle_split'
+require 'rumale/model_selection/stratified_shuffle_split'
 require 'rumale/model_selection/cross_validation'
 require 'rumale/model_selection/grid_search_cv'
 require 'rumale/evaluation_measure/accuracy'

data/lib/rumale/model_selection/k_fold.rb CHANGED

@@ -18,6 +18,10 @@ module Rumale
     class KFold
       include Base::Splitter
+      # Return the number of folds.
+      # @return [Integer]
+      attr_reader :n_splits
       # Return the flag indicating whether to shuffle the dataset.
       # @return [Boolean]
       attr_reader :shuffle

data/lib/rumale/model_selection/shuffle_split.rb ADDED

@@ -0,0 +1,91 @@
+# frozen_string_literal: true
+require 'rumale/base/splitter'
+module Rumale
+  module ModelSelection
+    # ShuffleSplit is a class that generates the set of data indices for random permutation cross-validation.
+    #
+    # @example
+    #   ss = Rumale::ModelSelection::ShuffleSplit.new(n_splits: 3, test_size: 0.2, random_seed: 1)
+    #   ss.split(samples, labels).each do |train_ids, test_ids|
+    #     train_samples = samples[train_ids, true]
+    #     test_samples = samples[test_ids, true]
+    #     ...
+    #   end
+    #
+    class ShuffleSplit
+      include Base::Splitter
+      # Return the number of splits, i.e. the number of train/test index pairs generated by #split.
+      # @return [Integer]
+      attr_reader :n_splits
+      # Return the random generator for shuffling the dataset.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new data splitter for random permutation cross validation.
+      #
+      # @param n_splits [Integer] The number of splits (train/test index pairs) to generate.
+      # @param test_size [Float] The ratio of the number of samples used for test data.
+      # @param train_size [Float] The ratio of the number of samples used for train data. If nil, every sample not selected for the test split is used.
+      # @param random_seed [Integer] The seed value used to initialize the random generator.
+      def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
+        check_params_integer(n_splits: n_splits)
+        check_params_float(test_size: test_size)
+        check_params_type_or_nil(Float, train_size: train_size)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(n_splits: n_splits)
+        check_params_positive(test_size: test_size)
+        check_params_positive(train_size: train_size) unless train_size.nil?
+        @n_splits = n_splits
+        @test_size = test_size
+        @train_size = train_size
+        @random_seed = random_seed
+        @random_seed ||= srand
+        @rng = Random.new(@random_seed)
+      end
+      # Generate data indices for random permutation cross validation.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
+      #   The dataset to be used to generate data indices. Besides validation, only its number of samples is used; the optional second argument is ignored.
+      # @return [Array] The array of [train_ids, test_ids] pairs, one per split, for constructing the training and testing datasets.
+      def split(x, _y = nil)
+        check_sample_array(x)
+        # Derive the split sizes from the ratios and validate them against the number of samples.
+        n_samples = x.shape[0]
+        n_test_samples = (@test_size * n_samples).to_i
+        n_train_samples = @train_size.nil? ? n_samples - n_test_samples : (@train_size * n_samples).to_i
+        unless @n_splits.between?(1, n_samples)
+          raise ArgumentError,
+                'The value of n_splits must be not less than 1 and not more than the number of samples.'
+        end
+        unless n_test_samples.between?(1, n_samples)
+          raise RangeError,
+                'The number of sample in test split must be not less than 1 and not more than the number of samples.'
+        end
+        unless n_train_samples.between?(1, n_samples)
+          raise RangeError,
+                'The number of sample in train split must be not less than 1 and not more than the number of samples.'
+        end
+        if (n_test_samples + n_train_samples) > n_samples
+          raise RangeError,
+                'The total number of samples in test split and train split must be not more than the number of samples.'
+        end
+        # Return an array consisting of the training and testing ids for each split.
+        dataset_ids = [*0...n_samples]
+        Array.new(@n_splits) do
+          test_ids = dataset_ids.sample(n_test_samples, random: @rng)
+          train_ids = if @train_size.nil?
+                        dataset_ids - test_ids
+                      else
+                        (dataset_ids - test_ids).sample(n_train_samples, random: @rng)
+                      end
+          [train_ids, test_ids]
+        end
+      end
+    end
+  end
+end

data/lib/rumale/model_selection/stratified_k_fold.rb CHANGED

@@ -18,6 +18,10 @@ module Rumale
     class StratifiedKFold
       include Base::Splitter
+      # Return the number of folds.
+      # @return [Integer]
+      attr_reader :n_splits
       # Return the flag indicating whether to shuffle the dataset.
       # @return [Boolean]
       attr_reader :shuffle

data/lib/rumale/model_selection/stratified_shuffle_split.rb ADDED

@@ -0,0 +1,115 @@
+# frozen_string_literal: true
+require 'rumale/base/splitter'
+module Rumale
+  module ModelSelection
+    # StratifiedShuffleSplit is a class that generates the set of data indices for random permutation cross-validation.
+    # The proportion of the number of samples in each class will be almost equal for each split.
+    #
+    # @example
+    #   ss = Rumale::ModelSelection::StratifiedShuffleSplit.new(n_splits: 3, test_size: 0.2, random_seed: 1)
+    #   ss.split(samples, labels).each do |train_ids, test_ids|
+    #     train_samples = samples[train_ids, true]
+    #     test_samples = samples[test_ids, true]
+    #     ...
+    #   end
+    #
+    class StratifiedShuffleSplit
+      include Base::Splitter
+      # Return the number of splits, i.e. the number of train/test index pairs generated by #split.
+      # @return [Integer]
+      attr_reader :n_splits
+      # Return the random generator for shuffling the dataset.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new data splitter for stratified random permutation cross validation.
+      #
+      # @param n_splits [Integer] The number of splits (train/test index pairs) to generate.
+      # @param test_size [Float] The ratio of the number of samples used for test data.
+      # @param train_size [Float] The ratio of the number of samples used for train data. If nil, every sample not selected for the test split is used.
+      # @param random_seed [Integer] The seed value used to initialize the random generator.
+      def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
+        check_params_integer(n_splits: n_splits)
+        check_params_float(test_size: test_size)
+        check_params_type_or_nil(Float, train_size: train_size)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(n_splits: n_splits)
+        check_params_positive(test_size: test_size)
+        check_params_positive(train_size: train_size) unless train_size.nil?
+        @n_splits = n_splits
+        @test_size = test_size
+        @train_size = train_size
+        @random_seed = random_seed
+        @random_seed ||= srand
+        @rng = Random.new(@random_seed)
+      end
+      # Generate data indices for stratified random permutation cross validation.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
+      #   The dataset to be used to generate data indices for stratified random permutation cross validation.
+      #   This argument exists to unify the interface between the splitter classes; it is only validated, the indices are generated from y.
+      # @param y [Numo::Int32] (shape: [n_samples])
+      #   The labels to be used to generate data indices for stratified random permutation cross validation.
+      # @return [Array] The array of [train_ids, test_ids] pairs, one per split, for constructing the training and testing datasets.
+      def split(x, y)
+        check_sample_array(x)
+        check_label_array(y)
+        check_sample_label_size(x, y)
+        # Resolve the train ratio: the complement of the test ratio when train_size is not given.
+        train_sz = @train_size.nil? ? 1.0 - @test_size : @train_size
+        # Check the number of samples in each class.
+        unless valid_n_splits?(y)
+          raise ArgumentError,
+                'The value of n_splits must be not less than 1 and not more than the number of samples in each class.'
+        end
+        unless enough_data_size_each_class?(y, @test_size)
+          raise RangeError,
+                'The number of sample in test split must be not less than 1 and not more than the number of samples in each class.'
+        end
+        unless enough_data_size_each_class?(y, train_sz)
+          raise RangeError,
+                'The number of sample in train split must be not less than 1 and not more than the number of samples in each class.'
+        end
+        unless enough_data_size_each_class?(y, train_sz + @test_size)
+          raise RangeError,
+                'The total number of samples in test split and train split must be not more than the number of samples in each class.'
+        end
+        # Return an array consisting of the training and testing ids for each split, sampled class by class.
+        sample_ids_each_class = y.to_a.uniq.map { |label| y.eq(label).where.to_a }
+        Array.new(@n_splits) do
+          train_ids = []
+          test_ids = []
+          sample_ids_each_class.each do |sample_ids|
+            n_samples = sample_ids.size
+            n_test_samples = (@test_size * n_samples).to_i
+            n_train_samples = (train_sz * n_samples).to_i
+            test_ids += sample_ids.sample(n_test_samples, random: @rng)
+            train_ids += if @train_size.nil?
+                           sample_ids - test_ids
+                         else
+                           (sample_ids - test_ids).sample(n_train_samples, random: @rng)
+                         end
+          end
+          [train_ids, test_ids]
+        end
+      end
+      private
+      def valid_n_splits?(y)
+        y.to_a.uniq.map { |label| y.eq(label).where.size }.all? { |n_samples| @n_splits.between?(1, n_samples) }
+      end
+      def enough_data_size_each_class?(y, data_size)
+        y.to_a.uniq.map { |label| y.eq(label).where.size }.all? do |n_samples|
+          (data_size * n_samples).to_i.between?(1, n_samples)
+        end
+      end
+    end
+  end
+end

data/lib/rumale/optimizer/adam.rb ADDED

@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+require 'rumale/validation'
+require 'rumale/base/base_estimator'
+module Rumale
+  module Optimizer
+    # Adam is a class that implements Adam optimizer.
+    #
+    # @example
+    #   optimizer = Rumale::Optimizer::Adam.new(learning_rate: 0.01, decay1: 0.9, decay2: 0.999)
+    #   estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
+    #   estimator.fit(samples, values)
+    #
+    # *Reference*
+    # - D. P. Kingma and J. Ba, "Adam: A Method for Stochastic Optimization," Proc. ICLR'15, 2015.
+    class Adam
+      include Base::BaseEstimator
+      include Validation
+      # Create a new optimizer with Adam.
+      #
+      # @param learning_rate [Float] The initial value of learning rate.
+      # @param decay1 [Float] The smoothing parameter for the first moment.
+      # @param decay2 [Float] The smoothing parameter for the second moment.
+      def initialize(learning_rate: 0.001, decay1: 0.9, decay2: 0.999)
+        check_params_float(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
+        check_params_positive(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
+        @params = {}
+        @params[:learning_rate] = learning_rate
+        @params[:decay1] = decay1
+        @params[:decay2] = decay2
+        @fst_moment = nil
+        @sec_moment = nil
+        @iter = 0
+      end
+      # Calculate the updated weight with Adam adaptive learning rate.
+      #
+      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
+      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
+      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
+      def call(weight, gradient)
+        @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
+        @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
+        @iter += 1
+        @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
+        @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
+        nm_fst_moment = @fst_moment / (1.0 - @params[:decay1]**@iter)
+        nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
+        weight - @params[:learning_rate] * nm_fst_moment / (nm_sec_moment**0.5 + 1e-8)
+      end
+      # Dump marshal data.
+      # @return [Hash] The marshal data (hyperparameters, both moment estimates and the iteration counter).
+      def marshal_dump
+        { params: @params,
+          fst_moment: @fst_moment,
+          sec_moment: @sec_moment,
+          iter: @iter }
+      end
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @fst_moment = obj[:fst_moment]
+        @sec_moment = obj[:sec_moment]
+        @iter = obj[:iter]
+        nil
+      end
+    end
+  end
+end

data/lib/rumale/version.rb CHANGED

@@ -2,5 +2,6 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
-  VERSION = '0.8.1'
+  # The version of Rumale you are using.
+  VERSION = '0.8.2'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.8.1
+  version: 0.8.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-03-08 00:00:00.000000000 Z
+date: 2019-03-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -152,11 +152,14 @@ files:
 - lib/rumale/model_selection/cross_validation.rb
 - lib/rumale/model_selection/grid_search_cv.rb
 - lib/rumale/model_selection/k_fold.rb
+- lib/rumale/model_selection/shuffle_split.rb
 - lib/rumale/model_selection/stratified_k_fold.rb
+- lib/rumale/model_selection/stratified_shuffle_split.rb
 - lib/rumale/multiclass/one_vs_rest_classifier.rb
 - lib/rumale/naive_bayes/naive_bayes.rb
 - lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
 - lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
+- lib/rumale/optimizer/adam.rb
 - lib/rumale/optimizer/nadam.rb
 - lib/rumale/optimizer/rmsprop.rb
 - lib/rumale/optimizer/sgd.rb