RubyGems - rumale - Versions diffs - 0.20.1 → 0.20.2 - Mend

rumale 0.20.1 → 0.20.2

Files changed (6) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/rumale.rb +1 -0
data/lib/rumale/model_selection/time_series_split.rb +91 -0
data/lib/rumale/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0f361026cd2922a2d36846a817eee855bf0c000156ed6c756bca29d2e42d67a2
-  data.tar.gz: 016fa40aa2546824cacbc32353263cbfc9427f0ceabb7e703f99854914bb9a2e
+  metadata.gz: 5d8c93acbf38fbd07e5df224010abbdd4269a6ce3bbf8112a0eba652a606785d
+  data.tar.gz: e7cb00a802420854835c92f011425f3054bfcc1052bf7b3664da1f95834ef435
 SHA512:
-  metadata.gz: 7a53a958db7ec8b56236018505370b9908ae81a9afc9d7c8ff0b16d83971539c1ad729b5ab350eb49ae9b90ada43a8912ed2404a37eef97a4d34dad90b1d3e9f
-  data.tar.gz: 2f2b3d48625c7120464179bc7759c01ba7de85cb0d54720665eaf1e4822f24c1870474ebc24a47cff123e44a8626b0e0fac6a7e81216c057286071770ea5ba79
+  metadata.gz: f95fdd89b84dad02e516ee0479b1cddfb101cb96de897b6e7fa3fba546272a243cff5cfe954cb51942ec1ab23cf3028b183db86b52fab00a35d15be7eee5bf92
+  data.tar.gz: e5f6235e88dd47b9002a2154cabd2c1e64afb6cbb5b0745b411c7e5559351e925c9db8ec332724e301b83215662b3582e79a9e997f0338846514b234dabf1fc3

data/CHANGELOG.md CHANGED

@@ -1,3 +1,7 @@
+# 0.20.2
+- Add cross-validator class for time-series data.
+  - [TimeSeriesSplit](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/TimeSeriesSplit.html)
 # 0.20.1
 - Add cross-validator classes that split data according to group labels.
   - [GroupKFold](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/GroupKFold.html)

data/lib/rumale.rb CHANGED

@@ -103,6 +103,7 @@ require 'rumale/model_selection/stratified_k_fold'
 require 'rumale/model_selection/shuffle_split'
 require 'rumale/model_selection/group_shuffle_split'
 require 'rumale/model_selection/stratified_shuffle_split'
+require 'rumale/model_selection/time_series_split'
 require 'rumale/model_selection/cross_validation'
 require 'rumale/model_selection/grid_search_cv'
 require 'rumale/model_selection/function'

data/lib/rumale/model_selection/time_series_split.rb ADDED

@@ -0,0 +1,91 @@
+# frozen_string_literal: true
+require 'rumale/base/splitter'
+module Rumale
+  module ModelSelection
+    # TimeSeriesSplit is a class that generates the set of data indices for time series cross-validation.
+    # It is assumed that the given dataset is already ordered by time information.
+    #
+    # @example
+    #   cv = Rumale::ModelSelection::TimeSeriesSplit.new(n_splits: 5)
+    #   x = Numo::DFloat.new(6, 2).rand
+    #   cv.split(x, nil).each do |train_ids, test_ids|
+    #     puts '---'
+    #     pp train_ids
+    #     pp test_ids
+    #   end
+    #
+    #   # ---
+    #   # [0]
+    #   # [1]
+    #   # ---
+    #   # [0, 1]
+    #   # [2]
+    #   # ---
+    #   # [0, 1, 2]
+    #   # [3]
+    #   # ---
+    #   # [0, 1, 2, 3]
+    #   # [4]
+    #   # ---
+    #   # [0, 1, 2, 3, 4]
+    #   # [5]
+    #
+    class TimeSeriesSplit
+      include Base::Splitter
+      # Return the number of splits.
+      # @return [Integer]
+      attr_reader :n_splits
+      # Return the maximum number of training samples in a split.
+      # @return [Integer/Nil]
+      attr_reader :max_train_size
+      # Create a new data splitter for time series cross-validation.
+      #
+      # @param n_splits [Integer] The number of splits.
+      # @param max_train_size [Integer/Nil] The maximum number of training samples in a split.
+      def initialize(n_splits: 5, max_train_size: nil)
+        check_params_numeric(n_splits: n_splits)
+        check_params_numeric_or_nil(max_train_size: max_train_size)
+        @n_splits = n_splits
+        @max_train_size = max_train_size
+      end
+      # Generate data indices for time series cross-validation.
+      #
+      # @overload split(x, y) -> Array
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features])
+      #     The dataset to be used to generate data indices for time series cross-validation.
+      #     It is expected that the data will be ordered by time information.
+      #   @param y [Numo::Int32] (shape: [n_samples])
+      #     This argument exists to unify the interface between the K-fold methods, it is not used in the method.
+      # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
+      def split(x, _y)
+        x = check_convert_sample_array(x)
+        n_samples = x.shape[0]
+        unless (@n_splits + 1).between?(2, n_samples)
+          raise ArgumentError,
+                'The number of folds (n_splits + 1) must be not less than 2 and not more than the number of samples.'
+        end
+        test_size = n_samples / (@n_splits + 1)
+        offset = test_size + n_samples % (@n_splits + 1)
+        Array.new(@n_splits) do |n|
+          start = offset * (n + 1)
+          train_ids = if !@max_train_size.nil? && @max_train_size < test_size
+                        Array((start - @max_train_size)...start)
+                      else
+                        Array(0...start)
+                      end
+          test_ids = Array(start...(start + test_size))
+          [train_ids, test_ids]
+        end
+      end
+    end
+  end
+end

data/lib/rumale/version.rb CHANGED

@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.20.1'
+  VERSION = '0.20.2'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.20.1
+  version: 0.20.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-08-23 00:00:00.000000000 Z
+date: 2020-09-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -141,6 +141,7 @@ files:
 - lib/rumale/model_selection/shuffle_split.rb
 - lib/rumale/model_selection/stratified_k_fold.rb
 - lib/rumale/model_selection/stratified_shuffle_split.rb
+- lib/rumale/model_selection/time_series_split.rb
 - lib/rumale/multiclass/one_vs_rest_classifier.rb
 - lib/rumale/naive_bayes/base_naive_bayes.rb
 - lib/rumale/naive_bayes/bernoulli_nb.rb