RubyGems - svmkit - Versions diffs - 0.2.1 → 0.2.2 - Mend

svmkit 0.2.1 → 0.2.2

Files changed (10) hide show

checksums.yaml +4 -4
data/.travis.yml +7 -2
data/HISTORY.md +3 -0
data/lib/svmkit/base/splitter.rb +16 -0
data/lib/svmkit/model_selection/k_fold.rb +72 -0
data/lib/svmkit/model_selection/stratified_k_fold.rb +81 -0
data/lib/svmkit/version.rb +1 -1
data/lib/svmkit.rb +3 -0
data/svmkit.gemspec +12 -11
metadata +14 -11

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b3cb57cc46849d792fff7b6c6500b9498d56fc71
-  data.tar.gz: 65c909ee0efaafc42df12dd24aa6c62d5b816d6a
+  metadata.gz: c7c326db290b1847234f890914fe5d670a4b1d36
+  data.tar.gz: ee5a624c92bf6b35edcccf4df469f9f552c2174d
 SHA512:
-  metadata.gz: 25b52e63512393706f3f53ddf415a2e4ac07923f3d1bd909cca0ade9de66d5bbb63d32a932bce32f2fa2b6c4430bab73f483d94d620eb355540a91905320644a
-  data.tar.gz: a3b983cf6d75168cb6eda70ec5da113feb2bc52c7fc501af3a9328f569c6d793c19e4744a4a80f0c3c60b0ea4e5387db21451f2a004f8d4699605b8348c81bab
+  metadata.gz: 9256fc3d36e6247fae44ac1a14672eaf9b3ba414176b48c592be5aa8631d232dbaddba9f3884198e0ba751616a3017ad461da2fb7ec40ef26b9ab2b2417aadf5
+  data.tar.gz: e198dcbe0c7e782162a31e7131b961f619e76bf509b43b596299412ba5bb5ea16ea02e21bb2a79909d70bb230c81484083df94ec65db6e770e4e5adc712da174

data/.travis.yml CHANGED Viewed

@@ -1,5 +1,10 @@
 sudo: false
+os: linux
+dist: trusty
 language: ruby
 rvm:
-  - 2.4.2
-before_install: gem install bundler -v 1.15.4
+  - 2.2.9
+  - 2.3.6
+  - 2.4.3
+before_install:
+  - gem install --no-document bundler -v '~> 1.16'

data/HISTORY.md CHANGED Viewed

@@ -1,3 +1,6 @@
+# 0.2.2
+- Added classes for K-fold cross validation.
 # 0.2.1
 - Added class for K-nearest neighbors classifier.

data/lib/svmkit/base/splitter.rb ADDED Viewed

@@ -0,0 +1,16 @@
+module SVMKit
+  module Base
+    # Module for all validation methods in SVMKit.
+    module Splitter
+      # Return the number of splits.
+      # @return [Integer]
+      attr_reader :n_splits
+      # An abstract method for splitting dataset.
+      def split
+        raise NoImplementedError, "#{__method__} has to be implemented in #{self.class}."
+      end
+    end
+  end
+end

data/lib/svmkit/model_selection/k_fold.rb ADDED Viewed

@@ -0,0 +1,72 @@
+require 'svmkit/base/splitter'
+module SVMKit
+  # This module consists of the classes for model validation techniques.
+  module ModelSelection
+    # KFold is a class that generates the set of data indices for K-fold cross-validation.
+    #
+    # @example
+    #   kf = SVMKit::ModelSelection::KFold.new(n_splits: 3, shuffle: true, random_seed: 1)
+    #   kf.split(samples, labels).each do |train_ids, test_ids|
+    #     train_samples = samples[train_ids, true]
+    #     test_samples = samples[test_ids, true]
+    #     ...
+    #   end
+    #
+    class KFold
+      include Base::Splitter
+      # Return the flag indicating whether to shuffle the dataset.
+      # @return [Boolean]
+      attr_reader :shuffle
+      # Return the random generator for shuffling the dataset.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new data splitter for K-fold cross validation.
+      #
+      # @param n_splits [Integer] The number of folds.
+      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
+      # @param random_seed [Integer] The seed value used to initialize the random generator (a seed is obtained from srand when nil).
+      def initialize(n_splits: 3, shuffle: false, random_seed: nil)
+        @n_splits = n_splits
+        @shuffle = shuffle
+        @random_seed = random_seed
+        @random_seed ||= srand # Kernel#srand reseeds the global generator and returns the previous seed.
+        @rng = Random.new(@random_seed)
+      end
+      # Generate data indices for K-fold cross validation.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
+      #   The dataset to be used to generate data indices for K-fold cross validation.
+      # @param y [Numo::Int32] (shape: [n_samples])
+      #   The labels to be used to generate data indices for stratified K-fold cross validation.
+      #   This argument exists to unify the interface between the K-fold methods, it is not used in the method.
+      # @return [Array] One [train_ids, test_ids] pair of index arrays for each fold.
+      def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
+        # Take the number of samples from the first dimension of x and validate n_splits against it.
+        n_samples, = x.shape
+        unless @n_splits.between?(2, n_samples)
+          raise ArgumentError,
+                'The value of n_splits must be not less than 2 and not more than the number of samples.'
+        end
+        # Distribute the sample ids over the folds; the first (n_samples % n_splits) folds receive one extra id.
+        dataset_ids = [*0...n_samples]
+        dataset_ids.shuffle!(random: @rng) if @shuffle
+        fold_sets = Array.new(@n_splits) do |n|
+          n_fold_samples = n_samples / @n_splits
+          n_fold_samples += 1 if n < n_samples % @n_splits
+          dataset_ids.shift(n_fold_samples)
+        end
+        # For each fold, the fold itself gives the testing ids and all the other folds together give the training ids.
+        Array.new(@n_splits) do |n|
+          train_ids = fold_sets.select.with_index { |_, id| id != n }.flatten
+          test_ids = fold_sets[n]
+          [train_ids, test_ids]
+        end
+      end
+    end
+  end
+end

data/lib/svmkit/model_selection/stratified_k_fold.rb ADDED Viewed

@@ -0,0 +1,81 @@
+require 'svmkit/base/splitter'
+module SVMKit
+  module ModelSelection
+    # StratifiedKFold is a class that generates the set of data indices for K-fold cross-validation.
+    # The proportion of the number of samples in each class will be almost equal for each fold.
+    #
+    # @example
+    #   kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 3, shuffle: true, random_seed: 1)
+    #   kf.split(samples, labels).each do |train_ids, test_ids|
+    #     train_samples = samples[train_ids, true]
+    #     test_samples = samples[test_ids, true]
+    #     ...
+    #   end
+    #
+    class StratifiedKFold
+      include Base::Splitter
+      # Return the proportion of the test set to the dataset.
+      # @return [Boolean]
+      attr_reader :shuffle
+      # Return the random generator for shuffling the dataset.
+      # @return [Random]
+      attr_reader :rng
+      # Create a new data splitter for K-fold cross validation.
+      #
+      # @param n_splits [Integer] The number of folds.
+      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_splits: 3, shuffle: false, random_seed: nil)
+        @n_splits = n_splits
+        @shuffle = shuffle
+        @random_seed = random_seed
+        @random_seed ||= srand
+        @rng = Random.new(@random_seed)
+      end
+      # Generate data indices for stratified K-fold cross validation.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
+      #   The dataset to be used to generate data indices for stratified K-fold cross validation.
+      #   This argument exists to unify the interface between the K-fold methods, it is not used in the method.
+      # @param y [Numo::Int32] (shape: [n_samples])
+      #   The labels to be used to generate data indices for stratified K-fold cross validation.
+      # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
+      def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
+        # Check the number of samples in each class.
+        unless y.bincount.to_a.all? { |n_samples| @n_splits.between?(2, n_samples) }
+          raise ArgumentError,
+                'The value of n_splits must be not less than 2 and not more than the number of samples in each class.'
+        end
+        # Splits dataset ids of each class to each fold.
+        fold_sets_each_class = y.to_a.uniq.map { |label| fold_sets(y, label) }
+        # Returns array consisting of the training and testing ids for each fold.
+        Array.new(@n_splits) { |fold_id| train_test_sets(fold_sets_each_class, fold_id) }
+      end
+      private
+      def fold_sets(y, label)
+        sample_ids = y.eq(label).where.to_a
+        sample_ids.shuffle!(random: @rng) if @shuffle
+        n_samples = sample_ids.size
+        Array.new(@n_splits) do |n|
+          n_fold_samples = n_samples / @n_splits
+          n_fold_samples += 1 if n < n_samples % @n_splits
+          sample_ids.shift(n_fold_samples)
+        end
+      end
+      def train_test_sets(fold_sets_each_class, fold_id)
+        train_test_sets_each_class = fold_sets_each_class.map do |folds|
+          folds.partition.with_index { |_, id| id != fold_id }.map(&:flatten)
+        end
+        train_test_sets_each_class.transpose.map(&:flatten)
+      end
+    end
+  end
+end

data/lib/svmkit/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # SVMKit is an experimental library of machine learning in Ruby.
 module SVMKit
   # @!visibility private
-  VERSION = '0.2.1'.freeze
+  VERSION = '0.2.2'.freeze
 end

data/lib/svmkit.rb CHANGED Viewed

@@ -7,6 +7,7 @@ require 'svmkit/dataset'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
 require 'svmkit/base/transformer'
+require 'svmkit/base/splitter'
 require 'svmkit/kernel_approximation/rbf'
 require 'svmkit/linear_model/svc'
 require 'svmkit/linear_model/logistic_regression'
@@ -16,3 +17,5 @@ require 'svmkit/nearest_neighbors/k_neighbors_classifier'
 require 'svmkit/preprocessing/l2_normalizer'
 require 'svmkit/preprocessing/min_max_scaler'
 require 'svmkit/preprocessing/standard_scaler'
+require 'svmkit/model_selection/k_fold'
+require 'svmkit/model_selection/stratified_k_fold'

data/svmkit.gemspec CHANGED Viewed

@@ -3,11 +3,6 @@ lib = File.expand_path('../lib', __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'svmkit/version'
-SVMKit::DESCRIPTION = <<MSG
-SVMKit is a library for machine learninig in Ruby.
-SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
-However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
-MSG
 Gem::Specification.new do |spec|
   spec.name          = 'svmkit'
@@ -15,8 +10,14 @@ Gem::Specification.new do |spec|
   spec.authors       = ['yoshoku']
   spec.email         = ['yoshoku@outlook.com']
-  spec.summary       = %q{SVMKit is an experimental library of machine learning in Ruby.}
-  spec.description   = SVMKit::DESCRIPTION
+  spec.summary       = <<MSG
+SVMKit is an experimental library of machine learning in Ruby.
+MSG
+  spec.description   = <<MSG
+SVMKit is a library for machine learninig in Ruby.
+SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
+However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
+MSG
   spec.homepage      = 'https://github.com/yoshoku/svmkit'
   spec.license       = 'BSD-2-Clause'
@@ -29,12 +30,12 @@ Gem::Specification.new do |spec|
   spec.required_ruby_version = '>= 2.1'
-  spec.add_runtime_dependency 'numo-narray', '~> 0.9.0.5'
+  spec.add_runtime_dependency 'numo-narray', '~> 0.9.0'
-  spec.add_development_dependency 'bundler', '~> 1.15'
-  spec.add_development_dependency 'rake', '~> 10.0'
+  spec.add_development_dependency 'bundler', '~> 1.16'
+  spec.add_development_dependency 'rake', '~> 12.0'
   spec.add_development_dependency 'rspec', '~> 3.0'
-  spec.add_development_dependency 'simplecov', '~> 0.15.1'
+  spec.add_development_dependency 'simplecov', '~> 0.15'
   spec.post_install_message = <<MSG
 *************************************************************************

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.2.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-12-03 00:00:00.000000000 Z
+date: 2018-01-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -16,42 +16,42 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.0.5
+        version: 0.9.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.0.5
+        version: 0.9.0
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.15'
+        version: '1.16'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.15'
+        version: '1.16'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '12.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '12.0'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
@@ -72,14 +72,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.15.1
+        version: '0.15'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.15.1
+        version: '0.15'
 description: |
   SVMKit is a library for machine learninig in Ruby.
   SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
@@ -105,12 +105,15 @@ files:
 - lib/svmkit.rb
 - lib/svmkit/base/base_estimator.rb
 - lib/svmkit/base/classifier.rb
+- lib/svmkit/base/splitter.rb
 - lib/svmkit/base/transformer.rb
 - lib/svmkit/dataset.rb
 - lib/svmkit/kernel_approximation/rbf.rb
 - lib/svmkit/kernel_machine/kernel_svc.rb
 - lib/svmkit/linear_model/logistic_regression.rb
 - lib/svmkit/linear_model/svc.rb
+- lib/svmkit/model_selection/k_fold.rb
+- lib/svmkit/model_selection/stratified_k_fold.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb
 - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
 - lib/svmkit/pairwise_metric.rb
@@ -145,7 +148,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.5
+rubygems_version: 2.4.5.4
 signing_key:
 specification_version: 4
 summary: SVMKit is an experimental library of machine learning in Ruby.