svmkit 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b3cb57cc46849d792fff7b6c6500b9498d56fc71
4
- data.tar.gz: 65c909ee0efaafc42df12dd24aa6c62d5b816d6a
3
+ metadata.gz: c7c326db290b1847234f890914fe5d670a4b1d36
4
+ data.tar.gz: ee5a624c92bf6b35edcccf4df469f9f552c2174d
5
5
  SHA512:
6
- metadata.gz: 25b52e63512393706f3f53ddf415a2e4ac07923f3d1bd909cca0ade9de66d5bbb63d32a932bce32f2fa2b6c4430bab73f483d94d620eb355540a91905320644a
7
- data.tar.gz: a3b983cf6d75168cb6eda70ec5da113feb2bc52c7fc501af3a9328f569c6d793c19e4744a4a80f0c3c60b0ea4e5387db21451f2a004f8d4699605b8348c81bab
6
+ metadata.gz: 9256fc3d36e6247fae44ac1a14672eaf9b3ba414176b48c592be5aa8631d232dbaddba9f3884198e0ba751616a3017ad461da2fb7ec40ef26b9ab2b2417aadf5
7
+ data.tar.gz: e198dcbe0c7e782162a31e7131b961f619e76bf509b43b596299412ba5bb5ea16ea02e21bb2a79909d70bb230c81484083df94ec65db6e770e4e5adc712da174
data/.travis.yml CHANGED
@@ -1,5 +1,10 @@
1
1
  sudo: false
2
+ os: linux
3
+ dist: trusty
2
4
  language: ruby
3
5
  rvm:
4
- - 2.4.2
5
- before_install: gem install bundler -v 1.15.4
6
+ - 2.2.9
7
+ - 2.3.6
8
+ - 2.4.3
9
+ before_install:
10
+ - gem install --no-document bundler -v '~> 1.16'
data/HISTORY.md CHANGED
@@ -1,3 +1,6 @@
1
+ # 0.2.2
2
+ - Added classes for K-fold cross validation.
3
+
1
4
  # 0.2.1
2
5
  - Added class for K-nearest neighbors classifier.
3
6
 
@@ -0,0 +1,16 @@
1
+
2
module SVMKit
  module Base
    # Module for all validation methods in SVMKit.
    #
    # It exposes a reader for @n_splits (which this module itself never assigns)
    # and an abstract #split that the including class has to override.
    module Splitter
      # Return the number of splits.
      # @return [Integer]
      attr_reader :n_splits

      # An abstract method for splitting dataset.
      #
      # Any positional arguments are accepted and ignored, so that invoking an
      # unimplemented splitter with a dataset reports the missing implementation
      # rather than an arity error.
      #
      # @raise [NotImplementedError] always, naming the method and the receiver's class.
      def split(*_args)
        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require 'svmkit/base/splitter'
2
+
3
module SVMKit
  # This module consists of the classes for model validation techniques.
  module ModelSelection
    # KFold produces, for each of K folds, the sample indices that make up the
    # training set and the testing set of a K-fold cross-validation.
    #
    # @example
    #   kf = SVMKit::ModelSelection::KFold.new(n_splits: 3, shuffle: true, random_seed: 1)
    #   kf.split(samples, labels).each do |train_ids, test_ids|
    #     train_samples = samples[train_ids, true]
    #     test_samples = samples[test_ids, true]
    #     ...
    #   end
    #
    class KFold
      include Base::Splitter

      # Return the flag indicating whether to shuffle the dataset.
      # @return [Boolean]
      attr_reader :shuffle

      # Return the random generator for shuffling the dataset.
      # @return [Random]
      attr_reader :rng

      # Create a new data splitter for K-fold cross validation.
      #
      # @param n_splits [Integer] The number of folds.
      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      #   When omitted, the value returned by Kernel#srand is used.
      def initialize(n_splits: 3, shuffle: false, random_seed: nil)
        @n_splits = n_splits
        @shuffle = shuffle
        # NOTE: Kernel#srand without an argument also reseeds the global generator.
        @random_seed = random_seed || srand
        @rng = Random.new(@random_seed)
      end

      # Generate data indices for K-fold cross validation.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
      #   The dataset to be split; only the first element of its shape is read.
      # @param y [Numo::Int32] (shape: [n_samples])
      #   The labels. Kept only so that all K-fold splitters share one interface; it is not used here.
      # @return [Array] One [train_ids, test_ids] pair per fold.
      # @raise [ArgumentError] If n_splits is less than 2 or greater than the number of samples.
      def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
        n_samples, = x.shape
        message = 'The value of n_splits must be not less than 2 and not more than the number of samples.'
        raise ArgumentError, message unless @n_splits.between?(2, n_samples)

        # Every fold receives base_size ids; the first n_larger folds receive one more.
        base_size = n_samples / @n_splits
        n_larger = n_samples % @n_splits

        remaining_ids = (0...n_samples).to_a
        remaining_ids.shuffle!(random: @rng) if @shuffle
        folds = Array.new(@n_splits) do |fold_id|
          fold_size = fold_id < n_larger ? base_size + 1 : base_size
          remaining_ids.shift(fold_size)
        end

        # For each fold: that fold is the test set, all the other folds joined are the training set.
        folds.each_index.map do |test_fold_id|
          other_folds = folds.each_with_index.reject { |_, fold_id| fold_id == test_fold_id }
          [other_folds.flat_map(&:first), folds[test_fold_id]]
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'svmkit/base/splitter'
2
+
3
module SVMKit
  module ModelSelection
    # StratifiedKFold is a class that generates the set of data indices for K-fold cross-validation.
    # The proportion of the number of samples in each class will be almost equal for each fold.
    #
    # @example
    #   kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 3, shuffle: true, random_seed: 1)
    #   kf.split(samples, labels).each do |train_ids, test_ids|
    #     train_samples = samples[train_ids, true]
    #     test_samples = samples[test_ids, true]
    #     ...
    #   end
    #
    class StratifiedKFold
      include Base::Splitter

      # Return the flag indicating whether to shuffle the dataset.
      # @return [Boolean]
      attr_reader :shuffle

      # Return the random generator for shuffling the dataset.
      # @return [Random]
      attr_reader :rng

      # Create a new data splitter for K-fold cross validation.
      #
      # @param n_splits [Integer] The number of folds.
      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      #   When omitted, the value returned by Kernel#srand is used.
      def initialize(n_splits: 3, shuffle: false, random_seed: nil)
        @n_splits = n_splits
        @shuffle = shuffle
        @random_seed = random_seed
        # NOTE: Kernel#srand without an argument also reseeds the global generator.
        @random_seed ||= srand
        @rng = Random.new(@random_seed)
      end

      # Generate data indices for stratified K-fold cross validation.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
      #   The dataset to be used to generate data indices for stratified K-fold cross validation.
      #   This argument exists to unify the interface between the K-fold methods, it is not used in the method.
      # @param y [Numo::Int32] (shape: [n_samples])
      #   The labels to be used to generate data indices for stratified K-fold cross validation.
      # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
      # @raise [ArgumentError] If n_splits is less than 2 or greater than the number of samples of any class.
      def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
        # Count only the labels that actually occur in y. A bincount-based check also
        # reports a zero count for every absent integer below the largest label, which
        # rejects valid non-contiguous label sets such as [1, 3].
        label_counts = y.to_a.each_with_object(Hash.new(0)) { |label, counts| counts[label] += 1 }
        unless label_counts.values.all? { |n_samples| @n_splits.between?(2, n_samples) }
          raise ArgumentError,
                'The value of n_splits must be not less than 2 and not more than the number of samples in each class.'
        end
        # Splits dataset ids of each class to each fold (classes in order of first appearance).
        fold_sets_each_class = label_counts.keys.map { |label| fold_sets(y, label) }
        # Returns array consisting of the training and testing ids for each fold.
        Array.new(@n_splits) { |fold_id| train_test_sets(fold_sets_each_class, fold_id) }
      end

      private

      # Distribute the ids of the samples labelled +label+ over @n_splits folds.
      # Each fold gets n_samples / n_splits ids; the first n_samples % n_splits folds get one more.
      def fold_sets(y, label)
        sample_ids = y.eq(label).where.to_a
        sample_ids.shuffle!(random: @rng) if @shuffle
        n_samples = sample_ids.size
        Array.new(@n_splits) do |n|
          n_fold_samples = n_samples / @n_splits
          n_fold_samples += 1 if n < n_samples % @n_splits
          sample_ids.shift(n_fold_samples)
        end
      end

      # Build the [train_ids, test_ids] pair for fold +fold_id+.
      # Per class, the fold at +fold_id+ becomes test ids and the remaining folds become
      # training ids; the per-class results are then concatenated across classes.
      def train_test_sets(fold_sets_each_class, fold_id)
        train_test_sets_each_class = fold_sets_each_class.map do |folds|
          folds.partition.with_index { |_, id| id != fold_id }.map(&:flatten)
        end
        train_test_sets_each_class.transpose.map(&:flatten)
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # SVMKit is an experimental library of machine learning in Ruby.
2
2
  module SVMKit
3
3
  # @!visibility private
4
- VERSION = '0.2.1'.freeze
4
+ VERSION = '0.2.2'.freeze
5
5
  end
data/lib/svmkit.rb CHANGED
@@ -7,6 +7,7 @@ require 'svmkit/dataset'
7
7
  require 'svmkit/base/base_estimator'
8
8
  require 'svmkit/base/classifier'
9
9
  require 'svmkit/base/transformer'
10
+ require 'svmkit/base/splitter'
10
11
  require 'svmkit/kernel_approximation/rbf'
11
12
  require 'svmkit/linear_model/svc'
12
13
  require 'svmkit/linear_model/logistic_regression'
@@ -16,3 +17,5 @@ require 'svmkit/nearest_neighbors/k_neighbors_classifier'
16
17
  require 'svmkit/preprocessing/l2_normalizer'
17
18
  require 'svmkit/preprocessing/min_max_scaler'
18
19
  require 'svmkit/preprocessing/standard_scaler'
20
+ require 'svmkit/model_selection/k_fold'
21
+ require 'svmkit/model_selection/stratified_k_fold'
data/svmkit.gemspec CHANGED
@@ -3,11 +3,6 @@ lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'svmkit/version'
5
5
 
6
- SVMKit::DESCRIPTION = <<MSG
7
- SVMKit is a library for machine learninig in Ruby.
8
- SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
9
- However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
10
- MSG
11
6
 
12
7
  Gem::Specification.new do |spec|
13
8
  spec.name = 'svmkit'
@@ -15,8 +10,14 @@ Gem::Specification.new do |spec|
15
10
  spec.authors = ['yoshoku']
16
11
  spec.email = ['yoshoku@outlook.com']
17
12
 
18
- spec.summary = %q{SVMKit is an experimental library of machine learning in Ruby.}
19
- spec.description = SVMKit::DESCRIPTION
13
+ spec.summary = <<MSG
14
+ SVMKit is an experimental library of machine learning in Ruby.
15
+ MSG
16
+ spec.description = <<MSG
17
+ SVMKit is a library for machine learninig in Ruby.
18
+ SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
19
+ However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
20
+ MSG
20
21
  spec.homepage = 'https://github.com/yoshoku/svmkit'
21
22
  spec.license = 'BSD-2-Clause'
22
23
 
@@ -29,12 +30,12 @@ Gem::Specification.new do |spec|
29
30
 
30
31
  spec.required_ruby_version = '>= 2.1'
31
32
 
32
- spec.add_runtime_dependency 'numo-narray', '~> 0.9.0.5'
33
+ spec.add_runtime_dependency 'numo-narray', '~> 0.9.0'
33
34
 
34
- spec.add_development_dependency 'bundler', '~> 1.15'
35
- spec.add_development_dependency 'rake', '~> 10.0'
35
+ spec.add_development_dependency 'bundler', '~> 1.16'
36
+ spec.add_development_dependency 'rake', '~> 12.0'
36
37
  spec.add_development_dependency 'rspec', '~> 3.0'
37
- spec.add_development_dependency 'simplecov', '~> 0.15.1'
38
+ spec.add_development_dependency 'simplecov', '~> 0.15'
38
39
 
39
40
  spec.post_install_message = <<MSG
40
41
  *************************************************************************
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-12-03 00:00:00.000000000 Z
11
+ date: 2018-01-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -16,42 +16,42 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.0.5
19
+ version: 0.9.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.0.5
26
+ version: 0.9.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.15'
33
+ version: '1.16'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.15'
40
+ version: '1.16'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rake
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '10.0'
47
+ version: '12.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '10.0'
54
+ version: '12.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 0.15.1
75
+ version: '0.15'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 0.15.1
82
+ version: '0.15'
83
83
  description: |
84
84
  SVMKit is a library for machine learninig in Ruby.
85
85
  SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
@@ -105,12 +105,15 @@ files:
105
105
  - lib/svmkit.rb
106
106
  - lib/svmkit/base/base_estimator.rb
107
107
  - lib/svmkit/base/classifier.rb
108
+ - lib/svmkit/base/splitter.rb
108
109
  - lib/svmkit/base/transformer.rb
109
110
  - lib/svmkit/dataset.rb
110
111
  - lib/svmkit/kernel_approximation/rbf.rb
111
112
  - lib/svmkit/kernel_machine/kernel_svc.rb
112
113
  - lib/svmkit/linear_model/logistic_regression.rb
113
114
  - lib/svmkit/linear_model/svc.rb
115
+ - lib/svmkit/model_selection/k_fold.rb
116
+ - lib/svmkit/model_selection/stratified_k_fold.rb
114
117
  - lib/svmkit/multiclass/one_vs_rest_classifier.rb
115
118
  - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
116
119
  - lib/svmkit/pairwise_metric.rb
@@ -145,7 +148,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
148
  version: '0'
146
149
  requirements: []
147
150
  rubyforge_project:
148
- rubygems_version: 2.2.5
151
+ rubygems_version: 2.4.5.4
149
152
  signing_key:
150
153
  specification_version: 4
151
154
  summary: SVMKit is an experimental library of machine learning in Ruby.