svmkit 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b3cb57cc46849d792fff7b6c6500b9498d56fc71
4
- data.tar.gz: 65c909ee0efaafc42df12dd24aa6c62d5b816d6a
3
+ metadata.gz: c7c326db290b1847234f890914fe5d670a4b1d36
4
+ data.tar.gz: ee5a624c92bf6b35edcccf4df469f9f552c2174d
5
5
  SHA512:
6
- metadata.gz: 25b52e63512393706f3f53ddf415a2e4ac07923f3d1bd909cca0ade9de66d5bbb63d32a932bce32f2fa2b6c4430bab73f483d94d620eb355540a91905320644a
7
- data.tar.gz: a3b983cf6d75168cb6eda70ec5da113feb2bc52c7fc501af3a9328f569c6d793c19e4744a4a80f0c3c60b0ea4e5387db21451f2a004f8d4699605b8348c81bab
6
+ metadata.gz: 9256fc3d36e6247fae44ac1a14672eaf9b3ba414176b48c592be5aa8631d232dbaddba9f3884198e0ba751616a3017ad461da2fb7ec40ef26b9ab2b2417aadf5
7
+ data.tar.gz: e198dcbe0c7e782162a31e7131b961f619e76bf509b43b596299412ba5bb5ea16ea02e21bb2a79909d70bb230c81484083df94ec65db6e770e4e5adc712da174
data/.travis.yml CHANGED
@@ -1,5 +1,10 @@
1
1
  sudo: false
2
+ os: linux
3
+ dist: trusty
2
4
  language: ruby
3
5
  rvm:
4
- - 2.4.2
5
- before_install: gem install bundler -v 1.15.4
6
+ - 2.2.9
7
+ - 2.3.6
8
+ - 2.4.3
9
+ before_install:
10
+ - gem install --no-document bundler -v '~> 1.16'
data/HISTORY.md CHANGED
@@ -1,3 +1,6 @@
1
+ # 0.2.2
2
+ - Added classes for K-fold cross validation.
3
+
1
4
  # 0.2.1
2
5
  - Added class for K-nearest neighbors classifier.
3
6
 
@@ -0,0 +1,16 @@
1
+
2
module SVMKit
  module Base
    # Module for all validation methods in SVMKit.
    # An including class is expected to assign @n_splits and to override #split.
    module Splitter
      # Return the number of splits.
      # @return [Integer]
      attr_reader :n_splits

      # An abstract method for splitting dataset.
      #
      # Any arguments are accepted and ignored, so that calling a splitter that
      # has not overridden this method reports the missing implementation
      # instead of an arity error.
      #
      # @raise [NotImplementedError] Always; the including class has to override this method.
      def split(*_args)
        # NotImplementedError is the built-in class; it descends from ScriptError, so a bare `rescue` will not swallow it.
        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require 'svmkit/base/splitter'
2
+
3
module SVMKit
  # This module consists of the classes for model validation techniques.
  module ModelSelection
    # KFold generates the sets of data indices used for K-fold cross-validation.
    #
    # @example
    #   kf = SVMKit::ModelSelection::KFold.new(n_splits: 3, shuffle: true, random_seed: 1)
    #   kf.split(samples, labels).each do |train_ids, test_ids|
    #     train_samples = samples[train_ids, true]
    #     test_samples = samples[test_ids, true]
    #     ...
    #   end
    #
    class KFold
      include Base::Splitter

      # Return the flag indicating whether to shuffle the dataset.
      # @return [Boolean]
      attr_reader :shuffle

      # Return the random generator for shuffling the dataset.
      # @return [Random]
      attr_reader :rng

      # Create a new data splitter for K-fold cross validation.
      #
      # @param n_splits [Integer] The number of folds.
      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(n_splits: 3, shuffle: false, random_seed: nil)
        @n_splits = n_splits
        @shuffle = shuffle
        # Without an argument, Kernel#srand reseeds the global generator and
        # returns the seed that was in use before the call.
        @random_seed = random_seed || srand
        @rng = Random.new(@random_seed)
      end

      # Generate data indices for K-fold cross validation.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
      #   The dataset to be used to generate data indices for K-fold cross validation.
      # @param y [Numo::Int32] (shape: [n_samples])
      #   The labels; accepted only to keep the interface uniform across the K-fold classes, not used here.
      # @return [Array] One [train_ids, test_ids] pair per fold.
      # @raise [ArgumentError] If n_splits is less than 2 or more than the number of samples.
      def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
        n_samples = x.shape.first
        unless @n_splits.between?(2, n_samples)
          raise ArgumentError,
                'The value of n_splits must be not less than 2 and not more than the number of samples.'
        end
        # Deal the (optionally shuffled) ids out fold by fold; the first
        # `n_larger` folds take one extra id so every sample is used once.
        remaining_ids = (0...n_samples).to_a
        remaining_ids.shuffle!(random: @rng) if @shuffle
        base_size = n_samples / @n_splits
        n_larger = n_samples % @n_splits
        folds = Array.new(@n_splits) do |fold_id|
          remaining_ids.shift(fold_id < n_larger ? base_size + 1 : base_size)
        end
        # Each fold serves as the test set once; the other folds form the training set.
        Array.new(@n_splits) do |test_fold_id|
          train_folds = folds.reject.with_index { |_, fold_id| fold_id == test_fold_id }
          [train_folds.flatten, folds[test_fold_id]]
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'svmkit/base/splitter'
2
+
3
module SVMKit
  module ModelSelection
    # StratifiedKFold is a class that generates the set of data indices for K-fold cross-validation.
    # The proportion of the number of samples in each class will be almost equal for each fold.
    #
    # @example
    #   kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 3, shuffle: true, random_seed: 1)
    #   kf.split(samples, labels).each do |train_ids, test_ids|
    #     train_samples = samples[train_ids, true]
    #     test_samples = samples[test_ids, true]
    #     ...
    #   end
    #
    class StratifiedKFold
      include Base::Splitter

      # Return the flag indicating whether to shuffle the dataset.
      # @return [Boolean]
      attr_reader :shuffle

      # Return the random generator for shuffling the dataset.
      # @return [Random]
      attr_reader :rng

      # Create a new data splitter for K-fold cross validation.
      #
      # @param n_splits [Integer] The number of folds.
      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(n_splits: 3, shuffle: false, random_seed: nil)
        @n_splits = n_splits
        @shuffle = shuffle
        @random_seed = random_seed
        # Without an argument, Kernel#srand reseeds the global generator and
        # returns the seed that was in use before the call.
        @random_seed ||= srand
        @rng = Random.new(@random_seed)
      end

      # Generate data indices for stratified K-fold cross validation.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
      #   The dataset to be used to generate data indices for stratified K-fold cross validation.
      #   This argument exists to unify the interface between the K-fold methods, it is not used in the method.
      # @param y [Numo::Int32] (shape: [n_samples])
      #   The labels to be used to generate data indices for stratified K-fold cross validation.
      # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
      # @raise [ArgumentError] If n_splits is less than 2, or more than the number of samples of any class,
      #   or if y contains no labels.
      def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
        labels = y.to_a
        # Check the number of samples in each class.
        unless valid_n_splits?(labels)
          raise ArgumentError,
                'The value of n_splits must be not less than 2 and not more than the number of samples in each class.'
        end
        # Splits dataset ids of each class to each fold.
        fold_sets_each_class = labels.uniq.map { |label| fold_sets(y, label) }
        # Returns array consisting of the training and testing ids for each fold.
        Array.new(@n_splits) { |fold_id| train_test_sets(fold_sets_each_class, fold_id) }
      end

      private

      # Check that every class that actually occurs has at least n_splits samples.
      #
      # Only observed labels are counted. A bincount over the label values would
      # also report a zero for each unused integer below the largest label (and
      # is only defined for non-negative values), which rejected label sets such
      # as {1, 2} or {-1, 1}.
      def valid_n_splits?(labels)
        n_samples_each_class = labels.each_with_object(Hash.new(0)) { |label, counts| counts[label] += 1 }
        return false if n_samples_each_class.empty?
        n_samples_each_class.values.all? { |n_samples| @n_splits.between?(2, n_samples) }
      end

      # Distribute the sample ids carrying the given label over n_splits folds.
      # The first (n_samples % n_splits) folds receive one extra id.
      def fold_sets(y, label)
        sample_ids = y.eq(label).where.to_a
        sample_ids.shuffle!(random: @rng) if @shuffle
        n_samples = sample_ids.size
        Array.new(@n_splits) do |n|
          n_fold_samples = n_samples / @n_splits
          n_fold_samples += 1 if n < n_samples % @n_splits
          sample_ids.shift(n_fold_samples)
        end
      end

      # Build the [train_ids, test_ids] pair for one fold by merging, over all
      # classes, the folds other than fold_id (training) and fold fold_id (testing).
      def train_test_sets(fold_sets_each_class, fold_id)
        train_test_sets_each_class = fold_sets_each_class.map do |folds|
          folds.partition.with_index { |_, id| id != fold_id }.map(&:flatten)
        end
        # Transposing turns the per-class [train, test] pairs into [all trains, all tests].
        train_test_sets_each_class.transpose.map(&:flatten)
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # SVMKit is an experimental library of machine learning in Ruby.
2
2
  module SVMKit
3
3
  # @!visibility private
4
- VERSION = '0.2.1'.freeze
4
+ VERSION = '0.2.2'.freeze
5
5
  end
data/lib/svmkit.rb CHANGED
@@ -7,6 +7,7 @@ require 'svmkit/dataset'
7
7
  require 'svmkit/base/base_estimator'
8
8
  require 'svmkit/base/classifier'
9
9
  require 'svmkit/base/transformer'
10
+ require 'svmkit/base/splitter'
10
11
  require 'svmkit/kernel_approximation/rbf'
11
12
  require 'svmkit/linear_model/svc'
12
13
  require 'svmkit/linear_model/logistic_regression'
@@ -16,3 +17,5 @@ require 'svmkit/nearest_neighbors/k_neighbors_classifier'
16
17
  require 'svmkit/preprocessing/l2_normalizer'
17
18
  require 'svmkit/preprocessing/min_max_scaler'
18
19
  require 'svmkit/preprocessing/standard_scaler'
20
+ require 'svmkit/model_selection/k_fold'
21
+ require 'svmkit/model_selection/stratified_k_fold'
data/svmkit.gemspec CHANGED
@@ -3,11 +3,6 @@ lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'svmkit/version'
5
5
 
6
- SVMKit::DESCRIPTION = <<MSG
7
- SVMKit is a library for machine learninig in Ruby.
8
- SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
9
- However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
10
- MSG
11
6
 
12
7
  Gem::Specification.new do |spec|
13
8
  spec.name = 'svmkit'
@@ -15,8 +10,14 @@ Gem::Specification.new do |spec|
15
10
  spec.authors = ['yoshoku']
16
11
  spec.email = ['yoshoku@outlook.com']
17
12
 
18
- spec.summary = %q{SVMKit is an experimental library of machine learning in Ruby.}
19
- spec.description = SVMKit::DESCRIPTION
13
+ spec.summary = <<MSG
14
+ SVMKit is an experimental library of machine learning in Ruby.
15
+ MSG
16
+ spec.description = <<MSG
17
+ SVMKit is a library for machine learninig in Ruby.
18
+ SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
19
+ However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
20
+ MSG
20
21
  spec.homepage = 'https://github.com/yoshoku/svmkit'
21
22
  spec.license = 'BSD-2-Clause'
22
23
 
@@ -29,12 +30,12 @@ Gem::Specification.new do |spec|
29
30
 
30
31
  spec.required_ruby_version = '>= 2.1'
31
32
 
32
- spec.add_runtime_dependency 'numo-narray', '~> 0.9.0.5'
33
+ spec.add_runtime_dependency 'numo-narray', '~> 0.9.0'
33
34
 
34
- spec.add_development_dependency 'bundler', '~> 1.15'
35
- spec.add_development_dependency 'rake', '~> 10.0'
35
+ spec.add_development_dependency 'bundler', '~> 1.16'
36
+ spec.add_development_dependency 'rake', '~> 12.0'
36
37
  spec.add_development_dependency 'rspec', '~> 3.0'
37
- spec.add_development_dependency 'simplecov', '~> 0.15.1'
38
+ spec.add_development_dependency 'simplecov', '~> 0.15'
38
39
 
39
40
  spec.post_install_message = <<MSG
40
41
  *************************************************************************
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-12-03 00:00:00.000000000 Z
11
+ date: 2018-01-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -16,42 +16,42 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.0.5
19
+ version: 0.9.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.0.5
26
+ version: 0.9.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.15'
33
+ version: '1.16'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.15'
40
+ version: '1.16'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rake
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '10.0'
47
+ version: '12.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '10.0'
54
+ version: '12.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 0.15.1
75
+ version: '0.15'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 0.15.1
82
+ version: '0.15'
83
83
  description: |
84
84
  SVMKit is a library for machine learninig in Ruby.
85
85
  SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
@@ -105,12 +105,15 @@ files:
105
105
  - lib/svmkit.rb
106
106
  - lib/svmkit/base/base_estimator.rb
107
107
  - lib/svmkit/base/classifier.rb
108
+ - lib/svmkit/base/splitter.rb
108
109
  - lib/svmkit/base/transformer.rb
109
110
  - lib/svmkit/dataset.rb
110
111
  - lib/svmkit/kernel_approximation/rbf.rb
111
112
  - lib/svmkit/kernel_machine/kernel_svc.rb
112
113
  - lib/svmkit/linear_model/logistic_regression.rb
113
114
  - lib/svmkit/linear_model/svc.rb
115
+ - lib/svmkit/model_selection/k_fold.rb
116
+ - lib/svmkit/model_selection/stratified_k_fold.rb
114
117
  - lib/svmkit/multiclass/one_vs_rest_classifier.rb
115
118
  - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
116
119
  - lib/svmkit/pairwise_metric.rb
@@ -145,7 +148,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
148
  version: '0'
146
149
  requirements: []
147
150
  rubyforge_project:
148
- rubygems_version: 2.2.5
151
+ rubygems_version: 2.4.5.4
149
152
  signing_key:
150
153
  specification_version: 4
151
154
  summary: SVMKit is an experimental library of machine learning in Ruby.