svmkit 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +7 -2
- data/HISTORY.md +3 -0
- data/lib/svmkit/base/splitter.rb +16 -0
- data/lib/svmkit/model_selection/k_fold.rb +72 -0
- data/lib/svmkit/model_selection/stratified_k_fold.rb +81 -0
- data/lib/svmkit/version.rb +1 -1
- data/lib/svmkit.rb +3 -0
- data/svmkit.gemspec +12 -11
- metadata +14 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c7c326db290b1847234f890914fe5d670a4b1d36
|
4
|
+
data.tar.gz: ee5a624c92bf6b35edcccf4df469f9f552c2174d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9256fc3d36e6247fae44ac1a14672eaf9b3ba414176b48c592be5aa8631d232dbaddba9f3884198e0ba751616a3017ad461da2fb7ec40ef26b9ab2b2417aadf5
|
7
|
+
data.tar.gz: e198dcbe0c7e782162a31e7131b961f619e76bf509b43b596299412ba5bb5ea16ea02e21bb2a79909d70bb230c81484083df94ec65db6e770e4e5adc712da174
|
data/.travis.yml
CHANGED
data/HISTORY.md
CHANGED
@@ -0,0 +1,16 @@
|
|
1
|
+
|
2
|
+
module SVMKit
|
3
|
+
module Base
|
4
|
+
# Module for all validation methods in SVMKit.
|
5
|
+
module Splitter
|
6
|
+
# Return the number of splits.
|
7
|
+
# @return [Integer]
|
8
|
+
attr_reader :n_splits
|
9
|
+
|
10
|
+
# An abstract method for splitting dataset.
|
11
|
+
def split
|
12
|
+
raise NoImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'svmkit/base/splitter'
|
2
|
+
|
3
|
+
module SVMKit
|
4
|
+
# This module consists of the classes for model validation techniques.
|
5
|
+
module ModelSelection
|
6
|
+
# KFold is a class that generates the set of data indices for K-fold cross-validation.
|
7
|
+
#
|
8
|
+
# @example
|
9
|
+
# kf = SVMKit::ModelSelection::KFold.new(n_splits: 3, shuffle: true, random_seed: 1)
|
10
|
+
# kf.split(samples, labels).each do |train_ids, test_ids|
|
11
|
+
# train_samples = samples[train_ids, true]
|
12
|
+
# test_samples = samples[test_ids, true]
|
13
|
+
# ...
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
class KFold
|
17
|
+
include Base::Splitter
|
18
|
+
|
19
|
+
# Return the flag indicating whether to shuffle the dataset.
|
20
|
+
# @return [Boolean]
|
21
|
+
attr_reader :shuffle
|
22
|
+
|
23
|
+
# Return the random generator for shuffling the dataset.
|
24
|
+
# @return [Random]
|
25
|
+
attr_reader :rng
|
26
|
+
|
27
|
+
# Create a new data splitter for K-fold cross validation.
|
28
|
+
#
|
29
|
+
# @param n_splits [Integer] The number of folds.
|
30
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
|
31
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
32
|
+
def initialize(n_splits: 3, shuffle: false, random_seed: nil)
|
33
|
+
@n_splits = n_splits
|
34
|
+
@shuffle = shuffle
|
35
|
+
@random_seed = random_seed
|
36
|
+
@random_seed ||= srand
|
37
|
+
@rng = Random.new(@random_seed)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Generate data indices for K-fold cross validation.
|
41
|
+
#
|
42
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features])
|
43
|
+
# The dataset to be used to generate data indices for K-fold cross validation.
|
44
|
+
# @param y [Numo::Int32] (shape: [n_samples])
|
45
|
+
# The labels to be used to generate data indices for stratified K-fold cross validation.
|
46
|
+
# This argument exists to unify the interface between the K-fold methods, it is not used in the method.
|
47
|
+
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
48
|
+
def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
|
49
|
+
# Initialize and check some variables.
|
50
|
+
n_samples, = x.shape
|
51
|
+
unless @n_splits.between?(2, n_samples)
|
52
|
+
raise ArgumentError,
|
53
|
+
'The value of n_splits must be not less than 2 and not more than the number of samples.'
|
54
|
+
end
|
55
|
+
# Splits dataset ids to each fold.
|
56
|
+
dataset_ids = [*0...n_samples]
|
57
|
+
dataset_ids.shuffle!(random: @rng) if @shuffle
|
58
|
+
fold_sets = Array.new(@n_splits) do |n|
|
59
|
+
n_fold_samples = n_samples / @n_splits
|
60
|
+
n_fold_samples += 1 if n < n_samples % @n_splits
|
61
|
+
dataset_ids.shift(n_fold_samples)
|
62
|
+
end
|
63
|
+
# Returns array consisting of the training and testing ids for each fold.
|
64
|
+
Array.new(@n_splits) do |n|
|
65
|
+
train_ids = fold_sets.select.with_index { |_, id| id != n }.flatten
|
66
|
+
test_ids = fold_sets[n]
|
67
|
+
[train_ids, test_ids]
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'svmkit/base/splitter'
|
2
|
+
|
3
|
+
module SVMKit
|
4
|
+
module ModelSelection
|
5
|
+
# StratifiedKFold is a class that generates the set of data indices for stratified K-fold cross-validation.
|
6
|
+
# The proportion of the number of samples in each class will be almost equal for each fold.
|
7
|
+
#
|
8
|
+
# @example
|
9
|
+
# kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 3, shuffle: true, random_seed: 1)
|
10
|
+
# kf.split(samples, labels).each do |train_ids, test_ids|
|
11
|
+
# train_samples = samples[train_ids, true]
|
12
|
+
# test_samples = samples[test_ids, true]
|
13
|
+
# ...
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
class StratifiedKFold
|
17
|
+
include Base::Splitter
|
18
|
+
|
19
|
+
# Return the flag indicating whether to shuffle the dataset.
|
20
|
+
# @return [Boolean]
|
21
|
+
attr_reader :shuffle
|
22
|
+
|
23
|
+
# Return the random generator for shuffling the dataset.
|
24
|
+
# @return [Random]
|
25
|
+
attr_reader :rng
|
26
|
+
|
27
|
+
# Create a new data splitter for stratified K-fold cross validation.
|
28
|
+
#
|
29
|
+
# @param n_splits [Integer] The number of folds.
|
30
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
|
31
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
32
|
+
def initialize(n_splits: 3, shuffle: false, random_seed: nil)
|
33
|
+
@n_splits = n_splits
|
34
|
+
@shuffle = shuffle
|
35
|
+
@random_seed = random_seed
|
36
|
+
@random_seed ||= srand
|
37
|
+
@rng = Random.new(@random_seed)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Generate data indices for stratified K-fold cross validation.
|
41
|
+
#
|
42
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features])
|
43
|
+
# The dataset to be used to generate data indices for stratified K-fold cross validation.
|
44
|
+
# This argument exists to unify the interface between the K-fold methods, it is not used in the method.
|
45
|
+
# @param y [Numo::Int32] (shape: [n_samples])
|
46
|
+
# The labels to be used to generate data indices for stratified K-fold cross validation.
|
47
|
+
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
48
|
+
def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
|
49
|
+
# Check the number of samples in each class.
|
50
|
+
unless y.bincount.to_a.all? { |n_samples| @n_splits.between?(2, n_samples) }
|
51
|
+
raise ArgumentError,
|
52
|
+
'The value of n_splits must be not less than 2 and not more than the number of samples in each class.'
|
53
|
+
end
|
54
|
+
# Splits dataset ids of each class to each fold.
|
55
|
+
fold_sets_each_class = y.to_a.uniq.map { |label| fold_sets(y, label) }
|
56
|
+
# Returns array consisting of the training and testing ids for each fold.
|
57
|
+
Array.new(@n_splits) { |fold_id| train_test_sets(fold_sets_each_class, fold_id) }
|
58
|
+
end
|
59
|
+
|
60
|
+
private
|
61
|
+
|
62
|
+
def fold_sets(y, label)
|
63
|
+
sample_ids = y.eq(label).where.to_a
|
64
|
+
sample_ids.shuffle!(random: @rng) if @shuffle
|
65
|
+
n_samples = sample_ids.size
|
66
|
+
Array.new(@n_splits) do |n|
|
67
|
+
n_fold_samples = n_samples / @n_splits
|
68
|
+
n_fold_samples += 1 if n < n_samples % @n_splits
|
69
|
+
sample_ids.shift(n_fold_samples)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def train_test_sets(fold_sets_each_class, fold_id)
|
74
|
+
train_test_sets_each_class = fold_sets_each_class.map do |folds|
|
75
|
+
folds.partition.with_index { |_, id| id != fold_id }.map(&:flatten)
|
76
|
+
end
|
77
|
+
train_test_sets_each_class.transpose.map(&:flatten)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/svmkit/version.rb
CHANGED
data/lib/svmkit.rb
CHANGED
@@ -7,6 +7,7 @@ require 'svmkit/dataset'
|
|
7
7
|
require 'svmkit/base/base_estimator'
|
8
8
|
require 'svmkit/base/classifier'
|
9
9
|
require 'svmkit/base/transformer'
|
10
|
+
require 'svmkit/base/splitter'
|
10
11
|
require 'svmkit/kernel_approximation/rbf'
|
11
12
|
require 'svmkit/linear_model/svc'
|
12
13
|
require 'svmkit/linear_model/logistic_regression'
|
@@ -16,3 +17,5 @@ require 'svmkit/nearest_neighbors/k_neighbors_classifier'
|
|
16
17
|
require 'svmkit/preprocessing/l2_normalizer'
|
17
18
|
require 'svmkit/preprocessing/min_max_scaler'
|
18
19
|
require 'svmkit/preprocessing/standard_scaler'
|
20
|
+
require 'svmkit/model_selection/k_fold'
|
21
|
+
require 'svmkit/model_selection/stratified_k_fold'
|
data/svmkit.gemspec
CHANGED
@@ -3,11 +3,6 @@ lib = File.expand_path('../lib', __FILE__)
|
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
4
|
require 'svmkit/version'
|
5
5
|
|
6
|
-
SVMKit::DESCRIPTION = <<MSG
|
7
|
-
SVMKit is a library for machine learninig in Ruby.
|
8
|
-
SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
|
9
|
-
However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
|
10
|
-
MSG
|
11
6
|
|
12
7
|
Gem::Specification.new do |spec|
|
13
8
|
spec.name = 'svmkit'
|
@@ -15,8 +10,14 @@ Gem::Specification.new do |spec|
|
|
15
10
|
spec.authors = ['yoshoku']
|
16
11
|
spec.email = ['yoshoku@outlook.com']
|
17
12
|
|
18
|
-
spec.summary =
|
19
|
-
|
13
|
+
spec.summary = <<MSG
|
14
|
+
SVMKit is an experimental library of machine learning in Ruby.
|
15
|
+
MSG
|
16
|
+
spec.description = <<MSG
|
17
|
+
SVMKit is a library for machine learning in Ruby.
|
18
|
+
SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
|
19
|
+
However, since SVMKit is an experimental library, there are few machine learning algorithms implemented.
|
20
|
+
MSG
|
20
21
|
spec.homepage = 'https://github.com/yoshoku/svmkit'
|
21
22
|
spec.license = 'BSD-2-Clause'
|
22
23
|
|
@@ -29,12 +30,12 @@ Gem::Specification.new do |spec|
|
|
29
30
|
|
30
31
|
spec.required_ruby_version = '>= 2.1'
|
31
32
|
|
32
|
-
spec.add_runtime_dependency 'numo-narray', '~> 0.9.0
|
33
|
+
spec.add_runtime_dependency 'numo-narray', '~> 0.9.0'
|
33
34
|
|
34
|
-
spec.add_development_dependency 'bundler', '~> 1.
|
35
|
-
spec.add_development_dependency 'rake', '~>
|
35
|
+
spec.add_development_dependency 'bundler', '~> 1.16'
|
36
|
+
spec.add_development_dependency 'rake', '~> 12.0'
|
36
37
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
37
|
-
spec.add_development_dependency 'simplecov', '~> 0.15
|
38
|
+
spec.add_development_dependency 'simplecov', '~> 0.15'
|
38
39
|
|
39
40
|
spec.post_install_message = <<MSG
|
40
41
|
*************************************************************************
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -16,42 +16,42 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.9.0
|
19
|
+
version: 0.9.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.9.0
|
26
|
+
version: 0.9.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1.
|
33
|
+
version: '1.16'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.
|
40
|
+
version: '1.16'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '12.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '12.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: rspec
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,14 +72,14 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.15
|
75
|
+
version: '0.15'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0.15
|
82
|
+
version: '0.15'
|
83
83
|
description: |
|
84
84
|
SVMKit is a library for machine learning in Ruby.
|
85
85
|
SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
|
@@ -105,12 +105,15 @@ files:
|
|
105
105
|
- lib/svmkit.rb
|
106
106
|
- lib/svmkit/base/base_estimator.rb
|
107
107
|
- lib/svmkit/base/classifier.rb
|
108
|
+
- lib/svmkit/base/splitter.rb
|
108
109
|
- lib/svmkit/base/transformer.rb
|
109
110
|
- lib/svmkit/dataset.rb
|
110
111
|
- lib/svmkit/kernel_approximation/rbf.rb
|
111
112
|
- lib/svmkit/kernel_machine/kernel_svc.rb
|
112
113
|
- lib/svmkit/linear_model/logistic_regression.rb
|
113
114
|
- lib/svmkit/linear_model/svc.rb
|
115
|
+
- lib/svmkit/model_selection/k_fold.rb
|
116
|
+
- lib/svmkit/model_selection/stratified_k_fold.rb
|
114
117
|
- lib/svmkit/multiclass/one_vs_rest_classifier.rb
|
115
118
|
- lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
|
116
119
|
- lib/svmkit/pairwise_metric.rb
|
@@ -145,7 +148,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
148
|
version: '0'
|
146
149
|
requirements: []
|
147
150
|
rubyforge_project:
|
148
|
-
rubygems_version: 2.
|
151
|
+
rubygems_version: 2.4.5.4
|
149
152
|
signing_key:
|
150
153
|
specification_version: 4
|
151
154
|
summary: SVMKit is an experimental library of machine learning in Ruby.
|