rumale 0.8.1 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/CHANGELOG.md +6 -0
- data/README.md +7 -0
- data/lib/rumale.rb +3 -0
- data/lib/rumale/model_selection/k_fold.rb +4 -0
- data/lib/rumale/model_selection/shuffle_split.rb +91 -0
- data/lib/rumale/model_selection/stratified_k_fold.rb +4 -0
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +115 -0
- data/lib/rumale/optimizer/adam.rb +77 -0
- data/lib/rumale/version.rb +2 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dba389e77a984b46e5352a2b4aae15f8eec2362d
|
4
|
+
data.tar.gz: 2eab0f18fc0e4b16af317bfa7b81db8203c62a20
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 034b0fc6f79ed66af3a50d025e66f17a3815c0c0e0634bd3eccec19546d585b17f158e376700aafa3ae89d52a895efefd79ff048d61b9f87dabe51f72393b75f
|
7
|
+
data.tar.gz: 4124f95f72392af658b342d7c21526717417de246ce99f4ee857cc4d26e799dc4a7ea0bbb59aa45c6e8621178ee84a4a4ead426aa79f09e62bf903e273cdf05b
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
# 0.8.2
|
2
|
+
- Add class for Adam optimizer.
|
3
|
+
- Add data splitter classes for random permutation cross validation.
|
4
|
+
- Add accessor method for number of splits to K-fold splitter classes.
|
5
|
+
- Add execution result of example script on README ([#3](https://github.com/yoshoku/rumale/pull/3)).
|
6
|
+
|
1
7
|
# 0.8.1
|
2
8
|
- Add some evaluator classes.
|
3
9
|
- MeanSquaredLogError
|
data/README.md
CHANGED
@@ -121,6 +121,13 @@ mean_logloss = report[:test_score].inject(:+) / kf.n_splits
|
|
121
121
|
puts("5-CV mean log-loss: %.3f" % mean_logloss)
|
122
122
|
```
|
123
123
|
|
124
|
+
Executing the above script produces the following output.
|
125
|
+
|
126
|
+
```bash
|
127
|
+
$ ruby cross_validation.rb
|
128
|
+
5-CV mean log-loss: 0.476
|
129
|
+
```
|
130
|
+
|
124
131
|
### Example 3. Pipeline
|
125
132
|
|
126
133
|
```ruby
|
data/lib/rumale.rb
CHANGED
@@ -18,6 +18,7 @@ require 'rumale/base/splitter'
|
|
18
18
|
require 'rumale/base/evaluator'
|
19
19
|
require 'rumale/optimizer/sgd'
|
20
20
|
require 'rumale/optimizer/rmsprop'
|
21
|
+
require 'rumale/optimizer/adam'
|
21
22
|
require 'rumale/optimizer/nadam'
|
22
23
|
require 'rumale/optimizer/yellow_fin'
|
23
24
|
require 'rumale/pipeline/pipeline'
|
@@ -56,6 +57,8 @@ require 'rumale/preprocessing/label_encoder'
|
|
56
57
|
require 'rumale/preprocessing/one_hot_encoder'
|
57
58
|
require 'rumale/model_selection/k_fold'
|
58
59
|
require 'rumale/model_selection/stratified_k_fold'
|
60
|
+
require 'rumale/model_selection/shuffle_split'
|
61
|
+
require 'rumale/model_selection/stratified_shuffle_split'
|
59
62
|
require 'rumale/model_selection/cross_validation'
|
60
63
|
require 'rumale/model_selection/grid_search_cv'
|
61
64
|
require 'rumale/evaluation_measure/accuracy'
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/splitter'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module ModelSelection
|
7
|
+
# ShuffleSplit is a class that generates the set of data indices for random permutation cross-validation.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# ss = Rumale::ModelSelection::ShuffleSplit.new(n_splits: 3, test_size: 0.2, random_seed: 1)
|
11
|
+
# ss.split(samples, labels).each do |train_ids, test_ids|
|
12
|
+
# train_samples = samples[train_ids, true]
|
13
|
+
# test_samples = samples[test_ids, true]
|
14
|
+
# ...
|
15
|
+
# end
|
16
|
+
#
|
17
|
+
class ShuffleSplit
|
18
|
+
include Base::Splitter
|
19
|
+
|
20
|
+
# Return the number of folds.
|
21
|
+
# @return [Integer]
|
22
|
+
attr_reader :n_splits
|
23
|
+
|
24
|
+
# Return the random generator for shuffling the dataset.
|
25
|
+
# @return [Random]
|
26
|
+
attr_reader :rng
|
27
|
+
|
28
|
+
# Create a new data splitter for random permutation cross validation.
|
29
|
+
#
|
30
|
+
# @param n_splits [Integer] The number of folds.
|
31
|
+
# @param test_size [Float] The ratio of number of samples for test data.
|
32
|
+
# @param train_size [Float] The ratio of number of samples for train data.
|
33
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
34
|
+
def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
|
35
|
+
check_params_integer(n_splits: n_splits)
|
36
|
+
check_params_float(test_size: test_size)
|
37
|
+
check_params_type_or_nil(Float, train_size: train_size)
|
38
|
+
check_params_type_or_nil(Integer, random_seed: random_seed)
|
39
|
+
check_params_positive(n_splits: n_splits)
|
40
|
+
check_params_positive(test_size: test_size)
|
41
|
+
check_params_positive(train_size: train_size) unless train_size.nil?
|
42
|
+
@n_splits = n_splits
|
43
|
+
@test_size = test_size
|
44
|
+
@train_size = train_size
|
45
|
+
@random_seed = random_seed
|
46
|
+
@random_seed ||= srand
|
47
|
+
@rng = Random.new(@random_seed)
|
48
|
+
end
|
49
|
+
|
50
|
+
# Generate data indices for random permutation cross validation.
|
51
|
+
#
|
52
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features])
|
53
|
+
# The dataset to be used to generate data indices for random permutation cross validation.
|
54
|
+
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
55
|
+
def split(x, _y = nil)
|
56
|
+
check_sample_array(x)
|
57
|
+
# Initialize and check some variables.
|
58
|
+
n_samples = x.shape[0]
|
59
|
+
n_test_samples = (@test_size * n_samples).to_i
|
60
|
+
n_train_samples = @train_size.nil? ? n_samples - n_test_samples : (@train_size * n_samples).to_i
|
61
|
+
unless @n_splits.between?(1, n_samples)
|
62
|
+
raise ArgumentError,
|
63
|
+
'The value of n_splits must be not less than 1 and not more than the number of samples.'
|
64
|
+
end
|
65
|
+
unless n_test_samples.between?(1, n_samples)
|
66
|
+
raise RangeError,
|
67
|
+
'The number of sample in test split must be not less than 1 and not more than the number of samples.'
|
68
|
+
end
|
69
|
+
unless n_train_samples.between?(1, n_samples)
|
70
|
+
raise RangeError,
|
71
|
+
'The number of sample in train split must be not less than 1 and not more than the number of samples.'
|
72
|
+
end
|
73
|
+
if (n_test_samples + n_train_samples) > n_samples
|
74
|
+
raise RangeError,
|
75
|
+
'The total number of samples in test split and train split must be not more than the number of samples.'
|
76
|
+
end
|
77
|
+
# Returns array consisting of the training and testing ids for each fold.
|
78
|
+
dataset_ids = [*0...n_samples]
|
79
|
+
Array.new(@n_splits) do
|
80
|
+
test_ids = dataset_ids.sample(n_test_samples, random: @rng)
|
81
|
+
train_ids = if @train_size.nil?
|
82
|
+
dataset_ids - test_ids
|
83
|
+
else
|
84
|
+
(dataset_ids - test_ids).sample(n_train_samples, random: @rng)
|
85
|
+
end
|
86
|
+
[train_ids, test_ids]
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
@@ -18,6 +18,10 @@ module Rumale
|
|
18
18
|
class StratifiedKFold
|
19
19
|
include Base::Splitter
|
20
20
|
|
21
|
+
# Return the number of folds.
|
22
|
+
# @return [Integer]
|
23
|
+
attr_reader :n_splits
|
24
|
+
|
21
25
|
# Return the flag indicating whether to shuffle the dataset.
|
22
26
|
# @return [Boolean]
|
23
27
|
attr_reader :shuffle
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/splitter'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module ModelSelection
|
7
|
+
# StratifiedShuffleSplit is a class that generates the set of data indices for random permutation cross-validation.
|
8
|
+
# The proportion of the number of samples in each class will be almost equal for each fold.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# ss = Rumale::ModelSelection::StratifiedShuffleSplit.new(n_splits: 3, test_size: 0.2, random_seed: 1)
|
12
|
+
# ss.split(samples, labels).each do |train_ids, test_ids|
|
13
|
+
# train_samples = samples[train_ids, true]
|
14
|
+
# test_samples = samples[test_ids, true]
|
15
|
+
# ...
|
16
|
+
# end
|
17
|
+
#
|
18
|
+
class StratifiedShuffleSplit
|
19
|
+
include Base::Splitter
|
20
|
+
|
21
|
+
# Return the number of folds.
|
22
|
+
# @return [Integer]
|
23
|
+
attr_reader :n_splits
|
24
|
+
|
25
|
+
# Return the random generator for shuffling the dataset.
|
26
|
+
# @return [Random]
|
27
|
+
attr_reader :rng
|
28
|
+
|
29
|
+
# Create a new data splitter for random permutation cross validation.
|
30
|
+
#
|
31
|
+
# @param n_splits [Integer] The number of folds.
|
32
|
+
# @param test_size [Float] The ratio of number of samples for test data.
|
33
|
+
# @param train_size [Float] The ratio of number of samples for train data.
|
34
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
35
|
+
def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
|
36
|
+
check_params_integer(n_splits: n_splits)
|
37
|
+
check_params_float(test_size: test_size)
|
38
|
+
check_params_type_or_nil(Float, train_size: train_size)
|
39
|
+
check_params_type_or_nil(Integer, random_seed: random_seed)
|
40
|
+
check_params_positive(n_splits: n_splits)
|
41
|
+
check_params_positive(test_size: test_size)
|
42
|
+
check_params_positive(train_size: train_size) unless train_size.nil?
|
43
|
+
@n_splits = n_splits
|
44
|
+
@test_size = test_size
|
45
|
+
@train_size = train_size
|
46
|
+
@random_seed = random_seed
|
47
|
+
@random_seed ||= srand
|
48
|
+
@rng = Random.new(@random_seed)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Generate data indices for stratified random permutation cross validation.
|
52
|
+
#
|
53
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features])
|
54
|
+
# The dataset to be used to generate data indices for stratified random permutation cross validation.
|
55
|
+
# This argument exists to unify the interface between the splitter classes; it is only validated and is not used to generate the indices.
|
56
|
+
# @param y [Numo::Int32] (shape: [n_samples])
|
57
|
+
# The labels to be used to generate data indices for stratified random permutation cross validation.
|
58
|
+
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
59
|
+
def split(x, y)
|
60
|
+
check_sample_array(x)
|
61
|
+
check_label_array(y)
|
62
|
+
check_sample_label_size(x, y)
|
63
|
+
# Initialize and check some variables.
|
64
|
+
train_sz = @train_size.nil? ? 1.0 - @test_size : @train_size
|
65
|
+
# Check the number of samples in each class.
|
66
|
+
unless valid_n_splits?(y)
|
67
|
+
raise ArgumentError,
|
68
|
+
'The value of n_splits must be not less than 1 and not more than the number of samples in each class.'
|
69
|
+
end
|
70
|
+
unless enough_data_size_each_class?(y, @test_size)
|
71
|
+
raise RangeError,
|
72
|
+
'The number of sample in test split must be not less than 1 and not more than the number of samples in each class.'
|
73
|
+
end
|
74
|
+
unless enough_data_size_each_class?(y, train_sz)
|
75
|
+
raise RangeError,
|
76
|
+
'The number of sample in train split must be not less than 1 and not more than the number of samples in each class.'
|
77
|
+
end
|
78
|
+
unless enough_data_size_each_class?(y, train_sz + @test_size)
|
79
|
+
raise RangeError,
|
80
|
+
'The total number of samples in test split and train split must be not more than the number of samples in each class.'
|
81
|
+
end
|
82
|
+
# Returns array consisting of the training and testing ids for each fold.
|
83
|
+
sample_ids_each_class = y.to_a.uniq.map { |label| y.eq(label).where.to_a }
|
84
|
+
Array.new(@n_splits) do
|
85
|
+
train_ids = []
|
86
|
+
test_ids = []
|
87
|
+
sample_ids_each_class.each do |sample_ids|
|
88
|
+
n_samples = sample_ids.size
|
89
|
+
n_test_samples = (@test_size * n_samples).to_i
|
90
|
+
n_train_samples = (train_sz * n_samples).to_i
|
91
|
+
test_ids += sample_ids.sample(n_test_samples, random: @rng)
|
92
|
+
train_ids += if @train_size.nil?
|
93
|
+
sample_ids - test_ids
|
94
|
+
else
|
95
|
+
(sample_ids - test_ids).sample(n_train_samples, random: @rng)
|
96
|
+
end
|
97
|
+
end
|
98
|
+
[train_ids, test_ids]
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
private
|
103
|
+
|
104
|
+
def valid_n_splits?(y)
|
105
|
+
y.to_a.uniq.map { |label| y.eq(label).where.size }.all? { |n_samples| @n_splits.between?(1, n_samples) }
|
106
|
+
end
|
107
|
+
|
108
|
+
def enough_data_size_each_class?(y, data_size)
|
109
|
+
y.to_a.uniq.map { |label| y.eq(label).where.size }.all? do |n_samples|
|
110
|
+
(data_size * n_samples).to_i.between?(1, n_samples)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/validation'
|
4
|
+
require 'rumale/base/base_estimator'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Optimizer
|
8
|
+
# Adam is a class that implements Adam optimizer.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# optimizer = Rumale::Optimizer::Adam.new(learning_rate: 0.01, decay1: 0.9, decay2: 0.999)
|
12
|
+
# estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
|
13
|
+
# estimator.fit(samples, values)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - D. P. Kingma and J. Ba, "Adam: A Method for Stochastic Optimization," Proc. ICLR'15, 2015.
|
17
|
+
class Adam
|
18
|
+
include Base::BaseEstimator
|
19
|
+
include Validation
|
20
|
+
|
21
|
+
# Create a new optimizer with Adam
|
22
|
+
#
|
23
|
+
# @param learning_rate [Float] The initial value of learning rate.
|
24
|
+
# @param decay1 [Float] The smoothing parameter for the first moment.
|
25
|
+
# @param decay2 [Float] The smoothing parameter for the second moment.
|
26
|
+
def initialize(learning_rate: 0.001, decay1: 0.9, decay2: 0.999)
|
27
|
+
check_params_float(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
|
28
|
+
check_params_positive(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
|
29
|
+
@params = {}
|
30
|
+
@params[:learning_rate] = learning_rate
|
31
|
+
@params[:decay1] = decay1
|
32
|
+
@params[:decay2] = decay2
|
33
|
+
@fst_moment = nil
|
34
|
+
@sec_moment = nil
|
35
|
+
@iter = 0
|
36
|
+
end
|
37
|
+
|
38
|
+
# Calculate the updated weight with Adam adaptive learning rate.
|
39
|
+
#
|
40
|
+
# @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
|
41
|
+
# @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
|
42
|
+
# @return [Numo::DFloat] (shape: [n_features]) The updated weight.
|
43
|
+
def call(weight, gradient)
|
44
|
+
@fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
|
45
|
+
@sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
|
46
|
+
|
47
|
+
@iter += 1
|
48
|
+
|
49
|
+
@fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
|
50
|
+
@sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
|
51
|
+
nm_fst_moment = @fst_moment / (1.0 - @params[:decay1]**@iter)
|
52
|
+
nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
|
53
|
+
|
54
|
+
weight - @params[:learning_rate] * nm_fst_moment / (nm_sec_moment**0.5 + 1e-8)
|
55
|
+
end
|
56
|
+
|
57
|
+
# Dump marshal data.
|
58
|
+
# @return [Hash] The marshal data.
|
59
|
+
def marshal_dump
|
60
|
+
{ params: @params,
|
61
|
+
fst_moment: @fst_moment,
|
62
|
+
sec_moment: @sec_moment,
|
63
|
+
iter: @iter }
|
64
|
+
end
|
65
|
+
|
66
|
+
# Load marshal data.
|
67
|
+
# @return [nil]
|
68
|
+
def marshal_load(obj)
|
69
|
+
@params = obj[:params]
|
70
|
+
@fst_moment = obj[:fst_moment]
|
71
|
+
@sec_moment = obj[:sec_moment]
|
72
|
+
@iter = obj[:iter]
|
73
|
+
nil
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.1
|
4
|
+
version: 0.8.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-03-
|
11
|
+
date: 2019-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -152,11 +152,14 @@ files:
|
|
152
152
|
- lib/rumale/model_selection/cross_validation.rb
|
153
153
|
- lib/rumale/model_selection/grid_search_cv.rb
|
154
154
|
- lib/rumale/model_selection/k_fold.rb
|
155
|
+
- lib/rumale/model_selection/shuffle_split.rb
|
155
156
|
- lib/rumale/model_selection/stratified_k_fold.rb
|
157
|
+
- lib/rumale/model_selection/stratified_shuffle_split.rb
|
156
158
|
- lib/rumale/multiclass/one_vs_rest_classifier.rb
|
157
159
|
- lib/rumale/naive_bayes/naive_bayes.rb
|
158
160
|
- lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
|
159
161
|
- lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
|
162
|
+
- lib/rumale/optimizer/adam.rb
|
160
163
|
- lib/rumale/optimizer/nadam.rb
|
161
164
|
- lib/rumale/optimizer/rmsprop.rb
|
162
165
|
- lib/rumale/optimizer/sgd.rb
|