rumale 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dab9c67aa39f19e73859d41013363b4f3811142e
4
- data.tar.gz: 49b1d14b9261f2ede4dc97b4353efdf9032872d2
3
+ metadata.gz: dba389e77a984b46e5352a2b4aae15f8eec2362d
4
+ data.tar.gz: 2eab0f18fc0e4b16af317bfa7b81db8203c62a20
5
5
  SHA512:
6
- metadata.gz: 1d2b62e0660586f4ace811f06bdd73e9ff5adb877682a9ae53e29c76188b6fe1215d1305953d292668dcc4f0b5fba71399ad746e8976572a19bbd6a4c1153829
7
- data.tar.gz: ce075327208560af72f0b54d7113b39a23c98c1828a3f5c3f5b28b3d987be3d81af465aae943aa27f2883cc671337b1952b4f8d8981a970c3727e0c94affbef5
6
+ metadata.gz: 034b0fc6f79ed66af3a50d025e66f17a3815c0c0e0634bd3eccec19546d585b17f158e376700aafa3ae89d52a895efefd79ff048d61b9f87dabe51f72393b75f
7
+ data.tar.gz: 4124f95f72392af658b342d7c21526717417de246ce99f4ee857cc4d26e799dc4a7ea0bbb59aa45c6e8621178ee84a4a4ead426aa79f09e62bf903e273cdf05b
@@ -4,6 +4,11 @@ AllCops:
4
4
  TargetRubyVersion: 2.3
5
5
  DisplayCopNames: true
6
6
  DisplayStyleGuide: true
7
+ Exclude:
8
+ - 'bin/*'
9
+ - 'rumale.gemspec'
10
+ - 'Rakefile'
11
+ - 'Gemfile'
7
12
 
8
13
  Documentation:
9
14
  Enabled: false
@@ -1,3 +1,9 @@
1
+ # 0.8.2
2
+ - Add class for Adam optimizer.
3
+ - Add data splitter classes for random permutation cross validation.
4
+ - Add accessor method for number of splits to K-fold splitter classes.
5
+ - Add execution result of example script on README ([#3](https://github.com/yoshoku/rumale/pull/3)).
6
+
1
7
  # 0.8.1
2
8
  - Add some evaluator classes.
3
9
  - MeanSquaredLogError
data/README.md CHANGED
@@ -121,6 +121,13 @@ mean_logloss = report[:test_score].inject(:+) / kf.n_splits
121
121
  puts("5-CV mean log-loss: %.3f" % mean_logloss)
122
122
  ```
123
123
 
124
+ Execution of the above script results in the following.
125
+
126
+ ```bash
127
+ $ ruby cross_validation.rb
128
+ 5-CV mean log-loss: 0.476
129
+ ```
130
+
124
131
  ### Example 3. Pipeline
125
132
 
126
133
  ```ruby
@@ -18,6 +18,7 @@ require 'rumale/base/splitter'
18
18
  require 'rumale/base/evaluator'
19
19
  require 'rumale/optimizer/sgd'
20
20
  require 'rumale/optimizer/rmsprop'
21
+ require 'rumale/optimizer/adam'
21
22
  require 'rumale/optimizer/nadam'
22
23
  require 'rumale/optimizer/yellow_fin'
23
24
  require 'rumale/pipeline/pipeline'
@@ -56,6 +57,8 @@ require 'rumale/preprocessing/label_encoder'
56
57
  require 'rumale/preprocessing/one_hot_encoder'
57
58
  require 'rumale/model_selection/k_fold'
58
59
  require 'rumale/model_selection/stratified_k_fold'
60
+ require 'rumale/model_selection/shuffle_split'
61
+ require 'rumale/model_selection/stratified_shuffle_split'
59
62
  require 'rumale/model_selection/cross_validation'
60
63
  require 'rumale/model_selection/grid_search_cv'
61
64
  require 'rumale/evaluation_measure/accuracy'
@@ -18,6 +18,10 @@ module Rumale
18
18
  class KFold
19
19
  include Base::Splitter
20
20
 
21
+ # Return the number of folds.
22
+ # @return [Integer]
23
+ attr_reader :n_splits
24
+
21
25
  # Return the flag indicating whether to shuffle the dataset.
22
26
  # @return [Boolean]
23
27
  attr_reader :shuffle
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/splitter'
4
+
5
+ module Rumale
6
+ module ModelSelection
7
+ # ShuffleSplit is a class that generates the set of data indices for random permutation cross-validation.
8
+ #
9
+ # @example
10
+ # ss = Rumale::ModelSelection::ShuffleSplit.new(n_splits: 3, test_size: 0.2, random_seed: 1)
11
+ # ss.split(samples, labels).each do |train_ids, test_ids|
12
+ # train_samples = samples[train_ids, true]
13
+ # test_samples = samples[test_ids, true]
14
+ # ...
15
+ # end
16
+ #
17
+ class ShuffleSplit
18
+ include Base::Splitter
19
+
20
+ # Return the number of folds.
21
+ # @return [Integer]
22
+ attr_reader :n_splits
23
+
24
+ # Return the random generator for shuffling the dataset.
25
+ # @return [Random]
26
+ attr_reader :rng
27
+
28
+ # Create a new data splitter for random permutation cross validation.
29
+ #
30
+ # @param n_splits [Integer] The number of folds.
31
+ # @param test_size [Float] The ratio of number of samples for test data.
32
+ # @param train_size [Float] The ratio of number of samples for train data.
33
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
34
+ def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
35
+ check_params_integer(n_splits: n_splits)
36
+ check_params_float(test_size: test_size)
37
+ check_params_type_or_nil(Float, train_size: train_size)
38
+ check_params_type_or_nil(Integer, random_seed: random_seed)
39
+ check_params_positive(n_splits: n_splits)
40
+ check_params_positive(test_size: test_size)
41
+ check_params_positive(train_size: train_size) unless train_size.nil?
42
+ @n_splits = n_splits
43
+ @test_size = test_size
44
+ @train_size = train_size
45
+ @random_seed = random_seed
46
+ @random_seed ||= srand
47
+ @rng = Random.new(@random_seed)
48
+ end
49
+
50
+ # Generate data indices for random permutation cross validation.
51
+ #
52
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features])
53
+ # The dataset to be used to generate data indices for random permutation cross validation.
54
+ # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
55
+ def split(x, _y = nil)
56
+ check_sample_array(x)
57
+ # Initialize and check some variables.
58
+ n_samples = x.shape[0]
59
+ n_test_samples = (@test_size * n_samples).to_i
60
+ n_train_samples = @train_size.nil? ? n_samples - n_test_samples : (@train_size * n_samples).to_i
61
+ unless @n_splits.between?(1, n_samples)
62
+ raise ArgumentError,
63
+ 'The value of n_splits must be not less than 1 and not more than the number of samples.'
64
+ end
65
+ unless n_test_samples.between?(1, n_samples)
66
+ raise RangeError,
67
+ 'The number of sample in test split must be not less than 1 and not more than the number of samples.'
68
+ end
69
+ unless n_train_samples.between?(1, n_samples)
70
+ raise RangeError,
71
+ 'The number of sample in train split must be not less than 1 and not more than the number of samples.'
72
+ end
73
+ if (n_test_samples + n_train_samples) > n_samples
74
+ raise RangeError,
75
+ 'The total number of samples in test split and train split must be not more than the number of samples.'
76
+ end
77
+ # Returns array consisting of the training and testing ids for each fold.
78
+ dataset_ids = [*0...n_samples]
79
+ Array.new(@n_splits) do
80
+ test_ids = dataset_ids.sample(n_test_samples, random: @rng)
81
+ train_ids = if @train_size.nil?
82
+ dataset_ids - test_ids
83
+ else
84
+ (dataset_ids - test_ids).sample(n_train_samples, random: @rng)
85
+ end
86
+ [train_ids, test_ids]
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
@@ -18,6 +18,10 @@ module Rumale
18
18
  class StratifiedKFold
19
19
  include Base::Splitter
20
20
 
21
+ # Return the number of folds.
22
+ # @return [Integer]
23
+ attr_reader :n_splits
24
+
21
25
  # Return the flag indicating whether to shuffle the dataset.
22
26
  # @return [Boolean]
23
27
  attr_reader :shuffle
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/splitter'
4
+
5
+ module Rumale
6
+ module ModelSelection
7
+ # StratifiedShuffleSplit is a class that generates the set of data indices for random permutation cross-validation.
8
+ # The proportion of the number of samples in each class will be almost equal for each fold.
9
+ #
10
+ # @example
11
+ # ss = Rumale::ModelSelection::StratifiedShuffleSplit.new(n_splits: 3, test_size: 0.2, random_seed: 1)
12
+ # ss.split(samples, labels).each do |train_ids, test_ids|
13
+ # train_samples = samples[train_ids, true]
14
+ # test_samples = samples[test_ids, true]
15
+ # ...
16
+ # end
17
+ #
18
+ class StratifiedShuffleSplit
19
+ include Base::Splitter
20
+
21
+ # Return the number of folds.
22
+ # @return [Integer]
23
+ attr_reader :n_splits
24
+
25
+ # Return the random generator for shuffling the dataset.
26
+ # @return [Random]
27
+ attr_reader :rng
28
+
29
+ # Create a new data splitter for random permutation cross validation.
30
+ #
31
+ # @param n_splits [Integer] The number of folds.
32
+ # @param test_size [Float] The ratio of number of samples for test data.
33
+ # @param train_size [Float] The ratio of number of samples for train data.
34
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
35
+ def initialize(n_splits: 3, test_size: 0.1, train_size: nil, random_seed: nil)
36
+ check_params_integer(n_splits: n_splits)
37
+ check_params_float(test_size: test_size)
38
+ check_params_type_or_nil(Float, train_size: train_size)
39
+ check_params_type_or_nil(Integer, random_seed: random_seed)
40
+ check_params_positive(n_splits: n_splits)
41
+ check_params_positive(test_size: test_size)
42
+ check_params_positive(train_size: train_size) unless train_size.nil?
43
+ @n_splits = n_splits
44
+ @test_size = test_size
45
+ @train_size = train_size
46
+ @random_seed = random_seed
47
+ @random_seed ||= srand
48
+ @rng = Random.new(@random_seed)
49
+ end
50
+
51
+ # Generate data indices for stratified random permutation cross validation.
52
+ #
53
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features])
54
+ # The dataset to be used to generate data indices for stratified random permutation cross validation.
55
+ # This argument exists to unify the interface between the K-fold methods; it is used only for input validation, not for generating the indices.
56
+ # @param y [Numo::Int32] (shape: [n_samples])
57
+ # The labels to be used to generate data indices for stratified random permutation cross validation.
58
+ # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
59
+ def split(x, y)
60
+ check_sample_array(x)
61
+ check_label_array(y)
62
+ check_sample_label_size(x, y)
63
+ # Initialize and check some variables.
64
+ train_sz = @train_size.nil? ? 1.0 - @test_size : @train_size
65
+ # Check the number of samples in each class.
66
+ unless valid_n_splits?(y)
67
+ raise ArgumentError,
68
+ 'The value of n_splits must be not less than 1 and not more than the number of samples in each class.'
69
+ end
70
+ unless enough_data_size_each_class?(y, @test_size)
71
+ raise RangeError,
72
+ 'The number of sample in test split must be not less than 1 and not more than the number of samples in each class.'
73
+ end
74
+ unless enough_data_size_each_class?(y, train_sz)
75
+ raise RangeError,
76
+ 'The number of sample in train split must be not less than 1 and not more than the number of samples in each class.'
77
+ end
78
+ unless enough_data_size_each_class?(y, train_sz + @test_size)
79
+ raise RangeError,
80
+ 'The total number of samples in test split and train split must be not more than the number of samples in each class.'
81
+ end
82
+ # Returns array consisting of the training and testing ids for each fold.
83
+ sample_ids_each_class = y.to_a.uniq.map { |label| y.eq(label).where.to_a }
84
+ Array.new(@n_splits) do
85
+ train_ids = []
86
+ test_ids = []
87
+ sample_ids_each_class.each do |sample_ids|
88
+ n_samples = sample_ids.size
89
+ n_test_samples = (@test_size * n_samples).to_i
90
+ n_train_samples = (train_sz * n_samples).to_i
91
+ test_ids += sample_ids.sample(n_test_samples, random: @rng)
92
+ train_ids += if @train_size.nil?
93
+ sample_ids - test_ids
94
+ else
95
+ (sample_ids - test_ids).sample(n_train_samples, random: @rng)
96
+ end
97
+ end
98
+ [train_ids, test_ids]
99
+ end
100
+ end
101
+
102
+ private
103
+
104
+ def valid_n_splits?(y)
105
+ y.to_a.uniq.map { |label| y.eq(label).where.size }.all? { |n_samples| @n_splits.between?(1, n_samples) }
106
+ end
107
+
108
+ def enough_data_size_each_class?(y, data_size)
109
+ y.to_a.uniq.map { |label| y.eq(label).where.size }.all? do |n_samples|
110
+ (data_size * n_samples).to_i.between?(1, n_samples)
111
+ end
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/base_estimator'
5
+
6
+ module Rumale
7
+ module Optimizer
8
+ # Adam is a class that implements Adam optimizer.
9
+ #
10
+ # @example
11
+ # optimizer = Rumale::Optimizer::Adam.new(learning_rate: 0.01, decay1: 0.9, decay2: 0.999)
12
+ # estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
13
+ # estimator.fit(samples, values)
14
+ #
15
+ # *Reference*
16
+ # - D. P. Kingma and J. Ba, "Adam: A Method for Stochastic Optimization," Proc. ICLR'15, 2015.
17
+ class Adam
18
+ include Base::BaseEstimator
19
+ include Validation
20
+
21
+ # Create a new optimizer with Adam
22
+ #
23
+ # @param learning_rate [Float] The initial value of learning rate.
24
+ # @param decay1 [Float] The smoothing parameter for the first moment.
25
+ # @param decay2 [Float] The smoothing parameter for the second moment.
26
+ def initialize(learning_rate: 0.001, decay1: 0.9, decay2: 0.999)
27
+ check_params_float(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
28
+ check_params_positive(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
29
+ @params = {}
30
+ @params[:learning_rate] = learning_rate
31
+ @params[:decay1] = decay1
32
+ @params[:decay2] = decay2
33
+ @fst_moment = nil
34
+ @sec_moment = nil
35
+ @iter = 0
36
+ end
37
+
38
+ # Calculate the updated weight with Adam adaptive learning rate.
39
+ #
40
+ # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
41
+ # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
42
+ # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
43
+ def call(weight, gradient)
44
+ @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
45
+ @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
46
+
47
+ @iter += 1
48
+
49
+ @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
50
+ @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
51
+ nm_fst_moment = @fst_moment / (1.0 - @params[:decay1]**@iter)
52
+ nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
53
+
54
+ weight - @params[:learning_rate] * nm_fst_moment / (nm_sec_moment**0.5 + 1e-8)
55
+ end
56
+
57
+ # Dump marshal data.
58
+ # @return [Hash] The marshal data.
59
+ def marshal_dump
60
+ { params: @params,
61
+ fst_moment: @fst_moment,
62
+ sec_moment: @sec_moment,
63
+ iter: @iter }
64
+ end
65
+
66
+ # Load marshal data.
67
+ # @return [nil]
68
+ def marshal_load(obj)
69
+ @params = obj[:params]
70
+ @fst_moment = obj[:fst_moment]
71
+ @sec_moment = obj[:sec_moment]
72
+ @iter = obj[:iter]
73
+ nil
74
+ end
75
+ end
76
+ end
77
+ end
@@ -2,5 +2,6 @@
2
2
 
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
- VERSION = '0.8.1'
5
+ # The version of Rumale you are using.
6
+ VERSION = '0.8.2'
6
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.8.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-08 00:00:00.000000000 Z
11
+ date: 2019-03-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -152,11 +152,14 @@ files:
152
152
  - lib/rumale/model_selection/cross_validation.rb
153
153
  - lib/rumale/model_selection/grid_search_cv.rb
154
154
  - lib/rumale/model_selection/k_fold.rb
155
+ - lib/rumale/model_selection/shuffle_split.rb
155
156
  - lib/rumale/model_selection/stratified_k_fold.rb
157
+ - lib/rumale/model_selection/stratified_shuffle_split.rb
156
158
  - lib/rumale/multiclass/one_vs_rest_classifier.rb
157
159
  - lib/rumale/naive_bayes/naive_bayes.rb
158
160
  - lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
159
161
  - lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
162
+ - lib/rumale/optimizer/adam.rb
160
163
  - lib/rumale/optimizer/nadam.rb
161
164
  - lib/rumale/optimizer/rmsprop.rb
162
165
  - lib/rumale/optimizer/sgd.rb