rumale 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +1 -1
- data/lib/rumale/base/base_estimator.rb +16 -0
- data/lib/rumale/ensemble/extra_trees_classifier.rb +28 -13
- data/lib/rumale/ensemble/extra_trees_regressor.rb +28 -13
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +83 -34
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +58 -30
- data/lib/rumale/ensemble/random_forest_classifier.rb +66 -37
- data/lib/rumale/ensemble/random_forest_regressor.rb +45 -15
- data/lib/rumale/kernel_machine/kernel_svc.rb +37 -11
- data/lib/rumale/linear_model/base_linear_model.rb +5 -1
- data/lib/rumale/linear_model/lasso.rb +13 -4
- data/lib/rumale/linear_model/linear_regression.rb +13 -3
- data/lib/rumale/linear_model/logistic_regression.rb +25 -6
- data/lib/rumale/linear_model/ridge.rb +13 -3
- data/lib/rumale/linear_model/svc.rb +40 -18
- data/lib/rumale/linear_model/svr.rb +12 -3
- data/lib/rumale/polynomial_model/base_factorization_machine.rb +6 -1
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +26 -7
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +12 -3
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -0
- metadata +16 -2
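The hunks below lean on two helpers, `enable_parallel?` and `parallel_map`, that 0.11.0 adds to `data/lib/rumale/base/base_estimator.rb` (+16 lines in the list above); that hunk is not reproduced in this extract. A minimal sketch of what such helpers could look like, assuming the `n_jobs` semantics documented in the hunks below and the Parallel gem's `Parallel.map`:

```ruby
# Sketch only: the actual base_estimator.rb change is not shown in this
# extract. Semantics follow the n_jobs documentation in the hunks below.
module Rumale
  module Base
    module BaseEstimator
      private

      # Parallel execution is enabled only when n_jobs is set and the
      # Parallel gem has been loaded by the application.
      def enable_parallel?
        !@params[:n_jobs].nil? && defined?(Parallel)
      end

      # n_jobs of zero or less means "use all processors".
      def n_processes
        return 1 unless enable_parallel?
        @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
      end

      # Evaluates the block for 0...n_outputs in separate processes and
      # returns the results in order.
      def parallel_map(n_outputs, &block)
        Parallel.map(Array.new(n_outputs) { |i| i }, in_processes: n_processes, &block)
      end
    end
  end
end
```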
data/lib/rumale/linear_model/ridge.rb
CHANGED
@@ -38,12 +38,17 @@ module Rumale
       # @param batch_size [Integer] The size of the mini batches.
       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
       #   If nil is given, Nadam is used.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
+                     n_jobs: nil, random_seed: nil)
         check_params_float(reg_param: reg_param, bias_scale: bias_scale)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
         super
       end
@@ -64,7 +69,12 @@ module Rumale
         if n_outputs > 1
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          if enable_parallel?
+            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          end
         else
           @weight_vec, @bias_term = partial_fit(x, y)
         end
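For a multi-output regression, the change above fits one weight vector per output, optionally in parallel. A hypothetical usage sketch (the data is made up; `Rumale::LinearModel::Ridge` is the class this hunk belongs to):

```ruby
require 'parallel' # opt in; without this require, n_jobs is ignored
require 'rumale'

# Made-up data: 100 samples, 4 features, 3 regression targets.
x = Numo::DFloat.new(100, 4).rand
y = Numo::DFloat.new(100, 3).rand

# n_jobs: -1 requests one process per available processor; each of the
# three targets is fit by a separate partial_fit call.
ridge = Rumale::LinearModel::Ridge.new(max_iter: 500, n_jobs: -1, random_seed: 1)
ridge.fit(x, y)
p ridge.weight_vec.shape # => [3, 4]
```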
data/lib/rumale/linear_model/svc.rb
CHANGED
@@ -48,13 +48,17 @@ module Rumale
       # @param probability [Boolean] The flag indicating whether to perform probability estimation.
       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
       #   If nil is given, Nadam is used.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, random_seed: nil)
+                     max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, n_jobs: nil, random_seed: nil)
         check_params_float(reg_param: reg_param, bias_scale: bias_scale)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias, probability: probability)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
         keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
         keywd_args.delete(:probability)
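The `keywd_args` line is the existing idiom for forwarding every keyword argument except `probability` to the superclass initializer. A self-contained illustration of what it evaluates to (not rumale code):

```ruby
# Illustration of the keyword-collection idiom used above.
class Demo
  attr_reader :kw

  def initialize(a: 1, b: 2, c: 3)
    # parameters → [[:key, :a], [:key, :b], [:key, :c]]; pairing each name
    # with its current value yields a Hash of all keyword arguments.
    @kw = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
    @kw.delete(:b) # analogous to keywd_args.delete(:probability)
  end
end

p Demo.new(b: 5, c: 9).kw # => {:a=>1, :c=>9}
```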
@@ -79,27 +83,30 @@ module Rumale
         n_features = x.shape[1]

         if n_classes > 2
+          # initialize model.
           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
           @bias_term = Numo::DFloat.zeros(n_classes)
           @prob_param = Numo::DFloat.zeros(n_classes, 2)
-          n_classes.times do |n|
-            bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-            @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-            @prob_param[n, true] = if @params[:probability]
-                                     Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec[n, true].transpose) + @bias_term[n], bin_y)
-                                   else
-                                     Numo::DFloat[1, 0]
-                                   end
-          end
+          # fit model.
+          models = if enable_parallel?
+                     # :nocov:
+                     parallel_map(n_classes) do |n|
+                       bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+                       partial_fit(x, bin_y)
+                     end
+                     # :nocov:
+                   else
+                     Array.new(n_classes) do |n|
+                       bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+                       partial_fit(x, bin_y)
+                     end
+                   end
+          # store model.
+          models.each_with_index { |model, n| @weight_vec[n, true], @bias_term[n], @prob_param[n, true] = model }
         else
           negative_label = y.to_a.uniq.min
           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @weight_vec, @bias_term = partial_fit(x, bin_y)
-          @prob_param = if @params[:probability]
-                          Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose) + @bias_term, bin_y)
-                        else
-                          Numo::DFloat[1, 0]
-                        end
+          @weight_vec, @bias_term, @prob_param = partial_fit(x, bin_y)
         end

         self
@@ -125,7 +132,12 @@ module Rumale

         n_samples, = x.shape
         decision_values = decision_function(x)
-        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
+        predicted = if enable_parallel?
+                      parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    else
+                      Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    end
+        Numo::Int32.asarray(predicted)
       end

       # Predict probability for samples.
@@ -172,6 +184,16 @@ module Rumale

       private

+      def partial_fit(x, bin_y)
+        w, b = super
+        p = if @params[:probability]
+              Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w.transpose) + b, bin_y)
+            else
+              Numo::DFloat[1, 0]
+            end
+        [w, b, p]
+      end
+
       def calc_loss_gradient(x, y, weight)
         target_ids = (x.dot(weight) * y).lt(1.0).where
         grad = Numo::DFloat.zeros(@params[:batch_size])
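Taken together, the svc.rb hunks move probability calibration into `partial_fit` so that one-vs-rest training can run inside `parallel_map`. A made-up usage sketch:

```ruby
require 'parallel'
require 'rumale'

# Made-up 3-class data: fit trains three one-vs-rest classifiers, in
# separate processes when n_jobs is set and Parallel is loaded.
x = Numo::DFloat.new(90, 2).rand
y = Numo::Int32.asarray(Array.new(90) { |i| i % 3 })

svc = Rumale::LinearModel::SVC.new(probability: true, n_jobs: -1, random_seed: 1)
svc.fit(x, y)
svc.predict(x)       # per-sample argmax over decision values, as in the hunk above
svc.predict_proba(x) # uses the sigmoid parameters returned by partial_fit
```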
data/lib/rumale/linear_model/svr.rb
CHANGED
@@ -41,13 +41,17 @@ module Rumale
       # @param batch_size [Integer] The size of the mini batches.
       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
       #   If nil is given, Nadam is used.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
-                     max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
+                     max_iter: 1000, batch_size: 20, optimizer: nil, n_jobs: nil, random_seed: nil)
         check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
         check_params_integer(max_iter: max_iter, batch_size: batch_size)
         check_params_boolean(fit_bias: fit_bias)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
                               max_iter: max_iter, batch_size: batch_size)
         keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
@@ -72,7 +76,12 @@ module Rumale
         if n_outputs > 1
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          if enable_parallel?
+            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          end
         else
           @weight_vec, @bias_term = partial_fit(x, y)
         end
data/lib/rumale/polynomial_model/base_factorization_machine.rb
CHANGED
@@ -21,9 +21,13 @@ module Rumale
       # @param batch_size [Integer] The size of the mini batches.
       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
       #   If nil is given, Nadam is used.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(n_factors: 2, loss: nil, reg_param_linear: 1.0, reg_param_factor: 1.0,
-                     max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+                     max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
         @params = {}
         @params[:n_factors] = n_factors
         @params[:loss] = loss unless loss.nil?
@@ -33,6 +37,7 @@ module Rumale
         @params[:batch_size] = batch_size
         @params[:optimizer] = optimizer
         @params[:optimizer] ||= Optimizer::Nadam.new
+        @params[:n_jobs] = n_jobs
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @factor_mat = nil
data/lib/rumale/polynomial_model/factorization_machine_classifier.rb
CHANGED
@@ -54,13 +54,17 @@ module Rumale
       # @param batch_size [Integer] The size of the mini batches.
       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
       #   If nil is given, Nadam is used.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
-                     max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+                     max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
         check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
         check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
         check_params_string(loss: loss)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(n_factors: n_factors,
                               reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
                               max_iter: max_iter, batch_size: batch_size)
@@ -86,9 +90,19 @@ module Rumale
           @factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
           @bias_term = Numo::DFloat.zeros(n_classes)
-          n_classes.times do |n|
-            bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-            @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+          if enable_parallel?
+            # :nocov:
+            models = parallel_map(n_classes) do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              partial_fit(x, bin_y)
+            end
+            # :nocov:
+            n_classes.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_classes.times do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+            end
           end
         else
           negative_label = y.to_a.uniq.min
@@ -122,9 +136,14 @@ module Rumale
         check_sample_array(x)
         return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2

-        n_samples, = x.shape
+        n_samples = x.shape[0]
         decision_values = decision_function(x)
-        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
+        predicted = if enable_parallel?
+                      parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    else
+                      Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
+                    end
+        Numo::Int32.asarray(predicted)
       end

       # Predict probability for samples.
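The rewritten predict picks, for each sample, the class whose one-vs-rest decision value is largest. A tiny illustration of the `max_index` lookup it relies on, with made-up numbers:

```ruby
require 'numo/narray'

# One row of decision values per sample, one column per class.
decision_values = Numo::DFloat[[0.2, -1.0, 0.7],
                               [1.3,  0.4, -0.2]]
classes = Numo::Int32[10, 20, 30]

# max_index on a row view returns the position of the largest value.
predicted = Array.new(decision_values.shape[0]) { |n| classes[decision_values[n, true].max_index] }
p predicted # => [30, 10]
```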
data/lib/rumale/polynomial_model/factorization_machine_regressor.rb
CHANGED
@@ -47,12 +47,16 @@ module Rumale
       # @param batch_size [Integer] The size of the mini batches.
       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
       #   If nil is given, Nadam is used.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
-                     max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+                     max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
         check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
         check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
-        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
                               max_iter: max_iter, batch_size: batch_size)
         keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(loss: nil)
@@ -76,7 +80,12 @@ module Rumale
           @factor_mat = Numo::DFloat.zeros(n_outputs, @params[:n_factors], n_features)
           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
           @bias_term = Numo::DFloat.zeros(n_outputs)
-          n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          if enable_parallel?
+            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
+            n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
+          end
         else
           @factor_mat, @weight_vec, @bias_term = partial_fit(x, y)
         end
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -39,5 +39,6 @@ MSG
   spec.add_development_dependency 'coveralls', '~> 0.8'
   spec.add_development_dependency 'rake', '~> 12.0'
   spec.add_development_dependency 'rake-compiler'
+  spec.add_development_dependency 'parallel'
   spec.add_development_dependency 'rspec', '~> 3.0'
 end
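Because parallel is added only as a development dependency, `n_jobs` stays inert unless the application supplies the gem itself. A hypothetical opt-in Gemfile:

```ruby
# Gemfile (hypothetical application opting into parallel fitting).
# rumale does not depend on parallel at runtime, so it must be added
# and required explicitly for n_jobs to take effect.
source 'https://rubygems.org'

gem 'rumale', '~> 0.11.0'
gem 'parallel'
```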
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.10.0
+  version: 0.11.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-05-
+date: 2019-05-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -80,6 +80,20 @@ dependencies:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
+- !ruby/object:Gem::Dependency
+  name: parallel
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement