rumale 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +1 -1
- data/lib/rumale/base/base_estimator.rb +16 -0
- data/lib/rumale/ensemble/extra_trees_classifier.rb +28 -13
- data/lib/rumale/ensemble/extra_trees_regressor.rb +28 -13
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +83 -34
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +58 -30
- data/lib/rumale/ensemble/random_forest_classifier.rb +66 -37
- data/lib/rumale/ensemble/random_forest_regressor.rb +45 -15
- data/lib/rumale/kernel_machine/kernel_svc.rb +37 -11
- data/lib/rumale/linear_model/base_linear_model.rb +5 -1
- data/lib/rumale/linear_model/lasso.rb +13 -4
- data/lib/rumale/linear_model/linear_regression.rb +13 -3
- data/lib/rumale/linear_model/logistic_regression.rb +25 -6
- data/lib/rumale/linear_model/ridge.rb +13 -3
- data/lib/rumale/linear_model/svc.rb +40 -18
- data/lib/rumale/linear_model/svr.rb +12 -3
- data/lib/rumale/polynomial_model/base_factorization_machine.rb +6 -1
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +26 -7
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +12 -3
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -0
- metadata +16 -2
@@ -48,13 +48,17 @@ module Rumale
|
|
48
48
|
# @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
|
49
49
|
# @param max_features [Integer] The number of features to consider when searching optimal split point.
|
50
50
|
# If nil is given, split process considers all features.
|
51
|
+
# @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
|
52
|
+
# If nil is given, the methods do not execute in parallel.
|
53
|
+
# If zero or less is given, it becomes equal to the number of processors.
|
54
|
+
# This parameter is ignored if the Parallel gem is not loaded.
|
51
55
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
52
56
|
# It is used to randomly determine the order of features when deciding splitting point.
|
53
57
|
def initialize(n_estimators: 10,
|
54
58
|
criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
|
55
|
-
max_features: nil, random_seed: nil)
|
59
|
+
max_features: nil, n_jobs: nil, random_seed: nil)
|
56
60
|
check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
57
|
-
max_features: max_features, random_seed: random_seed)
|
61
|
+
max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
|
58
62
|
check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
|
59
63
|
check_params_string(criterion: criterion)
|
60
64
|
check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
|
@@ -67,6 +71,7 @@ module Rumale
|
|
67
71
|
@params[:max_leaf_nodes] = max_leaf_nodes
|
68
72
|
@params[:min_samples_leaf] = min_samples_leaf
|
69
73
|
@params[:max_features] = max_features
|
74
|
+
@params[:n_jobs] = n_jobs
|
70
75
|
@params[:random_seed] = random_seed
|
71
76
|
@params[:random_seed] ||= srand
|
72
77
|
@estimators = nil
|
@@ -89,19 +94,28 @@ module Rumale
|
|
89
94
|
@params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
|
90
95
|
@params[:max_features] = [[1, @params[:max_features]].max, n_features].min
|
91
96
|
@classes = Numo::Int32.asarray(y.to_a.uniq.sort)
|
92
|
-
@feature_importances = Numo::DFloat.zeros(n_features)
|
93
97
|
# Construct forest.
|
94
|
-
@estimators =
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
98
|
+
@estimators =
|
99
|
+
if enable_parallel?
|
100
|
+
rngs = Array.new(@params[:n_estimators]) { Random.new(@rng.rand(Rumale::Values.int_max)) }
|
101
|
+
# :nocov:
|
102
|
+
parallel_map(@params[:n_estimators]) do |n|
|
103
|
+
bootstrap_ids = Array.new(n_samples) { rngs[n].rand(0...n_samples) }
|
104
|
+
plant_tree(rngs[n].rand(Rumale::Values.int_max)).fit(x[bootstrap_ids, true], y[bootstrap_ids])
|
105
|
+
end
|
106
|
+
# :nocov:
|
107
|
+
else
|
108
|
+
Array.new(@params[:n_estimators]) do
|
109
|
+
bootstrap_ids = Array.new(n_samples) { @rng.rand(0...n_samples) }
|
110
|
+
plant_tree(@rng.rand(Rumale::Values.int_max)).fit(x[bootstrap_ids, true], y[bootstrap_ids])
|
111
|
+
end
|
112
|
+
end
|
113
|
+
@feature_importances =
|
114
|
+
if enable_parallel?
|
115
|
+
parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
|
116
|
+
else
|
117
|
+
@estimators.map(&:feature_importances).reduce(&:+)
|
118
|
+
end
|
105
119
|
@feature_importances /= @feature_importances.sum
|
106
120
|
self
|
107
121
|
end
|
@@ -112,18 +126,16 @@ module Rumale
|
|
112
126
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
113
127
|
def predict(x)
|
114
128
|
check_sample_array(x)
|
115
|
-
n_samples
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
end
|
126
|
-
Numo::Int32[*Array.new(n_samples) { |n| @classes[ballot_box[n, true].max_index] }]
|
129
|
+
n_samples = x.shape[0]
|
130
|
+
n_estimators = @estimators.size
|
131
|
+
predicted = if enable_parallel?
|
132
|
+
predict_set = parallel_map(n_estimators) { |n| @estimators[n].predict(x).to_a }.transpose
|
133
|
+
parallel_map(n_samples) { |n| predict_set[n].group_by { |v| v }.max_by { |_k, v| v.size }.first }
|
134
|
+
else
|
135
|
+
predict_set = @estimators.map { |tree| tree.predict(x).to_a }.transpose
|
136
|
+
Array.new(n_samples) { |n| predict_set[n].group_by { |v| v }.max_by { |_k, v| v.size }.first }
|
137
|
+
end
|
138
|
+
Numo::Int32.asarray(predicted)
|
127
139
|
end
|
128
140
|
|
129
141
|
# Predict probability for samples.
|
@@ -132,18 +144,12 @@ module Rumale
|
|
132
144
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
133
145
|
def predict_proba(x)
|
134
146
|
check_sample_array(x)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
probs = tree.predict_proba(x)
|
141
|
-
tree.classes.size.times do |n|
|
142
|
-
class_id = classes_arr.index(tree.classes[n])
|
143
|
-
ballot_box[true, class_id] += probs[true, n] unless class_id.nil?
|
144
|
-
end
|
147
|
+
n_estimators = @estimators.size
|
148
|
+
if enable_parallel?
|
149
|
+
parallel_map(n_estimators) { |n| predict_proba_tree(@estimators[n], x) }.reduce(&:+) / n_estimators
|
150
|
+
else
|
151
|
+
@estimators.map { |tree| predict_proba_tree(tree, x) }.reduce(&:+) / n_estimators
|
145
152
|
end
|
146
|
-
(ballot_box.transpose / ballot_box.sum(axis: 1)).transpose
|
147
153
|
end
|
148
154
|
|
149
155
|
# Return the index of the leaf that each sample reached.
|
@@ -175,6 +181,29 @@ module Rumale
|
|
175
181
|
@rng = obj[:rng]
|
176
182
|
nil
|
177
183
|
end
|
184
|
+
|
185
|
+
private
|
186
|
+
|
187
|
+
def plant_tree(rnd_seed)
|
188
|
+
Tree::DecisionTreeClassifier.new(
|
189
|
+
criterion: @params[:criterion], max_depth: @params[:max_depth],
|
190
|
+
max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
|
191
|
+
max_features: @params[:max_features], random_seed: rnd_seed
|
192
|
+
)
|
193
|
+
end
|
194
|
+
|
195
|
+
def predict_proba_tree(tree, x)
|
196
|
+
# initialize some variables.
|
197
|
+
n_samples = x.shape[0]
|
198
|
+
base_classes = @classes.to_a
|
199
|
+
n_classes = base_classes.size
|
200
|
+
class_ids = tree.classes.map { |c| base_classes.index(c) }
|
201
|
+
# predict probabilities.
|
202
|
+
probs = Numo::DFloat.zeros(n_samples, n_classes)
|
203
|
+
tree_probs = tree.predict_proba(x)
|
204
|
+
class_ids.each_with_index { |i, j| probs[true, i] = tree_probs[true, j] }
|
205
|
+
probs
|
206
|
+
end
|
178
207
|
end
|
179
208
|
end
|
180
209
|
end
|
@@ -43,13 +43,17 @@ module Rumale
|
|
43
43
|
# @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
|
44
44
|
# @param max_features [Integer] The number of features to consider when searching optimal split point.
|
45
45
|
# If nil is given, split process considers all features.
|
46
|
+
# @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
|
47
|
+
# If nil is given, the methods do not execute in parallel.
|
48
|
+
# If zero or less is given, it becomes equal to the number of processors.
|
49
|
+
# This parameter is ignored if the Parallel gem is not loaded.
|
46
50
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
47
51
|
# It is used to randomly determine the order of features when deciding splitting point.
|
48
52
|
def initialize(n_estimators: 10,
|
49
53
|
criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
|
50
|
-
max_features: nil, random_seed: nil)
|
54
|
+
max_features: nil, n_jobs: nil, random_seed: nil)
|
51
55
|
check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
52
|
-
max_features: max_features, random_seed: random_seed)
|
56
|
+
max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
|
53
57
|
check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
|
54
58
|
check_params_string(criterion: criterion)
|
55
59
|
check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
|
@@ -62,6 +66,7 @@ module Rumale
|
|
62
66
|
@params[:max_leaf_nodes] = max_leaf_nodes
|
63
67
|
@params[:min_samples_leaf] = min_samples_leaf
|
64
68
|
@params[:max_features] = max_features
|
69
|
+
@params[:n_jobs] = n_jobs
|
65
70
|
@params[:random_seed] = random_seed
|
66
71
|
@params[:random_seed] ||= srand
|
67
72
|
@estimators = nil
|
@@ -82,20 +87,31 @@ module Rumale
|
|
82
87
|
n_samples, n_features = x.shape
|
83
88
|
@params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
|
84
89
|
@params[:max_features] = [[1, @params[:max_features]].max, n_features].min
|
85
|
-
@feature_importances = Numo::DFloat.zeros(n_features)
|
86
90
|
single_target = y.shape[1].nil?
|
87
91
|
# Construct forest.
|
88
|
-
@estimators =
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
92
|
+
@estimators =
|
93
|
+
if enable_parallel?
|
94
|
+
rngs = Array.new(@params[:n_estimators]) { Random.new(@rng.rand(Rumale::Values.int_max)) }
|
95
|
+
# :nocov:
|
96
|
+
parallel_map(@params[:n_estimators]) do |n|
|
97
|
+
bootstrap_ids = Array.new(n_samples) { rngs[n].rand(0...n_samples) }
|
98
|
+
tree = plant_tree(rngs[n].rand(Rumale::Values.int_max))
|
99
|
+
tree.fit(x[bootstrap_ids, true], single_target ? y[bootstrap_ids] : y[bootstrap_ids, true])
|
100
|
+
end
|
101
|
+
# :nocov:
|
102
|
+
else
|
103
|
+
Array.new(@params[:n_estimators]) do
|
104
|
+
bootstrap_ids = Array.new(n_samples) { @rng.rand(0...n_samples) }
|
105
|
+
tree = plant_tree(@rng.rand(Rumale::Values.int_max))
|
106
|
+
tree.fit(x[bootstrap_ids, true], single_target ? y[bootstrap_ids] : y[bootstrap_ids, true])
|
107
|
+
end
|
108
|
+
end
|
109
|
+
@feature_importances =
|
110
|
+
if enable_parallel?
|
111
|
+
parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
|
112
|
+
else
|
113
|
+
@estimators.map(&:feature_importances).reduce(&:+)
|
114
|
+
end
|
99
115
|
@feature_importances /= @feature_importances.sum
|
100
116
|
self
|
101
117
|
end
|
@@ -106,7 +122,11 @@ module Rumale
|
|
106
122
|
# @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
|
107
123
|
def predict(x)
|
108
124
|
check_sample_array(x)
|
109
|
-
|
125
|
+
if enable_parallel?
|
126
|
+
parallel_map(@params[:n_estimators]) { |n| @estimators[n].predict(x) }.reduce(&:+) / @params[:n_estimators]
|
127
|
+
else
|
128
|
+
@estimators.map { |tree| tree.predict(x) }.reduce(&:+) / @params[:n_estimators]
|
129
|
+
end
|
110
130
|
end
|
111
131
|
|
112
132
|
# Return the index of the leaf that each sample reached.
|
@@ -136,6 +156,16 @@ module Rumale
|
|
136
156
|
@rng = obj[:rng]
|
137
157
|
nil
|
138
158
|
end
|
159
|
+
|
160
|
+
private
|
161
|
+
|
162
|
+
def plant_tree(rnd_seed)
|
163
|
+
Tree::DecisionTreeRegressor.new(
|
164
|
+
criterion: @params[:criterion], max_depth: @params[:max_depth],
|
165
|
+
max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
|
166
|
+
max_features: @params[:max_features], random_seed: rnd_seed
|
167
|
+
)
|
168
|
+
end
|
139
169
|
end
|
140
170
|
end
|
141
171
|
end
|
@@ -42,17 +42,22 @@ module Rumale
|
|
42
42
|
# @param reg_param [Float] The regularization parameter.
|
43
43
|
# @param max_iter [Integer] The maximum number of iterations.
|
44
44
|
# @param probability [Boolean] The flag indicating whether to perform probability estimation.
|
45
|
+
# @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
|
46
|
+
# If nil is given, the methods do not execute in parallel.
|
47
|
+
# If zero or less is given, it becomes equal to the number of processors.
|
48
|
+
# This parameter is ignored if the Parallel gem is not loaded.
|
45
49
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
46
|
-
def initialize(reg_param: 1.0, max_iter: 1000, probability: false, random_seed: nil)
|
50
|
+
def initialize(reg_param: 1.0, max_iter: 1000, probability: false, n_jobs: nil, random_seed: nil)
|
47
51
|
check_params_float(reg_param: reg_param)
|
48
52
|
check_params_integer(max_iter: max_iter)
|
49
53
|
check_params_boolean(probability: probability)
|
50
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
54
|
+
check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
|
51
55
|
check_params_positive(reg_param: reg_param, max_iter: max_iter)
|
52
56
|
@params = {}
|
53
57
|
@params[:reg_param] = reg_param
|
54
58
|
@params[:max_iter] = max_iter
|
55
59
|
@params[:probability] = probability
|
60
|
+
@params[:n_jobs] = n_jobs
|
56
61
|
@params[:random_seed] = random_seed
|
57
62
|
@params[:random_seed] ||= srand
|
58
63
|
@weight_vec = nil
|
@@ -79,14 +84,30 @@ module Rumale
|
|
79
84
|
if n_classes > 2
|
80
85
|
@weight_vec = Numo::DFloat.zeros(n_classes, n_features)
|
81
86
|
@prob_param = Numo::DFloat.zeros(n_classes, 2)
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
87
|
+
if enable_parallel?
|
88
|
+
# :nocov:
|
89
|
+
models = parallel_map(n_classes) do |n|
|
90
|
+
bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
|
91
|
+
w = binary_fit(x, bin_y)
|
92
|
+
p = if @params[:probability]
|
93
|
+
Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w), bin_y)
|
94
|
+
else
|
95
|
+
Numo::DFloat[1, 0]
|
96
|
+
end
|
97
|
+
[w, p]
|
98
|
+
end
|
99
|
+
# :nocov:
|
100
|
+
n_classes.times { |n| @weight_vec[n, true], @prob_param[n, true] = models[n] }
|
101
|
+
else
|
102
|
+
n_classes.times do |n|
|
103
|
+
bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
|
104
|
+
@weight_vec[n, true] = binary_fit(x, bin_y)
|
105
|
+
@prob_param[n, true] = if @params[:probability]
|
106
|
+
Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec[n, true].transpose), bin_y)
|
107
|
+
else
|
108
|
+
Numo::DFloat[1, 0]
|
109
|
+
end
|
110
|
+
end
|
90
111
|
end
|
91
112
|
else
|
92
113
|
negative_label = y.to_a.uniq.min
|
@@ -125,7 +146,12 @@ module Rumale
|
|
125
146
|
|
126
147
|
n_samples, = x.shape
|
127
148
|
decision_values = decision_function(x)
|
128
|
-
|
149
|
+
predicted = if enable_parallel?
|
150
|
+
parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
151
|
+
else
|
152
|
+
Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
153
|
+
end
|
154
|
+
Numo::Int32.asarray(predicted)
|
129
155
|
end
|
130
156
|
|
131
157
|
# Predict probability for samples.
|
@@ -20,9 +20,12 @@ module Rumale
|
|
20
20
|
# @param batch_size [Integer] The size of the mini batches.
|
21
21
|
# @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
|
22
22
|
# If nil is given, Nadam is used.
|
23
|
+
# @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
|
24
|
+
# If nil is given, the methods do not execute in parallel.
|
25
|
+
# If zero or less is given, it becomes equal to the number of processors.
|
23
26
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
24
27
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
|
25
|
-
max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
|
28
|
+
max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
|
26
29
|
@params = {}
|
27
30
|
@params[:reg_param] = reg_param
|
28
31
|
@params[:fit_bias] = fit_bias
|
@@ -31,6 +34,7 @@ module Rumale
|
|
31
34
|
@params[:batch_size] = batch_size
|
32
35
|
@params[:optimizer] = optimizer
|
33
36
|
@params[:optimizer] ||= Optimizer::Nadam.new
|
37
|
+
@params[:n_jobs] = n_jobs
|
34
38
|
@params[:random_seed] = random_seed
|
35
39
|
@params[:random_seed] ||= srand
|
36
40
|
@weight_vec = nil
|
@@ -41,12 +41,17 @@ module Rumale
|
|
41
41
|
# @param batch_size [Integer] The size of the mini batches.
|
42
42
|
# @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
|
43
43
|
# If nil is given, Nadam is used.
|
44
|
+
# @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
|
45
|
+
# If nil is given, the method does not execute in parallel.
|
46
|
+
# If zero or less is given, it becomes equal to the number of processors.
|
47
|
+
# This parameter is ignored if the Parallel gem is not loaded.
|
44
48
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
45
|
-
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
|
49
|
+
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
|
50
|
+
n_jobs: nil, random_seed: nil)
|
46
51
|
check_params_float(reg_param: reg_param, bias_scale: bias_scale)
|
47
52
|
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
48
53
|
check_params_boolean(fit_bias: fit_bias)
|
49
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
54
|
+
check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
|
50
55
|
check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
|
51
56
|
super
|
52
57
|
end
|
@@ -67,11 +72,15 @@ module Rumale
|
|
67
72
|
if n_outputs > 1
|
68
73
|
@weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
|
69
74
|
@bias_term = Numo::DFloat.zeros(n_outputs)
|
70
|
-
|
75
|
+
if enable_parallel?
|
76
|
+
models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
|
77
|
+
n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
|
78
|
+
else
|
79
|
+
n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
|
80
|
+
end
|
71
81
|
else
|
72
82
|
@weight_vec, @bias_term = partial_fit(x, y)
|
73
83
|
end
|
74
|
-
|
75
84
|
self
|
76
85
|
end
|
77
86
|
|
@@ -37,12 +37,17 @@ module Rumale
|
|
37
37
|
# @param batch_size [Integer] The size of the mini batches.
|
38
38
|
# @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
|
39
39
|
# If nil is given, Nadam is used.
|
40
|
+
# @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
|
41
|
+
# If nil is given, the method does not execute in parallel.
|
42
|
+
# If zero or less is given, it becomes equal to the number of processors.
|
43
|
+
# This parameter is ignored if the Parallel gem is not loaded.
|
40
44
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
41
|
-
def initialize(fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
|
45
|
+
def initialize(fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
|
46
|
+
n_jobs: nil, random_seed: nil)
|
42
47
|
check_params_float(bias_scale: bias_scale)
|
43
48
|
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
44
49
|
check_params_boolean(fit_bias: fit_bias)
|
45
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
50
|
+
check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
|
46
51
|
check_params_positive(max_iter: max_iter, batch_size: batch_size)
|
47
52
|
keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(reg_param: 0.0)
|
48
53
|
super(keywd_args)
|
@@ -64,7 +69,12 @@ module Rumale
|
|
64
69
|
if n_outputs > 1
|
65
70
|
@weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
|
66
71
|
@bias_term = Numo::DFloat.zeros(n_outputs)
|
67
|
-
|
72
|
+
if enable_parallel?
|
73
|
+
models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
|
74
|
+
n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
|
75
|
+
else
|
76
|
+
n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
|
77
|
+
end
|
68
78
|
else
|
69
79
|
@weight_vec, @bias_term = partial_fit(x, y)
|
70
80
|
end
|
@@ -46,13 +46,17 @@ module Rumale
|
|
46
46
|
# @param batch_size [Integer] The size of the mini batches.
|
47
47
|
# @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
|
48
48
|
# If nil is given, Nadam is used.
|
49
|
+
# @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
|
50
|
+
# If nil is given, the methods do not execute in parallel.
|
51
|
+
# If zero or less is given, it becomes equal to the number of processors.
|
52
|
+
# This parameter is ignored if the Parallel gem is not loaded.
|
49
53
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
50
54
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
|
51
|
-
max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
|
55
|
+
max_iter: 1000, batch_size: 20, optimizer: nil, n_jobs: nil, random_seed: nil)
|
52
56
|
check_params_float(reg_param: reg_param, bias_scale: bias_scale)
|
53
57
|
check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
54
58
|
check_params_boolean(fit_bias: fit_bias)
|
55
|
-
check_params_type_or_nil(Integer, random_seed: random_seed)
|
59
|
+
check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
|
56
60
|
check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
|
57
61
|
super
|
58
62
|
@classes = nil
|
@@ -75,9 +79,19 @@ module Rumale
|
|
75
79
|
if n_classes > 2
|
76
80
|
@weight_vec = Numo::DFloat.zeros(n_classes, n_features)
|
77
81
|
@bias_term = Numo::DFloat.zeros(n_classes)
|
78
|
-
|
79
|
-
|
80
|
-
|
82
|
+
if enable_parallel?
|
83
|
+
# :nocov:
|
84
|
+
models = parallel_map(n_classes) do |n|
|
85
|
+
bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
|
86
|
+
partial_fit(x, bin_y)
|
87
|
+
end
|
88
|
+
# :nocov:
|
89
|
+
n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
|
90
|
+
else
|
91
|
+
n_classes.times do |n|
|
92
|
+
bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
|
93
|
+
@weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
|
94
|
+
end
|
81
95
|
end
|
82
96
|
else
|
83
97
|
negative_label = y.to_a.uniq.min
|
@@ -108,7 +122,12 @@ module Rumale
|
|
108
122
|
|
109
123
|
n_samples, = x.shape
|
110
124
|
decision_values = predict_proba(x)
|
111
|
-
|
125
|
+
predicted = if enable_parallel?
|
126
|
+
parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
127
|
+
else
|
128
|
+
Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
|
129
|
+
end
|
130
|
+
Numo::Int32.asarray(predicted)
|
112
131
|
end
|
113
132
|
|
114
133
|
# Predict probability for samples.
|