rumale 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +1 -1
- data/lib/rumale/base/base_estimator.rb +16 -0
- data/lib/rumale/ensemble/extra_trees_classifier.rb +28 -13
- data/lib/rumale/ensemble/extra_trees_regressor.rb +28 -13
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +83 -34
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +58 -30
- data/lib/rumale/ensemble/random_forest_classifier.rb +66 -37
- data/lib/rumale/ensemble/random_forest_regressor.rb +45 -15
- data/lib/rumale/kernel_machine/kernel_svc.rb +37 -11
- data/lib/rumale/linear_model/base_linear_model.rb +5 -1
- data/lib/rumale/linear_model/lasso.rb +13 -4
- data/lib/rumale/linear_model/linear_regression.rb +13 -3
- data/lib/rumale/linear_model/logistic_regression.rb +25 -6
- data/lib/rumale/linear_model/ridge.rb +13 -3
- data/lib/rumale/linear_model/svc.rb +40 -18
- data/lib/rumale/linear_model/svr.rb +12 -3
- data/lib/rumale/polynomial_model/base_factorization_machine.rb +6 -1
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +26 -7
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +12 -3
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 50ce110d0d5ad24245b5b52347a7ae72c1a7c673
+  data.tar.gz: 52c1acc4ebe4c8da8120dc431be4e1a953317a63
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f8774f51f6bde00ea9414de9bfbe2c31b1c3c09c6931bd29ae414117d2648ee8273fa4f8dc32e78573a9e9da96db2cba19ca67372e4ac56adbe2a68c9be5b92a
+  data.tar.gz: 7777ba4d627830877dea89b1c9573340fd03882ccdafac57700e261f1e0b621962cc9744129bdbf26ae1078995e7d16db9c36758ae9a327d93ef3e5c3f572b28
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,15 @@
+# 0.11.0
+- Introduce [Parallel gem](https://github.com/grosser/parallel) to improve execution speed for one-vs-the-rest and bagging methods.
+- Add the n_jobs parameter that specifies the number of jobs for parallel processing in some estimators belonging to Rumale::LinearModel, Rumale::PolynomialModel, and Rumale::Ensemble.
+- The n_jobs parameter is valid only when the Parallel gem is loaded.
+
+```ruby
+require 'rumale'
+require 'parallel'
+
+svc = Rumale::LinearModel::SVC.new(n_jobs: -1)
+```
+
 # 0.10.0
 - Add class for t-distributed Stochastic Neighborhood Embedding.
 - Fix bug of zero division on min-max scaling class.
data/README.md
CHANGED
@@ -6,7 +6,7 @@
 [](https://coveralls.io/github/yoshoku/rumale?branch=master)
 [](https://badge.fury.io/rb/rumale)
 [](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
-[](https://www.rubydoc.info/gems/rumale/0.10.0)
+[](https://www.rubydoc.info/gems/rumale/0.11.0)
 
 Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
 Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
data/lib/rumale/base/base_estimator.rb
CHANGED
@@ -8,6 +8,22 @@ module Rumale
       # Return parameters about an estimator.
       # @return [Hash]
       attr_reader :params
+
+      private
+
+      def enable_parallel?
+        return false if @params[:n_jobs].nil? || defined?(Parallel).nil?
+        true
+      end
+
+      def n_processes
+        return 1 unless enable_parallel?
+        @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
+      end
+
+      def parallel_map(n_outputs, &block)
+        Parallel.map(Array.new(n_outputs) { |v| v }, in_processes: n_processes, &block)
+      end
     end
   end
 end
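These three private helpers are the backbone of the release: `enable_parallel?` requires both an explicit `n_jobs` value and a loaded `Parallel` constant, `n_processes` maps any non-positive `n_jobs` to `Parallel.processor_count`, and `parallel_map` fans a block out over that many worker processes. A minimal usage sketch from the caller's side (variable names are illustrative; the API calls come from the changelog above):

```ruby
require 'rumale'
require 'parallel' # makes defined?(Parallel) non-nil, so enable_parallel? can return true

# n_jobs: -1 is zero or less, so n_processes resolves to Parallel.processor_count.
svc = Rumale::LinearModel::SVC.new(n_jobs: -1)

# Leaving n_jobs at its nil default keeps the previous single-process behavior,
# as does omitting the require 'parallel' line above.
serial_svc = Rumale::LinearModel::SVC.new
```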
data/lib/rumale/ensemble/extra_trees_classifier.rb
CHANGED
@@ -47,13 +47,17 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 10,
                      criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
+                                 max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
@@ -76,18 +80,19 @@ module Rumale
         @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
         @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
-        @feature_importances = Numo::DFloat.zeros(n_features)
         # Construct trees.
-
-
-
-
-
-
-
-
-
-
+        rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+        @estimators = if enable_parallel?
+                        parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      else
+                        Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      end
+        @feature_importances =
+          if enable_parallel?
+            parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+          else
+            @estimators.map(&:feature_importances).reduce(&:+)
+          end
         @feature_importances /= @feature_importances.sum
         self
       end
@@ -130,6 +135,16 @@ module Rumale
       def marshal_load(obj)
         super
       end
+
+      private
+
+      def plant_tree(rnd_seed)
+        Tree::ExtraTreeClassifier.new(
+          criterion: @params[:criterion], max_depth: @params[:max_depth],
+          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+          max_features: @params[:max_features], random_seed: rnd_seed
+        )
+      end
     end
   end
 end
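Note the reproducibility pattern here: one seed per tree is drawn from the estimator's own RNG before any fitting starts, so the forest comes out the same whether the trees are grown serially or through `parallel_map`. A short usage sketch with toy data (the array values are illustrative):

```ruby
require 'rumale'
require 'parallel'

# Toy data: 200 samples, 4 features, binary labels derived from the feature sums.
x = Numo::DFloat.new(200, 4).rand
y = Numo::Int32.cast(x.sum(1).gt(2.0))

est = Rumale::Ensemble::ExtraTreesClassifier.new(n_estimators: 50, n_jobs: -1, random_seed: 1)
est.fit(x, y)
est.feature_importances # Numo::DFloat summing to 1; fit normalizes the accumulated importances
```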
data/lib/rumale/ensemble/extra_trees_regressor.rb
CHANGED
@@ -43,13 +43,17 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 10,
                      criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
+                                 max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
@@ -71,18 +75,19 @@ module Rumale
         n_features = x.shape[1]
         @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-        @feature_importances = Numo::DFloat.zeros(n_features)
         # Construct forest.
-
-
-
-
-
-
-
-
-
-
+        rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+        @estimators = if enable_parallel?
+                        parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      else
+                        Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      end
+        @feature_importances =
+          if enable_parallel?
+            parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+          else
+            @estimators.map(&:feature_importances).reduce(&:+)
+          end
         @feature_importances /= @feature_importances.sum
         self
       end
@@ -116,6 +121,16 @@ module Rumale
       def marshal_load(obj)
         super
       end
+
+      private
+
+      def plant_tree(rnd_seed)
+        Tree::ExtraTreeRegressor.new(
+          criterion: @params[:criterion], max_depth: @params[:max_depth],
+          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+          max_features: @params[:max_features], random_seed: rnd_seed
+        )
+      end
     end
   end
 end
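The regressor uses the same seeded-tree pattern, so a serial fit and a parallel fit of identically configured estimators are expected to produce the same forest. A sketch under that assumption (toy data, illustrative values):

```ruby
require 'rumale'
require 'parallel'

x = Numo::DFloat.new(120, 3).rand
y = x.sum(1)

serial_est   = Rumale::Ensemble::ExtraTreesRegressor.new(n_estimators: 30, random_seed: 42)
parallel_est = Rumale::Ensemble::ExtraTreesRegressor.new(n_estimators: 30, n_jobs: -1, random_seed: 42)
serial_est.fit(x, y)
parallel_est.fit(x, y)

# Expected to be 0.0: each tree's seed is fixed before fitting, so the forests match.
(serial_est.predict(x) - parallel_est.predict(x)).abs.max
```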
data/lib/rumale/ensemble/gradient_boosting_classifier.rb
CHANGED
@@ -56,19 +56,22 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
                      max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
+                                 max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
-        check_params_positive(n_estimators: n_estimators,
-
-
-                              max_features: max_features)
+        check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+                              subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                              min_samples_leaf: min_samples_leaf, max_features: max_features)
         @params = {}
         @params[:n_estimators] = n_estimators
         @params[:learning_rate] = learning_rate
@@ -78,6 +81,7 @@ module Rumale
         @params[:max_leaf_nodes] = max_leaf_nodes
         @params[:min_samples_leaf] = min_samples_leaf
         @params[:max_features] = max_features
+        @params[:n_jobs] = n_jobs
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @estimators = nil
@@ -96,22 +100,16 @@ module Rumale
         check_sample_array(x)
         check_label_array(y)
         check_sample_label_size(x, y)
-
+        # initialize some variables.
         n_features = x.shape[1]
         @params[:max_features] = n_features if @params[:max_features].nil?
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-
-        # train estimator.
         @classes = Numo::Int32[*y.to_a.uniq.sort]
         n_classes = @classes.size
+        # train estimator.
         if n_classes > 2
-          @base_predictions =
-          @estimators =
-            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
-            y_mean = bin_y.mean
-            @base_predictions[n] = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
-            partial_fit(x, bin_y, @base_predictions[n])
-          end
+          @base_predictions = multiclass_base_predictions(y)
+          @estimators = multiclass_estimators(x, y)
         else
           negative_label = y.to_a.uniq.min
           bin_y = Numo::DFloat.cast(y.ne(negative_label)) * 2 - 1
@@ -119,17 +117,12 @@ module Rumale
           @base_predictions = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
           @estimators = partial_fit(x, bin_y, @base_predictions)
         end
-
         # calculate feature importances.
-        @feature_importances =
-
-
-
-
-        else
-          @estimators.each { |tree| @feature_importances += tree.feature_importances }
-        end
-
+        @feature_importances = if n_classes > 2
+                                 multiclass_feature_importances
+                               else
+                                 @estimators.map(&:feature_importances).reduce(&:+)
+                               end
         self
       end
 
@@ -139,18 +132,12 @@ module Rumale
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
       def decision_function(x)
         check_sample_array(x)
-        n_samples = x.shape[0]
         n_classes = @classes.size
         if n_classes > 2
-
-          n_classes.times do |n|
-            @estimators[n].each { |tree| scores[true, n] += tree.predict(x) }
-          end
+          multiclass_scores(x)
         else
-
-          @estimators.each { |tree| scores += tree.predict(x) }
+          @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
         end
-        scores
       end
 
       # Predict class labels for samples.
@@ -273,6 +260,68 @@ module Rumale
           max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
         )
       end
+
+      def multiclass_base_predictions(y)
+        n_classes = @classes.size
+        b = if enable_parallel?
+              # :nocov:
+              parallel_map(n_classes) do |n|
+                bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+                y_mean = bin_y.mean
+                0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+              end
+              # :nocov:
+            else
+              Array.new(n_classes) do |n|
+                bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+                y_mean = bin_y.mean
+                0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+              end
+            end
+        Numo::DFloat.asarray(b)
+      end
+
+      def multiclass_estimators(x, y)
+        n_classes = @classes.size
+        if enable_parallel?
+          # :nocov:
+          parallel_map(n_classes) do |n|
+            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+            partial_fit(x, bin_y, @base_predictions[n])
+          end
+          # :nocov:
+        else
+          Array.new(n_classes) do |n|
+            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+            partial_fit(x, bin_y, @base_predictions[n])
+          end
+        end
+      end
+
+      def multiclass_feature_importances
+        n_classes = @classes.size
+        if enable_parallel?
+          parallel_map(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        else
+          Array.new(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        end
+      end
+
+      def multiclass_scores(x)
+        n_classes = @classes.size
+        s = if enable_parallel?
+              # :nocov:
+              parallel_map(n_classes) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+              # :nocov:
+            else
+              Array.new(n_classes) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+            end
+        Numo::DFloat.asarray(s).transpose + @base_predictions
+      end
     end
   end
 end
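For more than two classes the classifier boosts one ensemble per class in one-vs-the-rest fashion, which is why `@estimators` becomes an array of tree arrays and `multiclass_scores` stacks the per-class score vectors into an [n_samples, n_classes] matrix before adding the per-class base predictions. A sketch of what that looks like from the caller's side (toy data, illustrative values):

```ruby
require 'rumale'
require 'parallel'

# Toy 3-class problem: labels 0, 1, 2 derived from the first feature.
x = Numo::DFloat.new(150, 4).rand
y = Numo::Int32.cast(x[true, 0] * 3.0)

gbc = Rumale::Ensemble::GradientBoostingClassifier.new(n_estimators: 20, n_jobs: -1, random_seed: 1)
gbc.fit(x, y)
gbc.decision_function(x).shape # [150, 3]: one boosted ensemble per class, scores stacked column-wise
```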
data/lib/rumale/ensemble/gradient_boosting_regressor.rb
CHANGED
@@ -51,19 +51,22 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
                      max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
+                                 max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
-        check_params_positive(n_estimators: n_estimators,
-
-
-                              max_features: max_features)
+        check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+                              subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                              min_samples_leaf: min_samples_leaf, max_features: max_features)
         @params = {}
         @params[:n_estimators] = n_estimators
         @params[:learning_rate] = learning_rate
@@ -73,6 +76,7 @@ module Rumale
         @params[:max_leaf_nodes] = max_leaf_nodes
         @params[:min_samples_leaf] = min_samples_leaf
         @params[:max_features] = max_features
+        @params[:n_jobs] = n_jobs
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @estimators = nil
@@ -90,32 +94,24 @@ module Rumale
         check_sample_array(x)
         check_tvalue_array(y)
         check_sample_tvalue_size(x, y)
-
+        # initialize some variables.
         n_features = x.shape[1]
         @params[:max_features] = n_features if @params[:max_features].nil?
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-
-        # train regressor.
         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        # train regressor.
         @base_predictions = n_outputs > 1 ? y.mean(0) : y.mean
         @estimators = if n_outputs > 1
-
-                        partial_fit(x, y[true, n], @base_predictions[n])
-                      end
+                        multivar_estimators(x, y)
                       else
                         partial_fit(x, y, @base_predictions)
                       end
-
         # calculate feature importances.
-        @feature_importances =
-
-
-
-
-        else
-          @estimators.each { |tree| @feature_importances += tree.feature_importances }
-        end
-
+        @feature_importances = if n_outputs > 1
+                                 multivar_feature_importances
+                               else
+                                 @estimators.map(&:feature_importances).reduce(&:+)
+                               end
         self
       end
 
@@ -125,18 +121,16 @@ module Rumale
       # @return [Numo::DFloat] (shape: [n_samples]) Predicted values per sample.
       def predict(x)
         check_sample_array(x)
-        n_samples = x.shape[0]
         n_outputs = @estimators.first.is_a?(Array) ? @estimators.size : 1
         if n_outputs > 1
-
-          n_outputs.times do |n|
-            @estimators[n].each { |tree| predicted[true, n] += tree.predict(x) }
-          end
+          multivar_predict(x)
         else
-
-
+          if enable_parallel?
+            parallel_map(@params[:n_estimators]) { |n| @estimators[n].predict(x) }.reduce(&:+) + @base_predictions
+          else
+            @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
+          end
         end
-        predicted
       end
 
       # Return the index of the leaf that each sample reached.
@@ -225,6 +219,40 @@ module Rumale
           max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
         )
       end
+
+      def multivar_estimators(x, y)
+        n_outputs = y.shape[1]
+        if enable_parallel?
+          parallel_map(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+        else
+          Array.new(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+        end
+      end
+
+      def multivar_feature_importances
+        n_outputs = @estimators.size
+        if enable_parallel?
+          parallel_map(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        else
+          Array.new(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        end
+      end
+
+      def multivar_predict(x)
+        n_outputs = @estimators.size
+        p = if enable_parallel?
+              # :nocov:
+              parallel_map(n_outputs) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+              # :nocov:
+            else
+              Array.new(n_outputs) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+            end
+        Numo::DFloat.asarray(p).transpose + @base_predictions
+      end
     end
   end
 end
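With a multi-output target the regressor fits one boosted ensemble per output column, and `multivar_predict` transposes the stacked per-column predictions back into an [n_samples, n_outputs] matrix before adding the per-column base predictions. A sketch with toy data (values illustrative):

```ruby
require 'rumale'
require 'parallel'

# Toy multi-output regression: 100 samples, 3 features, 2 target columns.
x = Numo::DFloat.new(100, 3).rand
y = x[true, 0..1] * 2.0

gbr = Rumale::Ensemble::GradientBoostingRegressor.new(n_estimators: 20, n_jobs: -1, random_seed: 1)
gbr.fit(x, y)
gbr.predict(x).shape # [100, 2]: one boosted ensemble per output column
```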