rumale 0.10.0 → 0.11.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +1 -1
- data/lib/rumale/base/base_estimator.rb +16 -0
- data/lib/rumale/ensemble/extra_trees_classifier.rb +28 -13
- data/lib/rumale/ensemble/extra_trees_regressor.rb +28 -13
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +83 -34
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +58 -30
- data/lib/rumale/ensemble/random_forest_classifier.rb +66 -37
- data/lib/rumale/ensemble/random_forest_regressor.rb +45 -15
- data/lib/rumale/kernel_machine/kernel_svc.rb +37 -11
- data/lib/rumale/linear_model/base_linear_model.rb +5 -1
- data/lib/rumale/linear_model/lasso.rb +13 -4
- data/lib/rumale/linear_model/linear_regression.rb +13 -3
- data/lib/rumale/linear_model/logistic_regression.rb +25 -6
- data/lib/rumale/linear_model/ridge.rb +13 -3
- data/lib/rumale/linear_model/svc.rb +40 -18
- data/lib/rumale/linear_model/svr.rb +12 -3
- data/lib/rumale/polynomial_model/base_factorization_machine.rb +6 -1
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +26 -7
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +12 -3
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -0
- metadata +16 -2
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 50ce110d0d5ad24245b5b52347a7ae72c1a7c673
+  data.tar.gz: 52c1acc4ebe4c8da8120dc431be4e1a953317a63
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f8774f51f6bde00ea9414de9bfbe2c31b1c3c09c6931bd29ae414117d2648ee8273fa4f8dc32e78573a9e9da96db2cba19ca67372e4ac56adbe2a68c9be5b92a
+  data.tar.gz: 7777ba4d627830877dea89b1c9573340fd03882ccdafac57700e261f1e0b621962cc9744129bdbf26ae1078995e7d16db9c36758ae9a327d93ef3e5c3f572b28
data/CHANGELOG.md CHANGED

@@ -1,3 +1,15 @@
+# 0.11.0
+- Introduce the [Parallel gem](https://github.com/grosser/parallel) to improve execution speed for one-vs-the-rest and bagging methods.
+- Add the n_jobs parameter, which specifies the number of jobs for parallel processing, to some estimators belonging to Rumale::LinearModel, Rumale::PolynomialModel, and Rumale::Ensemble.
+- The n_jobs parameter is valid only when the Parallel gem is loaded.
+
+```ruby
+require 'rumale'
+require 'parallel'
+
+svc = Rumale::LinearModel::SVC.new(n_jobs: -1)
+```
+
 # 0.10.0
 - Add class for t-distributed Stochastic Neighborhood Embedding.
 - Fix bug of zero division on min-max scaling class.
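The changelog example above only shows `SVC`, but the same parameter applies to the ensemble classes changed below. Here is a minimal hypothetical sketch (the toy data and estimator settings are invented for illustration, not taken from the gem's docs):

```ruby
# Hypothetical sketch: n_jobs on an ensemble estimator.
require 'rumale'
require 'parallel' # without this require, n_jobs is ignored

# Toy data: six samples, two features, binary labels.
samples = Numo::DFloat.new(6, 2).rand
labels  = Numo::Int32[0, 0, 0, 1, 1, 1]

# n_jobs of zero or less uses one process per processor.
rfc = Rumale::Ensemble::RandomForestClassifier.new(n_estimators: 10, n_jobs: -1, random_seed: 1)
rfc.fit(samples, labels)
rfc.predict(samples)
```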
data/README.md CHANGED

@@ -6,7 +6,7 @@
 [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
 [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
 [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
-[![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.
+[![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.11.0)
 
 Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
 Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
data/lib/rumale/base/base_estimator.rb CHANGED

@@ -8,6 +8,22 @@ module Rumale
       # Return parameters about an estimator.
       # @return [Hash]
       attr_reader :params
+
+      private
+
+      def enable_parallel?
+        return false if @params[:n_jobs].nil? || defined?(Parallel).nil?
+        true
+      end
+
+      def n_processes
+        return 1 unless enable_parallel?
+        @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
+      end
+
+      def parallel_map(n_outputs, &block)
+        Parallel.map(Array.new(n_outputs) { |v| v }, in_processes: n_processes, &block)
+      end
     end
   end
 end
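These three helpers are the whole parallelism switch: `enable_parallel?` requires both an explicit `n_jobs` and a loaded `Parallel` constant, and `n_processes` maps non-positive values to the processor count. A standalone sketch of the same decision logic (`processes_for` is a hypothetical name, not part of Rumale):

```ruby
# Standalone sketch of the gating logic above; `params` stands in for @params.
def processes_for(params)
  # Parallel execution needs an explicit n_jobs AND the Parallel gem loaded.
  return 1 if params[:n_jobs].nil? || defined?(Parallel).nil?
  # Zero or less means "use all processors"; otherwise use n_jobs as given.
  params[:n_jobs] <= 0 ? Parallel.processor_count : params[:n_jobs]
end

processes_for(n_jobs: nil) # => 1 (parallelism disabled)
processes_for(n_jobs: 4)   # => 4 if 'parallel' is required, otherwise 1
processes_for(n_jobs: -1)  # => Parallel.processor_count
```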
data/lib/rumale/ensemble/extra_trees_classifier.rb CHANGED

@@ -47,13 +47,17 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 10,
                      criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
+                                 max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(n_estimators: n_estimators, max_depth: max_depth,

@@ -76,18 +80,19 @@ module Rumale
         @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
         @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
-        @feature_importances = Numo::DFloat.zeros(n_features)
         # Construct trees.
-
-
-
-
-
-
-
-
-
-
+        rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+        @estimators = if enable_parallel?
+                        parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      else
+                        Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      end
+        @feature_importances =
+          if enable_parallel?
+            parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+          else
+            @estimators.map(&:feature_importances).reduce(&:+)
+          end
         @feature_importances /= @feature_importances.sum
         self
       end

@@ -130,6 +135,16 @@ module Rumale
       def marshal_load(obj)
         super
       end
+
+      private
+
+      def plant_tree(rnd_seed)
+        Tree::ExtraTreeClassifier.new(
+          criterion: @params[:criterion], max_depth: @params[:max_depth],
+          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+          max_features: @params[:max_features], random_seed: rnd_seed
+        )
+      end
     end
   end
 end
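Note how `fit` above draws `rng_seeds` from the parent RNG before fanning out: worker processes cannot advance a shared `@rng`, so pre-drawing one seed per tree keeps the serial and parallel paths deterministic and identical. A small standalone sketch of that pattern (not Rumale code):

```ruby
# Pre-drawn seeds make serial and parallel runs produce identical results.
require 'parallel'

rng   = Random.new(42)
seeds = Array.new(4) { rng.rand(2**31 - 1) }

serial   = Array.new(4) { |n| Random.new(seeds[n]).rand }
parallel = Parallel.map(0...4, in_processes: 2) { |n| Random.new(seeds[n]).rand }

serial == parallel # => true
```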
data/lib/rumale/ensemble/extra_trees_regressor.rb CHANGED

@@ -43,13 +43,17 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 10,
                      criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
+                                 max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(n_estimators: n_estimators, max_depth: max_depth,

@@ -71,18 +75,19 @@ module Rumale
         n_features = x.shape[1]
         @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-        @feature_importances = Numo::DFloat.zeros(n_features)
         # Construct forest.
-
-
-
-
-
-
-
-
-
-
+        rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+        @estimators = if enable_parallel?
+                        parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      else
+                        Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      end
+        @feature_importances =
+          if enable_parallel?
+            parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+          else
+            @estimators.map(&:feature_importances).reduce(&:+)
+          end
         @feature_importances /= @feature_importances.sum
         self
       end

@@ -116,6 +121,16 @@ module Rumale
       def marshal_load(obj)
         super
       end
+
+      private
+
+      def plant_tree(rnd_seed)
+        Tree::ExtraTreeRegressor.new(
+          criterion: @params[:criterion], max_depth: @params[:max_depth],
+          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+          max_features: @params[:max_features], random_seed: rnd_seed
+        )
+      end
     end
   end
 end
data/lib/rumale/ensemble/gradient_boosting_classifier.rb CHANGED

@@ -56,19 +56,22 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
                      max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
+                                 max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
-        check_params_positive(n_estimators: n_estimators,
-
-
-                              max_features: max_features)
+        check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+                              subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                              min_samples_leaf: min_samples_leaf, max_features: max_features)
         @params = {}
         @params[:n_estimators] = n_estimators
         @params[:learning_rate] = learning_rate

@@ -78,6 +81,7 @@ module Rumale
         @params[:max_leaf_nodes] = max_leaf_nodes
         @params[:min_samples_leaf] = min_samples_leaf
         @params[:max_features] = max_features
+        @params[:n_jobs] = n_jobs
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @estimators = nil

@@ -96,22 +100,16 @@ module Rumale
         check_sample_array(x)
         check_label_array(y)
         check_sample_label_size(x, y)
-
+        # initialize some variables.
         n_features = x.shape[1]
         @params[:max_features] = n_features if @params[:max_features].nil?
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-
-        # train estimator.
         @classes = Numo::Int32[*y.to_a.uniq.sort]
         n_classes = @classes.size
+        # train estimator.
         if n_classes > 2
-          @base_predictions =
-          @estimators =
-            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
-            y_mean = bin_y.mean
-            @base_predictions[n] = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
-            partial_fit(x, bin_y, @base_predictions[n])
-          end
+          @base_predictions = multiclass_base_predictions(y)
+          @estimators = multiclass_estimators(x, y)
         else
           negative_label = y.to_a.uniq.min
           bin_y = Numo::DFloat.cast(y.ne(negative_label)) * 2 - 1

@@ -119,17 +117,12 @@ module Rumale
           @base_predictions = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
           @estimators = partial_fit(x, bin_y, @base_predictions)
         end
-
         # calculate feature importances.
-        @feature_importances =
-
-
-
-
-        else
-          @estimators.each { |tree| @feature_importances += tree.feature_importances }
-        end
-
+        @feature_importances = if n_classes > 2
+                                 multiclass_feature_importances
+                               else
+                                 @estimators.map(&:feature_importances).reduce(&:+)
+                               end
         self
       end

@@ -139,18 +132,12 @@ module Rumale
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
       def decision_function(x)
         check_sample_array(x)
-        n_samples = x.shape[0]
         n_classes = @classes.size
         if n_classes > 2
-
-          n_classes.times do |n|
-            @estimators[n].each { |tree| scores[true, n] += tree.predict(x) }
-          end
+          multiclass_scores(x)
         else
-
-          @estimators.each { |tree| scores += tree.predict(x) }
+          @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
         end
-        scores
       end

       # Predict class labels for samples.

@@ -273,6 +260,68 @@ module Rumale
           max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
         )
       end
+
+      def multiclass_base_predictions(y)
+        n_classes = @classes.size
+        b = if enable_parallel?
+              # :nocov:
+              parallel_map(n_classes) do |n|
+                bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+                y_mean = bin_y.mean
+                0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+              end
+              # :nocov:
+            else
+              Array.new(n_classes) do |n|
+                bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+                y_mean = bin_y.mean
+                0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+              end
+            end
+        Numo::DFloat.asarray(b)
+      end
+
+      def multiclass_estimators(x, y)
+        n_classes = @classes.size
+        if enable_parallel?
+          # :nocov:
+          parallel_map(n_classes) do |n|
+            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+            partial_fit(x, bin_y, @base_predictions[n])
+          end
+          # :nocov:
+        else
+          Array.new(n_classes) do |n|
+            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+            partial_fit(x, bin_y, @base_predictions[n])
+          end
+        end
+      end
+
+      def multiclass_feature_importances
+        n_classes = @classes.size
+        if enable_parallel?
+          parallel_map(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        else
+          Array.new(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        end
+      end
+
+      def multiclass_scores(x)
+        n_classes = @classes.size
+        s = if enable_parallel?
+              # :nocov:
+              parallel_map(n_classes) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+              # :nocov:
+            else
+              Array.new(n_classes) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+            end
+        Numo::DFloat.asarray(s).transpose + @base_predictions
+      end
     end
   end
 end
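For reference, the constant built in `multiclass_base_predictions` is the standard log-odds initialization for two-class gradient boosting with labels encoded as {-1, +1}; this derivation is background, not part of the diff. With $\bar{y}$ the mean encoded label, the estimated positive-class rate is $p = (1 + \bar{y})/2$, and

$$F_0 = \frac{1}{2}\log\frac{p}{1-p} = \frac{1}{2}\log\frac{1+\bar{y}}{1-\bar{y}},$$

which is exactly `0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))` above.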
data/lib/rumale/ensemble/gradient_boosting_regressor.rb CHANGED

@@ -51,19 +51,22 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
                      max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                 max_features: max_features, random_seed: random_seed)
+                                 max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
-        check_params_positive(n_estimators: n_estimators,
-
-
-                              max_features: max_features)
+        check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+                              subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                              min_samples_leaf: min_samples_leaf, max_features: max_features)
         @params = {}
         @params[:n_estimators] = n_estimators
         @params[:learning_rate] = learning_rate

@@ -73,6 +76,7 @@ module Rumale
         @params[:max_leaf_nodes] = max_leaf_nodes
         @params[:min_samples_leaf] = min_samples_leaf
         @params[:max_features] = max_features
+        @params[:n_jobs] = n_jobs
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @estimators = nil

@@ -90,32 +94,24 @@ module Rumale
         check_sample_array(x)
         check_tvalue_array(y)
         check_sample_tvalue_size(x, y)
-
+        # initialize some variables.
         n_features = x.shape[1]
         @params[:max_features] = n_features if @params[:max_features].nil?
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-
-        # train regressor.
         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        # train regressor.
         @base_predictions = n_outputs > 1 ? y.mean(0) : y.mean
         @estimators = if n_outputs > 1
-
-                        partial_fit(x, y[true, n], @base_predictions[n])
-                      end
+                        multivar_estimators(x, y)
                       else
                         partial_fit(x, y, @base_predictions)
                       end
-
         # calculate feature importances.
-        @feature_importances =
-
-
-
-
-        else
-          @estimators.each { |tree| @feature_importances += tree.feature_importances }
-        end
-
+        @feature_importances = if n_outputs > 1
+                                 multivar_feature_importances
+                               else
+                                 @estimators.map(&:feature_importances).reduce(&:+)
+                               end
         self
       end

@@ -125,18 +121,16 @@ module Rumale
       # @return [Numo::DFloat] (shape: [n_samples]) Predicted values per sample.
       def predict(x)
         check_sample_array(x)
-        n_samples = x.shape[0]
         n_outputs = @estimators.first.is_a?(Array) ? @estimators.size : 1
         if n_outputs > 1
-
-          n_outputs.times do |n|
-            @estimators[n].each { |tree| predicted[true, n] += tree.predict(x) }
-          end
+          multivar_predict(x)
         else
-
-
+          if enable_parallel?
+            parallel_map(@params[:n_estimators]) { |n| @estimators[n].predict(x) }.reduce(&:+) + @base_predictions
+          else
+            @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
+          end
         end
-        predicted
       end

       # Return the index of the leaf that each sample reached.

@@ -225,6 +219,40 @@ module Rumale
           max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
         )
       end
+
+      def multivar_estimators(x, y)
+        n_outputs = y.shape[1]
+        if enable_parallel?
+          parallel_map(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+        else
+          Array.new(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+        end
+      end
+
+      def multivar_feature_importances
+        n_outputs = @estimators.size
+        if enable_parallel?
+          parallel_map(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        else
+          Array.new(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        end
+      end
+
+      def multivar_predict(x)
+        n_outputs = @estimators.size
+        p = if enable_parallel?
+              # :nocov:
+              parallel_map(n_outputs) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+              # :nocov:
+            else
+              Array.new(n_outputs) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+            end
+        Numo::DFloat.asarray(p).transpose + @base_predictions
+      end
     end
   end
 end
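To tie the regressor changes together, a hypothetical usage sketch (toy data and settings invented for illustration): with a two-column target, `fit` routes through `multivar_estimators` and `predict` through `multivar_predict`.

```ruby
require 'rumale'
require 'parallel'

# Toy data: eight samples, three features, two regression targets.
x = Numo::DFloat.new(8, 3).rand
y = Numo::DFloat.new(8, 2).rand

reg = Rumale::Ensemble::GradientBoostingRegressor.new(n_estimators: 50, n_jobs: -1, random_seed: 1)
reg.fit(x, y)        # one boosted ensemble per target column
reg.predict(x).shape # => [8, 2], assembled by multivar_predict
```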