rumale 0.10.0 → 0.11.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 939e53a9d8c52f86c1d112d1cfbca17802aa94d8
- data.tar.gz: cc1454ad92767b7ea27f9ec8d27cc7e7bbf97441
+ metadata.gz: 50ce110d0d5ad24245b5b52347a7ae72c1a7c673
+ data.tar.gz: 52c1acc4ebe4c8da8120dc431be4e1a953317a63
  SHA512:
- metadata.gz: bed2b567c09ec4e79e81181ca9978598cb89696463330c4ac6aa153375aaa7a86d2a6e0eb2af190037fd49d08aa720835af10ff3b021b851c285dc564607acaa
- data.tar.gz: 6b9faf79edecfad12ce46d3572810fdaa2648a3c1df090b99964c2e0ccef2553dcecc1cca7d12e1a9dd9b69108b91ff9fe68d94f74738f1a8f57ccdb4436c69c
+ metadata.gz: f8774f51f6bde00ea9414de9bfbe2c31b1c3c09c6931bd29ae414117d2648ee8273fa4f8dc32e78573a9e9da96db2cba19ca67372e4ac56adbe2a68c9be5b92a
+ data.tar.gz: 7777ba4d627830877dea89b1c9573340fd03882ccdafac57700e261f1e0b621962cc9744129bdbf26ae1078995e7d16db9c36758ae9a327d93ef3e5c3f572b28
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
+ # 0.11.0
+ - Introduce the [Parallel gem](https://github.com/grosser/parallel) to improve execution speed of the one-vs-the-rest and bagging methods.
+ - Add the n_jobs parameter, which specifies the number of jobs for parallel processing, to some estimators belonging to Rumale::LinearModel, Rumale::PolynomialModel, and Rumale::Ensemble.
+ - The n_jobs parameter is valid only when the Parallel gem is loaded.
+
+ ```ruby
+ require 'rumale'
+ require 'parallel'
+
+ svc = Rumale::LinearModel::SVC.new(n_jobs: -1)
+ ```
+
  # 0.10.0
  - Add class for t-distributed Stochastic Neighbor Embedding.
  - Fix bug of zero division on min-max scaling class.
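
For orientation, here is a minimal usage sketch of the new option applied to one of the ensemble classes changed below (the toy data and the `Rumale::Ensemble::ExtraTreesClassifier` call are illustrative assumptions, not part of this diff):

```ruby
require 'numo/narray'
require 'parallel' # must be loaded, otherwise n_jobs is silently ignored
require 'rumale'

# Toy two-class dataset: 100 samples, 4 features.
x = Numo::DFloat.new(100, 4).rand
y = Numo::Int32.cast(x[true, 0].gt(0.5)) # labels derived from the first feature

# n_jobs of zero or less uses as many processes as there are processors.
model = Rumale::Ensemble::ExtraTreesClassifier.new(n_estimators: 20, n_jobs: -1, random_seed: 1)
model.fit(x, y)
puts model.predict(x[0...5, true]).to_a.inspect
```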
data/README.md CHANGED
@@ -6,7 +6,7 @@
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.10.0)
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.11.0)

  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
data/lib/rumale/base/base_estimator.rb CHANGED
@@ -8,6 +8,22 @@ module Rumale
  # Return parameters about an estimator.
  # @return [Hash]
  attr_reader :params
+
+ private
+
+ def enable_parallel?
+   return false if @params[:n_jobs].nil? || defined?(Parallel).nil?
+   true
+ end
+
+ def n_processes
+   return 1 unless enable_parallel?
+   @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
+ end
+
+ def parallel_map(n_outputs, &block)
+   Parallel.map(Array.new(n_outputs) { |v| v }, in_processes: n_processes, &block)
+ end
  end
  end
  end
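
These three private helpers gate every parallel path in this release: parallel execution happens only when `n_jobs` was supplied and the `Parallel` constant is defined, and a non-positive `n_jobs` resolves to the processor count. A standalone sketch of the same dispatch rules (the `ToyEstimator` class is hypothetical, for illustration only):

```ruby
require 'parallel'

# Hypothetical class mirroring the n_jobs dispatch rules above.
class ToyEstimator
  def initialize(n_jobs: nil)
    @params = { n_jobs: n_jobs }
  end

  # Parallelism is enabled only if n_jobs is set and Parallel is loaded.
  def enable_parallel?
    !@params[:n_jobs].nil? && !defined?(Parallel).nil?
  end

  # nil means run serially; zero or less means "use all processors".
  def n_processes
    return 1 unless enable_parallel?
    @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
  end
end

p ToyEstimator.new.n_processes             # => 1 (n_jobs not given)
p ToyEstimator.new(n_jobs: 2).n_processes  # => 2
p ToyEstimator.new(n_jobs: -1).n_processes # => Parallel.processor_count
```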
data/lib/rumale/ensemble/extra_trees_classifier.rb CHANGED
@@ -47,13 +47,17 @@ module Rumale
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching optimal split point.
  #   If nil is given, split process considers all features.
+ # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+ #   If nil is given, the method does not execute in parallel.
+ #   If zero or less is given, it becomes equal to the number of processors.
+ #   This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  #   It is used to randomly determine the order of features when deciding the splitting point.
  def initialize(n_estimators: 10,
                 criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                max_features: nil, random_seed: nil)
+                max_features: nil, n_jobs: nil, random_seed: nil)
  check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                          max_features: max_features, random_seed: random_seed)
+                          max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
  check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
  check_params_string(criterion: criterion)
  check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
@@ -76,18 +80,19 @@ module Rumale
  @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
  @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
- @feature_importances = Numo::DFloat.zeros(n_features)
  # Construct trees.
- @estimators = Array.new(@params[:n_estimators]) do
-   tree = Tree::ExtraTreeClassifier.new(
-     criterion: @params[:criterion], max_depth: @params[:max_depth],
-     max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
-     max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
-   )
-   tree.fit(x, y)
-   @feature_importances += tree.feature_importances
-   tree
- end
+ rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+ @estimators = if enable_parallel?
+                 parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+               else
+                 Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+               end
+ @feature_importances =
+   if enable_parallel?
+     parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+   else
+     @estimators.map(&:feature_importances).reduce(&:+)
+   end
  @feature_importances /= @feature_importances.sum
  self
  end
@@ -130,6 +135,16 @@ module Rumale
  def marshal_load(obj)
    super
  end
+
+ private
+
+ def plant_tree(rnd_seed)
+   Tree::ExtraTreeClassifier.new(
+     criterion: @params[:criterion], max_depth: @params[:max_depth],
+     max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+     max_features: @params[:max_features], random_seed: rnd_seed
+   )
+ end
  end
  end
  end
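
In the rewritten `fit` above, per-tree importance vectors are combined with `reduce(&:+)` and then normalized to sum to one; a tiny sketch of that aggregation on made-up vectors:

```ruby
require 'numo/narray'

# Two made-up per-tree importance vectors over two features.
imps = [Numo::DFloat[0.2, 0.8], Numo::DFloat[0.6, 0.4]]
total = imps.reduce(&:+) # => Numo::DFloat[0.8, 1.2]
total /= total.sum       # => Numo::DFloat[0.4, 0.6], normalized to sum to 1
p total.to_a
```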
data/lib/rumale/ensemble/extra_trees_regressor.rb CHANGED
@@ -43,13 +43,17 @@ module Rumale
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching optimal split point.
  #   If nil is given, split process considers all features.
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+ #   If nil is given, the methods do not execute in parallel.
+ #   If zero or less is given, it becomes equal to the number of processors.
+ #   This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  #   It is used to randomly determine the order of features when deciding the splitting point.
  def initialize(n_estimators: 10,
                 criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                max_features: nil, random_seed: nil)
+                max_features: nil, n_jobs: nil, random_seed: nil)
  check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                          max_features: max_features, random_seed: random_seed)
+                          max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
  check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
  check_params_string(criterion: criterion)
  check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
@@ -71,18 +75,19 @@ module Rumale
  n_features = x.shape[1]
  @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
- @feature_importances = Numo::DFloat.zeros(n_features)
  # Construct forest.
- @estimators = Array.new(@params[:n_estimators]) do
-   tree = Tree::ExtraTreeRegressor.new(
-     criterion: @params[:criterion], max_depth: @params[:max_depth],
-     max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
-     max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
-   )
-   tree.fit(x, y)
-   @feature_importances += tree.feature_importances
-   tree
- end
+ rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+ @estimators = if enable_parallel?
+                 parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+               else
+                 Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+               end
+ @feature_importances =
+   if enable_parallel?
+     parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+   else
+     @estimators.map(&:feature_importances).reduce(&:+)
+   end
  @feature_importances /= @feature_importances.sum
  self
  end
@@ -116,6 +121,16 @@ module Rumale
  def marshal_load(obj)
    super
  end
+
+ private
+
+ def plant_tree(rnd_seed)
+   Tree::ExtraTreeRegressor.new(
+     criterion: @params[:criterion], max_depth: @params[:max_depth],
+     max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+     max_features: @params[:max_features], random_seed: rnd_seed
+   )
+ end
  end
  end
  end
data/lib/rumale/ensemble/gradient_boosting_classifier.rb CHANGED
@@ -56,19 +56,22 @@ module Rumale
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching optimal split point.
  #   If nil is given, split process considers all features.
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+ #   If nil is given, the methods do not execute in parallel.
+ #   If zero or less is given, it becomes equal to the number of processors.
+ #   This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  #   It is used to randomly determine the order of features when deciding the splitting point.
  def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
                 max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                max_features: nil, random_seed: nil)
+                max_features: nil, n_jobs: nil, random_seed: nil)
  check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                          max_features: max_features, random_seed: random_seed)
+                          max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
  check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
  check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
- check_params_positive(n_estimators: n_estimators,
-                       learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample,
-                       max_depth: max_depth, max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
-                       max_features: max_features)
+ check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+                       subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                       min_samples_leaf: min_samples_leaf, max_features: max_features)
  @params = {}
  @params[:n_estimators] = n_estimators
  @params[:learning_rate] = learning_rate
@@ -78,6 +81,7 @@ module Rumale
  @params[:max_leaf_nodes] = max_leaf_nodes
  @params[:min_samples_leaf] = min_samples_leaf
  @params[:max_features] = max_features
+ @params[:n_jobs] = n_jobs
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
  @estimators = nil
@@ -96,22 +100,16 @@ module Rumale
  check_sample_array(x)
  check_label_array(y)
  check_sample_label_size(x, y)
-
+ # initialize some variables.
  n_features = x.shape[1]
  @params[:max_features] = n_features if @params[:max_features].nil?
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-
- # train estimator.
  @classes = Numo::Int32[*y.to_a.uniq.sort]
  n_classes = @classes.size
+ # train estimator.
  if n_classes > 2
-   @base_predictions = Numo::DFloat.zeros(n_classes)
-   @estimators = Array.new(n_classes) do |n|
-     bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
-     y_mean = bin_y.mean
-     @base_predictions[n] = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
-     partial_fit(x, bin_y, @base_predictions[n])
-   end
+   @base_predictions = multiclass_base_predictions(y)
+   @estimators = multiclass_estimators(x, y)
  else
    negative_label = y.to_a.uniq.min
    bin_y = Numo::DFloat.cast(y.ne(negative_label)) * 2 - 1
@@ -119,17 +117,12 @@ module Rumale
    @base_predictions = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
    @estimators = partial_fit(x, bin_y, @base_predictions)
  end
-
  # calculate feature importances.
- @feature_importances = Numo::DFloat.zeros(n_features)
- if n_classes > 2
-   n_classes.times do |n|
-     @estimators[n].each { |tree| @feature_importances += tree.feature_importances }
-   end
- else
-   @estimators.each { |tree| @feature_importances += tree.feature_importances }
- end
-
+ @feature_importances = if n_classes > 2
+                          multiclass_feature_importances
+                        else
+                          @estimators.map(&:feature_importances).reduce(&:+)
+                        end
  self
  end

@@ -139,18 +132,12 @@ module Rumale
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
  def decision_function(x)
    check_sample_array(x)
-   n_samples = x.shape[0]
    n_classes = @classes.size
    if n_classes > 2
-     scores = Numo::DFloat.ones(n_samples, n_classes) * @base_predictions
-     n_classes.times do |n|
-       @estimators[n].each { |tree| scores[true, n] += tree.predict(x) }
-     end
+     multiclass_scores(x)
    else
-     scores = Numo::DFloat.ones(n_samples) * @base_predictions
-     @estimators.each { |tree| scores += tree.predict(x) }
+     @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
    end
-   scores
  end

  # Predict class labels for samples.
@@ -273,6 +260,68 @@ module Rumale
    max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
  )
  end
+
+ def multiclass_base_predictions(y)
+   n_classes = @classes.size
+   b = if enable_parallel?
+         # :nocov:
+         parallel_map(n_classes) do |n|
+           bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+           y_mean = bin_y.mean
+           0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+         end
+         # :nocov:
+       else
+         Array.new(n_classes) do |n|
+           bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+           y_mean = bin_y.mean
+           0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+         end
+       end
+   Numo::DFloat.asarray(b)
+ end
+
+ def multiclass_estimators(x, y)
+   n_classes = @classes.size
+   if enable_parallel?
+     # :nocov:
+     parallel_map(n_classes) do |n|
+       bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+       partial_fit(x, bin_y, @base_predictions[n])
+     end
+     # :nocov:
+   else
+     Array.new(n_classes) do |n|
+       bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+       partial_fit(x, bin_y, @base_predictions[n])
+     end
+   end
+ end
+
+ def multiclass_feature_importances
+   n_classes = @classes.size
+   if enable_parallel?
+     parallel_map(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+   else
+     Array.new(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+   end
+ end
+
+ def multiclass_scores(x)
+   n_classes = @classes.size
+   s = if enable_parallel?
+         # :nocov:
+         parallel_map(n_classes) do |n|
+           @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+         end
+         # :nocov:
+       else
+         Array.new(n_classes) do |n|
+           @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+         end
+       end
+   Numo::DFloat.asarray(s).transpose + @base_predictions
+ end
  end
  end
  end
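
`multiclass_base_predictions` starts each one-vs-the-rest ensemble from half the log-odds of its binarized labels; a worked check on made-up labels:

```ruby
# With labels binarized to {-1, +1}, a mean of m gives a base score of
# 0.5 * log((1 + m) / (1 - m)), i.e. half the log-odds of the positive class.
bin_y = [1.0, 1.0, 1.0, -1.0]  # three positives, one negative
m = bin_y.sum / bin_y.size     # => 0.5
base = 0.5 * Math.log((1.0 + m) / (1.0 - m))
puts base                      # => ~0.549, half of log(3) for 3:1 odds
```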
data/lib/rumale/ensemble/gradient_boosting_regressor.rb CHANGED
@@ -51,19 +51,22 @@ module Rumale
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching optimal split point.
  #   If nil is given, split process considers all features.
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+ #   If nil is given, the methods do not execute in parallel.
+ #   If zero or less is given, it becomes equal to the number of processors.
+ #   This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  #   It is used to randomly determine the order of features when deciding the splitting point.
  def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
                 max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                max_features: nil, random_seed: nil)
+                max_features: nil, n_jobs: nil, random_seed: nil)
  check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                          max_features: max_features, random_seed: random_seed)
+                          max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
  check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
  check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
- check_params_positive(n_estimators: n_estimators,
-                       learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample,
-                       max_depth: max_depth, max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
-                       max_features: max_features)
+ check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+                       subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                       min_samples_leaf: min_samples_leaf, max_features: max_features)
  @params = {}
  @params[:n_estimators] = n_estimators
  @params[:learning_rate] = learning_rate
@@ -73,6 +76,7 @@ module Rumale
  @params[:max_leaf_nodes] = max_leaf_nodes
  @params[:min_samples_leaf] = min_samples_leaf
  @params[:max_features] = max_features
+ @params[:n_jobs] = n_jobs
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
  @estimators = nil
@@ -90,32 +94,24 @@ module Rumale
  check_sample_array(x)
  check_tvalue_array(y)
  check_sample_tvalue_size(x, y)
-
+ # initialize some variables.
  n_features = x.shape[1]
  @params[:max_features] = n_features if @params[:max_features].nil?
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-
- # train regressor.
  n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+ # train regressor.
  @base_predictions = n_outputs > 1 ? y.mean(0) : y.mean
  @estimators = if n_outputs > 1
-                 Array.new(n_outputs) do |n|
-                   partial_fit(x, y[true, n], @base_predictions[n])
-                 end
+                 multivar_estimators(x, y)
                else
                  partial_fit(x, y, @base_predictions)
                end
-
  # calculate feature importances.
- @feature_importances = Numo::DFloat.zeros(n_features)
- if n_outputs > 1
-   n_outputs.times do |n|
-     @estimators[n].each { |tree| @feature_importances += tree.feature_importances }
-   end
- else
-   @estimators.each { |tree| @feature_importances += tree.feature_importances }
- end
-
+ @feature_importances = if n_outputs > 1
+                          multivar_feature_importances
+                        else
+                          @estimators.map(&:feature_importances).reduce(&:+)
+                        end
  self
  end

@@ -125,18 +121,16 @@ module Rumale
  # @return [Numo::DFloat] (shape: [n_samples]) Predicted values per sample.
  def predict(x)
    check_sample_array(x)
-   n_samples = x.shape[0]
    n_outputs = @estimators.first.is_a?(Array) ? @estimators.size : 1
    if n_outputs > 1
-     predicted = Numo::DFloat.ones(n_samples, n_outputs) * @base_predictions
-     n_outputs.times do |n|
-       @estimators[n].each { |tree| predicted[true, n] += tree.predict(x) }
-     end
+     multivar_predict(x)
    else
-     predicted = Numo::DFloat.ones(n_samples) * @base_predictions
-     @estimators.each { |tree| predicted += tree.predict(x) }
+     if enable_parallel?
+       parallel_map(@params[:n_estimators]) { |n| @estimators[n].predict(x) }.reduce(&:+) + @base_predictions
+     else
+       @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
+     end
    end
-   predicted
  end

  # Return the index of the leaf that each sample reached.
@@ -225,6 +219,40 @@ module Rumale
    max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
  )
  end
+
+ def multivar_estimators(x, y)
+   n_outputs = y.shape[1]
+   if enable_parallel?
+     parallel_map(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+   else
+     Array.new(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+   end
+ end
+
+ def multivar_feature_importances
+   n_outputs = @estimators.size
+   if enable_parallel?
+     parallel_map(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+   else
+     Array.new(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+   end
+ end
+
+ def multivar_predict(x)
+   n_outputs = @estimators.size
+   p = if enable_parallel?
+         # :nocov:
+         parallel_map(n_outputs) do |n|
+           @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+         end
+         # :nocov:
+       else
+         Array.new(n_outputs) do |n|
+           @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+         end
+       end
+   Numo::DFloat.asarray(p).transpose + @base_predictions
+ end
  end
  end
  end
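
`multivar_predict` gathers one prediction vector per output and transposes the stack into the `[n_samples, n_outputs]` shape that callers expect; a shape-only sketch with made-up numbers:

```ruby
require 'numo/narray'

# Two outputs, three samples: one prediction vector per output.
per_output = [Numo::DFloat[1.0, 2.0, 3.0], Numo::DFloat[10.0, 20.0, 30.0]]
stacked = Numo::DFloat.asarray(per_output) # shape [2, 3]
p stacked.transpose.shape                  # => [3, 2], one row per sample
```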