rumale 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 939e53a9d8c52f86c1d112d1cfbca17802aa94d8
- data.tar.gz: cc1454ad92767b7ea27f9ec8d27cc7e7bbf97441
+ metadata.gz: 50ce110d0d5ad24245b5b52347a7ae72c1a7c673
+ data.tar.gz: 52c1acc4ebe4c8da8120dc431be4e1a953317a63
  SHA512:
- metadata.gz: bed2b567c09ec4e79e81181ca9978598cb89696463330c4ac6aa153375aaa7a86d2a6e0eb2af190037fd49d08aa720835af10ff3b021b851c285dc564607acaa
- data.tar.gz: 6b9faf79edecfad12ce46d3572810fdaa2648a3c1df090b99964c2e0ccef2553dcecc1cca7d12e1a9dd9b69108b91ff9fe68d94f74738f1a8f57ccdb4436c69c
+ metadata.gz: f8774f51f6bde00ea9414de9bfbe2c31b1c3c09c6931bd29ae414117d2648ee8273fa4f8dc32e78573a9e9da96db2cba19ca67372e4ac56adbe2a68c9be5b92a
+ data.tar.gz: 7777ba4d627830877dea89b1c9573340fd03882ccdafac57700e261f1e0b621962cc9744129bdbf26ae1078995e7d16db9c36758ae9a327d93ef3e5c3f572b28
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
+ # 0.11.0
+ - Introduce the [Parallel gem](https://github.com/grosser/parallel) to improve execution speed of the one-vs-the-rest and bagging methods.
+ - Add the n_jobs parameter, which specifies the number of jobs for parallel processing, to some estimators belonging to Rumale::LinearModel, Rumale::PolynomialModel, and Rumale::Ensemble.
+ - The n_jobs parameter takes effect only when the Parallel gem is loaded.
+
+ ```ruby
+ require 'rumale'
+ require 'parallel'
+
+ svc = Rumale::LinearModel::SVC.new(n_jobs: -1)
+ ```
+
  # 0.10.0
  - Add a class for t-distributed Stochastic Neighbor Embedding.
  - Fix a zero-division bug in the min-max scaling class.
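For a quick illustration of the new parameter beyond the SVC snippet above (a minimal sketch, not part of the released CHANGELOG: the sample matrix `x` and label vector `y` are assumed to be prepared elsewhere as Numo arrays), an ensemble estimator updated in the hunks below can be fitted across all processors like so:

```ruby
require 'rumale'
require 'parallel' # without this require, n_jobs is silently ignored

# x: Numo::DFloat samples, y: Numo::Int32 labels (assumed, prepared elsewhere).
forest = Rumale::Ensemble::ExtraTreesClassifier.new(n_estimators: 10, n_jobs: -1)
forest.fit(x, y) # n_jobs: -1 -> one worker process per processor
```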
data/README.md CHANGED
@@ -6,7 +6,7 @@
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.10.0)
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.11.0)

  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
data/lib/rumale/base/base_estimator.rb CHANGED
@@ -8,6 +8,22 @@ module Rumale
  # Return parameters about an estimator.
  # @return [Hash]
  attr_reader :params
+
+ private
+
+ def enable_parallel?
+ return false if @params[:n_jobs].nil? || defined?(Parallel).nil?
+ true
+ end
+
+ def n_processes
+ return 1 unless enable_parallel?
+ @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
+ end
+
+ def parallel_map(n_outputs, &block)
+ Parallel.map(Array.new(n_outputs) { |v| v }, in_processes: n_processes, &block)
+ end
  end
  end
  end
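These three private helpers are the whole parallelism switch: every estimator below consults `enable_parallel?` and falls back to a serial `Array.new` loop when `n_jobs` is unset or the Parallel gem is absent. A standalone sketch of that dispatch pattern (the `map_maybe_parallel` name is hypothetical, not Rumale API):

```ruby
require 'parallel' # comment this out to exercise the serial fallback

# Hypothetical restatement of the gating logic added above: a nil n_jobs or a
# missing Parallel constant means plain serial mapping; n_jobs <= 0 means one
# worker process per processor.
def map_maybe_parallel(n_items, n_jobs, &block)
  return Array.new(n_items, &block) if n_jobs.nil? || !defined?(Parallel)
  n_procs = n_jobs <= 0 ? Parallel.processor_count : n_jobs
  Parallel.map(0...n_items, in_processes: n_procs, &block)
end

map_maybe_parallel(4, -1) { |i| i * i } # => [0, 1, 4, 9]
```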
data/lib/rumale/ensemble/extra_trees_classifier.rb CHANGED
@@ -47,13 +47,17 @@ module Rumale
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching for the optimal split point.
  # If nil is given, split process considers all features.
+ # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+ # If nil is given, the method does not execute in parallel.
+ # If zero or less is given, it becomes equal to the number of processors.
+ # This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # It is used to randomly determine the order of features when deciding the splitting point.
  def initialize(n_estimators: 10,
  criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
- max_features: nil, random_seed: nil)
+ max_features: nil, n_jobs: nil, random_seed: nil)
  check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
- max_features: max_features, random_seed: random_seed)
+ max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
  check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
  check_params_string(criterion: criterion)
  check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
@@ -76,18 +80,19 @@ module Rumale
  @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
  @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
- @feature_importances = Numo::DFloat.zeros(n_features)
  # Construct trees.
- @estimators = Array.new(@params[:n_estimators]) do
- tree = Tree::ExtraTreeClassifier.new(
- criterion: @params[:criterion], max_depth: @params[:max_depth],
- max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
- max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
- )
- tree.fit(x, y)
- @feature_importances += tree.feature_importances
- tree
- end
+ rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+ @estimators = if enable_parallel?
+ parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+ else
+ Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+ end
+ @feature_importances =
+ if enable_parallel?
+ parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+ else
+ @estimators.map(&:feature_importances).reduce(&:+)
+ end
  @feature_importances /= @feature_importances.sum
  self
  end
@@ -130,6 +135,16 @@ module Rumale
  def marshal_load(obj)
  super
  end
+
+ private
+
+ def plant_tree(rnd_seed)
+ Tree::ExtraTreeClassifier.new(
+ criterion: @params[:criterion], max_depth: @params[:max_depth],
+ max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+ max_features: @params[:max_features], random_seed: rnd_seed
+ )
+ end
  end
  end
  end
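The `rng_seeds` array drawn up front in `fit` above is what keeps training reproducible: every seed comes from the estimator's own `@rng` before any worker process is forked, so the serial and parallel branches plant identical trees. A toy sketch of the same idea (hypothetical names, not Rumale API):

```ruby
rng = Random.new(42)
seeds = Array.new(3) { rng.rand(2**31) } # drawn once, in the parent process

# Each task rebuilds a deterministic generator from its own seed, so the
# results do not depend on which worker runs which task, or in what order.
results = seeds.map { |s| Random.new(s).rand }
```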
data/lib/rumale/ensemble/extra_trees_regressor.rb CHANGED
@@ -43,13 +43,17 @@ module Rumale
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching for the optimal split point.
  # If nil is given, split process considers all features.
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+ # If nil is given, the methods do not execute in parallel.
+ # If zero or less is given, it becomes equal to the number of processors.
+ # This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # It is used to randomly determine the order of features when deciding the splitting point.
  def initialize(n_estimators: 10,
  criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
- max_features: nil, random_seed: nil)
+ max_features: nil, n_jobs: nil, random_seed: nil)
  check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
- max_features: max_features, random_seed: random_seed)
+ max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
  check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
  check_params_string(criterion: criterion)
  check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
@@ -71,18 +75,19 @@ module Rumale
  n_features = x.shape[1]
  @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
- @feature_importances = Numo::DFloat.zeros(n_features)
  # Construct forest.
- @estimators = Array.new(@params[:n_estimators]) do
- tree = Tree::ExtraTreeRegressor.new(
- criterion: @params[:criterion], max_depth: @params[:max_depth],
- max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
- max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
- )
- tree.fit(x, y)
- @feature_importances += tree.feature_importances
- tree
- end
+ rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+ @estimators = if enable_parallel?
+ parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+ else
+ Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+ end
+ @feature_importances =
+ if enable_parallel?
+ parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+ else
+ @estimators.map(&:feature_importances).reduce(&:+)
+ end
  @feature_importances /= @feature_importances.sum
  self
  end
@@ -116,6 +121,16 @@ module Rumale
  def marshal_load(obj)
  super
  end
+
+ private
+
+ def plant_tree(rnd_seed)
+ Tree::ExtraTreeRegressor.new(
+ criterion: @params[:criterion], max_depth: @params[:max_depth],
+ max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+ max_features: @params[:max_features], random_seed: rnd_seed
+ )
+ end
  end
  end
  end
data/lib/rumale/ensemble/gradient_boosting_classifier.rb CHANGED
@@ -56,19 +56,22 @@ module Rumale
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching for the optimal split point.
  # If nil is given, split process considers all features.
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+ # If nil is given, the methods do not execute in parallel.
+ # If zero or less is given, it becomes equal to the number of processors.
+ # This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # It is used to randomly determine the order of features when deciding the splitting point.
  def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
  max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
- max_features: nil, random_seed: nil)
+ max_features: nil, n_jobs: nil, random_seed: nil)
  check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
- max_features: max_features, random_seed: random_seed)
+ max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
  check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
  check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
- check_params_positive(n_estimators: n_estimators,
- learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample,
- max_depth: max_depth, max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
- max_features: max_features)
+ check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+ subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+ min_samples_leaf: min_samples_leaf, max_features: max_features)
  @params = {}
  @params[:n_estimators] = n_estimators
  @params[:learning_rate] = learning_rate
@@ -78,6 +81,7 @@ module Rumale
  @params[:max_leaf_nodes] = max_leaf_nodes
  @params[:min_samples_leaf] = min_samples_leaf
  @params[:max_features] = max_features
+ @params[:n_jobs] = n_jobs
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
  @estimators = nil
@@ -96,22 +100,16 @@ module Rumale
  check_sample_array(x)
  check_label_array(y)
  check_sample_label_size(x, y)
-
+ # initialize some variables.
  n_features = x.shape[1]
  @params[:max_features] = n_features if @params[:max_features].nil?
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-
- # train estimator.
  @classes = Numo::Int32[*y.to_a.uniq.sort]
  n_classes = @classes.size
+ # train estimator.
  if n_classes > 2
- @base_predictions = Numo::DFloat.zeros(n_classes)
- @estimators = Array.new(n_classes) do |n|
- bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
- y_mean = bin_y.mean
- @base_predictions[n] = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
- partial_fit(x, bin_y, @base_predictions[n])
- end
+ @base_predictions = multiclass_base_predictions(y)
+ @estimators = multiclass_estimators(x, y)
  else
  negative_label = y.to_a.uniq.min
  bin_y = Numo::DFloat.cast(y.ne(negative_label)) * 2 - 1
@@ -119,17 +117,12 @@ module Rumale
  @base_predictions = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
  @estimators = partial_fit(x, bin_y, @base_predictions)
  end
-
  # calculate feature importances.
- @feature_importances = Numo::DFloat.zeros(n_features)
- if n_classes > 2
- n_classes.times do |n|
- @estimators[n].each { |tree| @feature_importances += tree.feature_importances }
- end
- else
- @estimators.each { |tree| @feature_importances += tree.feature_importances }
- end
-
+ @feature_importances = if n_classes > 2
+ multiclass_feature_importances
+ else
+ @estimators.map(&:feature_importances).reduce(&:+)
+ end
  self
  end
@@ -139,18 +132,12 @@ module Rumale
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
  def decision_function(x)
  check_sample_array(x)
- n_samples = x.shape[0]
  n_classes = @classes.size
  if n_classes > 2
- scores = Numo::DFloat.ones(n_samples, n_classes) * @base_predictions
- n_classes.times do |n|
- @estimators[n].each { |tree| scores[true, n] += tree.predict(x) }
- end
+ multiclass_scores(x)
  else
- scores = Numo::DFloat.ones(n_samples) * @base_predictions
- @estimators.each { |tree| scores += tree.predict(x) }
+ @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
  end
- scores
  end

  # Predict class labels for samples.
@@ -273,6 +260,68 @@ module Rumale
  max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
  )
  end
+
+ def multiclass_base_predictions(y)
+ n_classes = @classes.size
+ b = if enable_parallel?
+ # :nocov:
+ parallel_map(n_classes) do |n|
+ bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+ y_mean = bin_y.mean
+ 0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+ end
+ # :nocov:
+ else
+ Array.new(n_classes) do |n|
+ bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+ y_mean = bin_y.mean
+ 0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+ end
+ end
+ Numo::DFloat.asarray(b)
+ end
+
+ def multiclass_estimators(x, y)
+ n_classes = @classes.size
+ if enable_parallel?
+ # :nocov:
+ parallel_map(n_classes) do |n|
+ bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+ partial_fit(x, bin_y, @base_predictions[n])
+ end
+ # :nocov:
+ else
+ Array.new(n_classes) do |n|
+ bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+ partial_fit(x, bin_y, @base_predictions[n])
+ end
+ end
+ end
+
+ def multiclass_feature_importances
+ n_classes = @classes.size
+ if enable_parallel?
+ parallel_map(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+ else
+ Array.new(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+ end
+ end
+
+ def multiclass_scores(x)
+ n_classes = @classes.size
+ s = if enable_parallel?
+ # :nocov:
+ parallel_map(n_classes) do |n|
+ @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+ end
+ # :nocov:
+ else
+ Array.new(n_classes) do |n|
+ @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+ end
+ end
+ Numo::DFloat.asarray(s).transpose + @base_predictions
+ end
  end
  end
  end
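Note the last line of `multiclass_scores` above: `parallel_map` yields one score vector per class, so stacking gives a `[n_classes, n_samples]` matrix that is transposed to the documented `[n_samples, n_classes]` shape before the per-class base predictions are broadcast-added. In Numo terms (a toy sketch with made-up numbers, outside any Rumale class):

```ruby
require 'numo/narray'

per_class = [Numo::DFloat[1.0, 2.0], Numo::DFloat[3.0, 4.0]] # 2 classes, 2 samples
base      = Numo::DFloat[0.5, -0.5]                          # one offset per class
scores    = Numo::DFloat.asarray(per_class).transpose + base
# => shape [n_samples, n_classes]: [[1.5, 2.5], [2.5, 3.5]]
```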
data/lib/rumale/ensemble/gradient_boosting_regressor.rb CHANGED
@@ -51,19 +51,22 @@ module Rumale
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching for the optimal split point.
  # If nil is given, split process considers all features.
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+ # If nil is given, the methods do not execute in parallel.
+ # If zero or less is given, it becomes equal to the number of processors.
+ # This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # It is used to randomly determine the order of features when deciding the splitting point.
  def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
  max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
- max_features: nil, random_seed: nil)
+ max_features: nil, n_jobs: nil, random_seed: nil)
  check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
- max_features: max_features, random_seed: random_seed)
+ max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
  check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
  check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
- check_params_positive(n_estimators: n_estimators,
- learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample,
- max_depth: max_depth, max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
- max_features: max_features)
+ check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+ subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+ min_samples_leaf: min_samples_leaf, max_features: max_features)
  @params = {}
  @params[:n_estimators] = n_estimators
  @params[:learning_rate] = learning_rate
@@ -73,6 +76,7 @@ module Rumale
  @params[:max_leaf_nodes] = max_leaf_nodes
  @params[:min_samples_leaf] = min_samples_leaf
  @params[:max_features] = max_features
+ @params[:n_jobs] = n_jobs
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
  @estimators = nil
@@ -90,32 +94,24 @@ module Rumale
  check_sample_array(x)
  check_tvalue_array(y)
  check_sample_tvalue_size(x, y)
-
+ # initialize some variables.
  n_features = x.shape[1]
  @params[:max_features] = n_features if @params[:max_features].nil?
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-
- # train regressor.
  n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+ # train regressor.
  @base_predictions = n_outputs > 1 ? y.mean(0) : y.mean
  @estimators = if n_outputs > 1
- Array.new(n_outputs) do |n|
- partial_fit(x, y[true, n], @base_predictions[n])
- end
+ multivar_estimators(x, y)
  else
  partial_fit(x, y, @base_predictions)
  end
-
  # calculate feature importances.
- @feature_importances = Numo::DFloat.zeros(n_features)
- if n_outputs > 1
- n_outputs.times do |n|
- @estimators[n].each { |tree| @feature_importances += tree.feature_importances }
- end
- else
- @estimators.each { |tree| @feature_importances += tree.feature_importances }
- end
-
+ @feature_importances = if n_outputs > 1
+ multivar_feature_importances
+ else
+ @estimators.map(&:feature_importances).reduce(&:+)
+ end
  self
  end
@@ -125,18 +121,16 @@ module Rumale
  # @return [Numo::DFloat] (shape: [n_samples]) Predicted values per sample.
  def predict(x)
  check_sample_array(x)
- n_samples = x.shape[0]
  n_outputs = @estimators.first.is_a?(Array) ? @estimators.size : 1
  if n_outputs > 1
- predicted = Numo::DFloat.ones(n_samples, n_outputs) * @base_predictions
- n_outputs.times do |n|
- @estimators[n].each { |tree| predicted[true, n] += tree.predict(x) }
- end
+ multivar_predict(x)
  else
- predicted = Numo::DFloat.ones(n_samples) * @base_predictions
- @estimators.each { |tree| predicted += tree.predict(x) }
+ if enable_parallel?
+ parallel_map(@params[:n_estimators]) { |n| @estimators[n].predict(x) }.reduce(&:+) + @base_predictions
+ else
+ @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
+ end
  end
- predicted
  end

  # Return the index of the leaf that each sample reached.
@@ -225,6 +219,40 @@ module Rumale
  max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
  )
  end
+
+ def multivar_estimators(x, y)
+ n_outputs = y.shape[1]
+ if enable_parallel?
+ parallel_map(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+ else
+ Array.new(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+ end
+ end
+
+ def multivar_feature_importances
+ n_outputs = @estimators.size
+ if enable_parallel?
+ parallel_map(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+ else
+ Array.new(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+ end
+ end
+
+ def multivar_predict(x)
+ n_outputs = @estimators.size
+ p = if enable_parallel?
+ # :nocov:
+ parallel_map(n_outputs) do |n|
+ @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+ end
+ # :nocov:
+ else
+ Array.new(n_outputs) do |n|
+ @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+ end
+ end
+ Numo::DFloat.asarray(p).transpose + @base_predictions
+ end
  end
  end
  end
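Every `multiclass_*` and `multivar_*` helper introduced in these two files follows the same shape: build a per-class (or per-output) array, serially or via `parallel_map`, then `reduce(&:+)` or stack the results. A condensed sketch of the one-vs-the-rest step (`fit_single` is a hypothetical stand-in for Rumale's `partial_fit`, not a real method):

```ruby
require 'numo/narray'

# Condensed, hypothetical restatement of multiclass_estimators above:
# binarize the labels per class (+1 for the class, -1 for the rest)
# and fit one model per class.
def fit_one_vs_rest(x, y, classes, &fit_single)
  classes.to_a.map do |c|
    bin_y = Numo::DFloat.cast(y.eq(c)) * 2 - 1 # +1 for class c, -1 otherwise
    fit_single.call(x, bin_y)
  end
end
```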