rumale-ensemble 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/estimator'
5
+ require 'rumale/base/classifier'
6
+ require 'rumale/tree/decision_tree_classifier'
7
+ require 'rumale/ensemble/value'
8
+
9
+ module Rumale
10
+ # This module consists of the classes that implement ensemble-based methods.
11
+ module Ensemble
12
+ # RandomForestClassifier is a class that implements random forest for classification.
13
+ #
14
+ # @example
15
+ # require 'rumale/ensemble/random_forest_classifier'
16
+ #
17
+ # estimator =
18
+ # Rumale::Ensemble::RandomForestClassifier.new(
19
+ # n_estimators: 10, criterion: 'gini', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
20
+ # estimator.fit(training_samples, training_labels)
21
+ # results = estimator.predict(testing_samples)
22
+ #
23
class RandomForestClassifier < ::Rumale::Base::Estimator
  include ::Rumale::Base::Classifier

  # Return the set of estimators.
  # @return [Array<DecisionTreeClassifier>]
  attr_reader :estimators

  # Return the class labels.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # Return the importance for each feature.
  # @return [Numo::DFloat] (size: n_features)
  attr_reader :feature_importances

  # Return the random generator for random selection of feature index.
  # @return [Random]
  attr_reader :rng

  # Create a new classifier with random forest.
  #
  # @param n_estimators [Integer] The number of decision trees for constructing random forest.
  # @param criterion [String] The function to evaluate splitting point. Supported criteria are 'gini' and 'entropy'.
  # @param max_depth [Integer] The maximum depth of the tree.
  #   If nil is given, decision tree grows without concern for depth.
  # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
  #   If nil is given, number of leaves is not limited.
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching optimal split point.
  #   If nil is given, split process considers 'Math.sqrt(n_features)' features.
  # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
  #   If nil is given, the methods do not execute in parallel.
  #   If zero or less is given, it becomes equal to the number of processors.
  #   This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  #   It is used to randomly determine the order of features when deciding splitting point.
  def initialize(n_estimators: 10,
                 criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
                 max_features: nil, n_jobs: nil, random_seed: nil)
    super()
    @params = {
      n_estimators: n_estimators,
      criterion: criterion,
      max_depth: max_depth,
      max_leaf_nodes: max_leaf_nodes,
      min_samples_leaf: min_samples_leaf,
      max_features: max_features,
      n_jobs: n_jobs,
      # Kernel#srand supplies a seed when none is given, so the seed actually used is kept in the parameters.
      random_seed: random_seed || srand
    }
    @rng = Random.new(@params[:random_seed])
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [RandomForestClassifier] The learned classifier itself.
  def fit(x, y)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    y = ::Rumale::Validation.check_convert_label_array(y)
    ::Rumale::Validation.check_sample_size(x, y)

    # Initialize some variables.
    n_features = x.shape[1]
    @params[:max_features] = Math.sqrt(n_features).to_i if @params[:max_features].nil?
    @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
    @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
    # One generator per tree is derived from a copy of @rng, so fitting does not advance @rng itself.
    sub_rng = @rng.dup
    rngs = Array.new(@params[:n_estimators]) { Random.new(sub_rng.rand(::Rumale::Ensemble::Value::SEED_BASE)) }
    # Construct forest.
    @estimators =
      if enable_parallel?
        parallel_map(@params[:n_estimators]) { |n| fit_bootstrap_tree(x, y, rngs[n]) }
      else
        rngs.map { |tree_rng| fit_bootstrap_tree(x, y, tree_rng) }
      end
    @feature_importances =
      if enable_parallel?
        parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.sum
      else
        @estimators.sum(&:feature_importances)
      end
    # Normalize to sum to one; skipped when the total is not positive, since 0 / 0 would fill the vector with NaN.
    normalizer = @feature_importances.sum
    @feature_importances /= normalizer if normalizer > 0.0
    self
  end

  # Predict class labels for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    # Per-tree label arrays are transposed so that each row holds the votes for one sample.
    predicted =
      if enable_parallel?
        votes_per_sample = parallel_map(@estimators.size) { |n| @estimators[n].predict(x).to_a }.transpose
        parallel_map(x.shape[0]) { |n| majority_vote(votes_per_sample[n]) }
      else
        @estimators.map { |tree| tree.predict(x).to_a }.transpose.map { |votes| majority_vote(votes) }
      end
    Numo::Int32.asarray(predicted)
  end

  # Predict probability for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    n_estimators = @estimators.size
    # The forest probability is the mean of the per-tree probabilities.
    if enable_parallel?
      parallel_map(n_estimators) { |n| predict_proba_tree(@estimators[n], x) }.sum / n_estimators
    else
      @estimators.sum { |tree| predict_proba_tree(tree, x) } / n_estimators
    end
  end

  # Return the index of the leaf that each sample reached.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
  def apply(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    # Stacking the per-tree results gives one row per tree; transpose to one row per sample.
    Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
  end

  private

  # Build an unfitted decision tree configured with the forest's parameters and the given seed.
  def plant_tree(rnd_seed)
    ::Rumale::Tree::DecisionTreeClassifier.new(
      criterion: @params[:criterion], max_depth: @params[:max_depth],
      max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
      max_features: @params[:max_features], random_seed: rnd_seed
    )
  end

  # Fit one tree on a bootstrap sample: n_samples row indices drawn with replacement using tree_rng.
  def fit_bootstrap_tree(x, y, tree_rng)
    n_samples = x.shape[0]
    bootstrap_ids = Array.new(n_samples) { tree_rng.rand(0...n_samples) }
    plant_tree(tree_rng.seed).fit(x[bootstrap_ids, true], y[bootstrap_ids])
  end

  # Return the most frequent label among the votes; on a tie, the label that appears first wins.
  def majority_vote(votes)
    votes.group_by(&:itself).max_by { |_label, members| members.size }.first
  end

  # Return the tree's class probabilities placed into the columns of the forest's class list.
  # Columns of classes that the tree does not know stay zero.
  def predict_proba_tree(tree, x)
    # initialize some variables.
    forest_classes = @classes.to_a
    column_ids = tree.classes.map { |c| forest_classes.index(c) }
    # predict probabilities.
    probs = Numo::DFloat.zeros(x.shape[0], forest_classes.size)
    tree_probs = tree.predict_proba(x)
    column_ids.each_with_index { |forest_col, tree_col| probs[true, forest_col] = tree_probs[true, tree_col] }
    probs
  end
end
183
+ end
184
+ end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/estimator'
5
+ require 'rumale/base/regressor'
6
+ require 'rumale/tree/decision_tree_regressor'
7
+ require 'rumale/ensemble/value'
8
+
9
+ module Rumale
10
+ module Ensemble
11
+ # RandomForestRegressor is a class that implements random forest for regression
12
+ #
13
+ # @example
14
+ # require 'rumale/ensemble/random_forest_regressor'
15
+ #
16
+ # estimator =
17
+ # Rumale::Ensemble::RandomForestRegressor.new(
18
+ # n_estimators: 10, criterion: 'mse', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
19
+ # estimator.fit(training_samples, training_values)
20
+ # results = estimator.predict(testing_samples)
21
+ #
22
class RandomForestRegressor < ::Rumale::Base::Estimator
  include ::Rumale::Base::Regressor

  # Return the set of estimators.
  # @return [Array<DecisionTreeRegressor>]
  attr_reader :estimators

  # Return the importance for each feature.
  # @return [Numo::DFloat] (size: n_features)
  attr_reader :feature_importances

  # Return the random generator for random selection of feature index.
  # @return [Random]
  attr_reader :rng

  # Create a new regressor with random forest.
  #
  # @param n_estimators [Integer] The number of decision trees for constructing random forest.
  # @param criterion [String] The function to evaluate splitting point.
  #   It is passed to Rumale::Tree::DecisionTreeRegressor; supported criteria are 'mse' and 'mae'.
  # @param max_depth [Integer] The maximum depth of the tree.
  #   If nil is given, decision tree grows without concern for depth.
  # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
  #   If nil is given, number of leaves is not limited.
  # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
  # @param max_features [Integer] The number of features to consider when searching optimal split point.
  #   If nil is given, split process considers 'Math.sqrt(n_features)' features.
  # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
  #   If nil is given, the methods do not execute in parallel.
  #   If zero or less is given, it becomes equal to the number of processors.
  #   This parameter is ignored if the Parallel gem is not loaded.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  #   It is used to randomly determine the order of features when deciding splitting point.
  def initialize(n_estimators: 10,
                 criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
                 max_features: nil, n_jobs: nil, random_seed: nil)
    super()
    @params = {
      n_estimators: n_estimators,
      criterion: criterion,
      max_depth: max_depth,
      max_leaf_nodes: max_leaf_nodes,
      min_samples_leaf: min_samples_leaf,
      max_features: max_features,
      n_jobs: n_jobs,
      # Kernel#srand supplies a seed when none is given, so the seed actually used is kept in the parameters.
      random_seed: random_seed || srand
    }
    @rng = Random.new(@params[:random_seed])
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
  # @return [RandomForestRegressor] The learned regressor itself.
  def fit(x, y)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    y = ::Rumale::Validation.check_convert_target_value_array(y)
    ::Rumale::Validation.check_sample_size(x, y)

    # Initialize some variables.
    n_features = x.shape[1]
    @params[:max_features] = Math.sqrt(n_features).to_i if @params[:max_features].nil?
    @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
    # One generator per tree is derived from a copy of @rng, so fitting does not advance @rng itself.
    sub_rng = @rng.dup
    rngs = Array.new(@params[:n_estimators]) { Random.new(sub_rng.rand(::Rumale::Ensemble::Value::SEED_BASE)) }
    # Construct forest.
    @estimators =
      if enable_parallel?
        parallel_map(@params[:n_estimators]) { |n| fit_bootstrap_tree(x, y, rngs[n]) }
      else
        rngs.map { |tree_rng| fit_bootstrap_tree(x, y, tree_rng) }
      end
    @feature_importances =
      if enable_parallel?
        parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.sum
      else
        @estimators.sum(&:feature_importances)
      end
    # Normalize to sum to one; skipped when the total is not positive, since 0 / 0 would fill the vector with NaN.
    normalizer = @feature_importances.sum
    @feature_importances /= normalizer if normalizer > 0.0
    self
  end

  # Predict values for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
  # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
  def predict(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    # The forest prediction is the mean of the per-tree predictions.
    if enable_parallel?
      parallel_map(@params[:n_estimators]) { |n| @estimators[n].predict(x) }.sum / @params[:n_estimators]
    else
      @estimators.sum { |tree| tree.predict(x) } / @params[:n_estimators]
    end
  end

  # Return the index of the leaf that each sample reached.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to assign each leaf.
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
  def apply(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    # Stacking the per-tree results gives one row per tree; transpose to one row per sample.
    Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
  end

  private

  # Build an unfitted decision tree configured with the forest's parameters and the given seed.
  def plant_tree(rnd_seed)
    ::Rumale::Tree::DecisionTreeRegressor.new(
      criterion: @params[:criterion], max_depth: @params[:max_depth],
      max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
      max_features: @params[:max_features], random_seed: rnd_seed
    )
  end

  # Fit one tree on a bootstrap sample: n_samples row indices drawn with replacement using tree_rng.
  def fit_bootstrap_tree(x, y, tree_rng)
    n_samples = x.shape[0]
    bootstrap_ids = Array.new(n_samples) { tree_rng.rand(0...n_samples) }
    # A target array without a second axis is indexed by row only; otherwise all output columns are kept.
    y_boot = y.shape[1].nil? ? y[bootstrap_ids] : y[bootstrap_ids, true]
    plant_tree(tree_rng.seed).fit(x[bootstrap_ids, true], y_boot)
  end
end
145
+ end
146
+ end
@@ -0,0 +1,224 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/estimator'
5
+ require 'rumale/base/classifier'
6
+ require 'rumale/linear_model/logistic_regression'
7
+ require 'rumale/model_selection/stratified_k_fold'
8
+ require 'rumale/preprocessing/label_encoder'
9
+
10
+ module Rumale
11
+ module Ensemble
12
+ # StackingClassifier is a class that implements classifier with stacking method.
13
+ #
14
+ # @example
15
+ # require 'rumale/ensemble/stacking_classifier'
16
+ #
17
+ # estimators = {
18
+ # lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
19
+ # mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
20
+ # rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
21
+ # }
22
+ # meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
23
+ # classifier = Rumale::Ensemble::StackingClassifier.new(
24
+ # estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
25
+ # )
26
+ # classifier.fit(training_samples, training_labels)
27
+ # results = classifier.predict(testing_samples)
28
+ #
29
+ # *Reference*
30
+ # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
31
class StackingClassifier < ::Rumale::Base::Estimator
  include ::Rumale::Base::Classifier

  # Candidate methods for extracting meta features, in the order they are tried when stack_method is 'auto'.
  STACK_METHODS = %i[predict_proba decision_function predict].freeze

  private_constant :STACK_METHODS

  # Return the base classifiers.
  # @return [Hash<Symbol,Classifier>]
  attr_reader :estimators

  # Return the meta classifier.
  # @return [Classifier]
  attr_reader :meta_estimator

  # Return the class labels.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # Return the method used by each base classifier.
  # @return [Hash<Symbol,Symbol>]
  attr_reader :stack_method

  # Create a new classifier with stacking method.
  #
  # @param estimators [Hash<Symbol,Classifier>] The base classifiers for extracting meta features.
  # @param meta_estimator [Classifier/Nil] The meta classifier that predicts class label.
  #   If nil is given, LogisticRegression is used.
  # @param n_splits [Integer] The number of folds for cross validation with stratified k-fold on meta feature extraction in training phase.
  # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
  # @param stack_method [String] The method name of base classifier used for meta feature extraction.
  #   If 'auto' is given, it searches the callable method in the order 'predict_proba', 'decision_function', and 'predict'
  #   on each classifier.
  # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta classifier.
  # @param random_seed [Integer/Nil] The seed value used to initialize the random generator on cross validation.
  def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, stack_method: 'auto', passthrough: false,
                 random_seed: nil)
    super()
    @estimators = estimators
    @meta_estimator = meta_estimator || ::Rumale::LinearModel::LogisticRegression.new
    @params = {
      n_splits: n_splits,
      shuffle: shuffle,
      stack_method: stack_method,
      passthrough: passthrough,
      # Kernel#srand supplies a seed when none is given, so the seed actually used is kept in the parameters.
      random_seed: random_seed || srand
    }
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [StackingClassifier] The learned classifier itself.
  def fit(x, y)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    y = ::Rumale::Validation.check_convert_label_array(y)
    ::Rumale::Validation.check_sample_size(x, y)

    n_samples, n_features = x.shape

    @encoder = ::Rumale::Preprocessing::LabelEncoder.new
    y_encoded = @encoder.fit_transform(y)
    @classes = Numo::NArray[*@encoder.classes]

    # training base classifiers with all training data.
    @estimators.each_value { |estimator| estimator.fit(x, y_encoded) }

    # detecting feature extraction method and its size of output for each base classifier.
    @stack_method = detect_stack_method
    @output_size = detect_output_size(n_features)

    # extracting meta features with base classifiers.
    z = Numo::DFloat.zeros(n_samples, @output_size.values.sum)

    kf = ::Rumale::ModelSelection::StratifiedKFold.new(
      n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
    )

    kf.split(x, y_encoded).each do |train_ids, valid_ids|
      x_train = x[train_ids, true]
      y_train = y_encoded[train_ids]
      x_valid = x[valid_ids, true]
      each_meta_feature_slot do |name, columns|
        # A Marshal round trip deep-copies the base classifier (our own in-memory object, not external data),
        # so the copy refitted on this fold leaves the classifier trained on all data untouched.
        est_fold = Marshal.load(Marshal.dump(@estimators[name]))
        z[valid_ids, columns] = est_fold.fit(x_train, y_train).public_send(@stack_method[name], x_valid)
      end
    end

    # concatenating original features.
    z = Numo::NArray.hstack([z, x]) if @params[:passthrough]

    # training meta classifier.
    @meta_estimator.fit(z, y_encoded)

    self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
  def decision_function(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    @meta_estimator.decision_function(transform(x))
  end

  # Predict class labels for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
  def predict(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    # The meta classifier is trained on encoded labels, so its output is decoded back to the original labels.
    encoded_labels = @meta_estimator.predict(transform(x))
    Numo::Int32.cast(@encoder.inverse_transform(encoded_labels))
  end

  # Predict probability for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
  def predict_proba(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    @meta_estimator.predict_proba(transform(x))
  end

  # Transform the given data with the learned model.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
  def transform(x)
    x = ::Rumale::Validation.check_convert_sample_array(x)

    z = Numo::DFloat.zeros(x.shape[0], @output_size.values.sum)
    each_meta_feature_slot do |name, columns|
      z[true, columns] = @estimators[name].public_send(@stack_method[name], x)
    end
    z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
    z
  end

  # Fit the model with training data, and then transform them with the learned model.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
  def fit_transform(x, y)
    x = ::Rumale::Validation.check_convert_sample_array(x)
    y = ::Rumale::Validation.check_convert_label_array(y)
    ::Rumale::Validation.check_sample_size(x, y)

    fit(x, y).transform(x)
  end

  private

  # Yield each base classifier name together with the column position its output occupies in the
  # meta feature matrix: a single Integer index when the output size is 1, otherwise a Range of columns.
  # Positions are assigned consecutively in the iteration order of @estimators.
  def each_meta_feature_slot
    offset = 0
    @estimators.each_key do |name|
      width = @output_size[name]
      yield name, (width == 1 ? offset : offset...(offset + width))
      offset += width
    end
  end

  # Decide, for each base classifier, which method is called to extract meta features.
  # With 'auto', the first of STACK_METHODS the classifier responds to is chosen (nil if it responds to none);
  # otherwise the configured method name is used for every classifier.
  def detect_stack_method
    if @params[:stack_method] == 'auto'
      @estimators.transform_values do |estimator|
        STACK_METHODS.find { |candidate| estimator.respond_to?(candidate) }
      end
    else
      @estimators.transform_values { @params[:stack_method].to_sym }
    end
  end

  # Determine how many meta feature columns each base classifier produces by calling its stack method
  # on two random dummy samples: a one-dimensional output counts as 1 column, otherwise shape[1] columns.
  def detect_output_size(n_features)
    x_dummy = Numo::DFloat.new(2, n_features).rand
    @estimators.each_with_object({}) do |(name, estimator), sizes|
      output_dummy = estimator.public_send(@stack_method[name], x_dummy)
      sizes[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
    end
  end
end
223
+ end
224
+ end