rumale-ensemble 0.24.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/estimator'
5
+ require 'rumale/base/classifier'
6
+ require 'rumale/tree/decision_tree_classifier'
7
+ require 'rumale/ensemble/value'
8
+
9
module Rumale
  # This module consists of the classes that implement ensemble-based methods.
  module Ensemble
    # RandomForestClassifier is a class that implements random forest for classification.
    #
    # Each tree is trained on a bootstrap sample (drawn with replacement) of the training data,
    # and the forest predicts by majority vote over its trees.
    #
    # @example
    #   require 'rumale/ensemble/random_forest_classifier'
    #
    #   estimator =
    #     Rumale::Ensemble::RandomForestClassifier.new(
    #       n_estimators: 10, criterion: 'gini', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    #
    class RandomForestClassifier < ::Rumale::Base::Estimator
      include ::Rumale::Base::Classifier

      # Return the set of estimators.
      # @return [Array<DecisionTreeClassifier>]
      attr_reader :estimators

      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the importance for each feature.
      # @return [Numo::DFloat] (size: n_features)
      attr_reader :feature_importances

      # Return the random generator for random selection of feature index.
      # @return [Random]
      attr_reader :rng

      # Create a new classifier with random forest.
      #
      # @param n_estimators [Integer] The number of decision trees for constructing random forest.
      # @param criterion [String] The function to evaluate splitting point. Supported criteria are 'gini' and 'entropy'.
      # @param max_depth [Integer] The maximum depth of the tree.
      #   If nil is given, decision tree grows without concern for depth.
      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
      #   If nil is given, number of leaves is not limited.
      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
      # @param max_features [Integer] The number of features to consider when searching optimal split point.
      #   If nil is given, split process considers 'Math.sqrt(n_features)' features.
      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
      #   If nil is given, the methods do not execute in parallel.
      #   If zero or less is given, it becomes equal to the number of processors.
      #   This parameter is ignored if the Parallel gem is not loaded.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      #   It is used to randomly determine the order of features when deciding splitting point.
      def initialize(n_estimators: 10,
                     criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
                     max_features: nil, n_jobs: nil, random_seed: nil)
        super()
        @params = {
          n_estimators: n_estimators,
          criterion: criterion,
          max_depth: max_depth,
          max_leaf_nodes: max_leaf_nodes,
          min_samples_leaf: min_samples_leaf,
          max_features: max_features,
          n_jobs: n_jobs,
          random_seed: random_seed || srand
        }
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [RandomForestClassifier] The learned classifier itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        # Initialize some variables.
        n_features = x.shape[1]
        # NOTE: the resolved value is written back into @params, so a later call to fit
        # reuses it (clamped to the new n_features) instead of recomputing the default.
        @params[:max_features] = Math.sqrt(n_features).to_i if @params[:max_features].nil?
        @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
        @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
        # Draw the per-tree generators from a copy so that @rng itself is not advanced by fitting.
        sub_rng = @rng.dup
        rngs = Array.new(@params[:n_estimators]) { Random.new(sub_rng.rand(::Rumale::Ensemble::Value::SEED_BASE)) }
        # Construct forest.
        @estimators =
          if enable_parallel?
            parallel_map(@params[:n_estimators]) { |n| grow_tree(x, y, rngs[n]) }
          else
            Array.new(@params[:n_estimators]) { |n| grow_tree(x, y, rngs[n]) }
          end
        # The importances are already stored on the fitted trees, so they are summed directly.
        @feature_importances = @estimators.sum(&:feature_importances)
        # Normalize to unit sum; a forest without any informative split keeps all-zero
        # importances instead of turning into NaN through a division by zero.
        total_importance = @feature_importances.sum
        @feature_importances /= total_importance unless total_importance.zero?
        self
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        n_samples = x.shape[0]
        n_estimators = @estimators.size
        # votes[i] holds the label predicted by every tree for the i-th sample.
        predicted = if enable_parallel?
                      votes = parallel_map(n_estimators) { |n| @estimators[n].predict(x).to_a }.transpose
                      parallel_map(n_samples) { |n| majority_label(votes[n]) }
                    else
                      votes = @estimators.map { |tree| tree.predict(x).to_a }.transpose
                      Array.new(n_samples) { |n| majority_label(votes[n]) }
                    end
        Numo::Int32.asarray(predicted)
      end

      # Predict probability for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
      def predict_proba(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        n_estimators = @estimators.size
        if enable_parallel?
          parallel_map(n_estimators) { |n| predict_proba_tree(@estimators[n], x) }.sum / n_estimators
        else
          @estimators.sum { |tree| predict_proba_tree(tree, x) } / n_estimators
        end
      end

      # Return the index of the leaf that each sample reached.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to assign each leaf.
      # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
      def apply(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
      end

      private

      # Train one tree on a bootstrap sample (same size as the data, drawn with replacement using rng).
      def grow_tree(x, y, rng)
        n_samples = x.shape[0]
        bootstrap_ids = Array.new(n_samples) { rng.rand(0...n_samples) }
        plant_tree(rng.seed).fit(x[bootstrap_ids, true], y[bootstrap_ids])
      end

      # Build an unfitted decision tree that shares the forest's hyperparameters.
      def plant_tree(rnd_seed)
        ::Rumale::Tree::DecisionTreeClassifier.new(
          criterion: @params[:criterion], max_depth: @params[:max_depth],
          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
          max_features: @params[:max_features], random_seed: rnd_seed
        )
      end

      # Return the most frequent label among the votes; on a tie the label that was seen first wins.
      def majority_label(votes)
        votes.group_by(&:itself).max_by { |_label, group| group.size }.first
      end

      # Return the probabilities of a single tree, laid out over the classes of the whole forest.
      def predict_proba_tree(tree, x)
        # initialize some variables.
        n_samples = x.shape[0]
        base_classes = @classes.to_a
        n_classes = base_classes.size
        # A bootstrap sample may miss some classes, so map the tree's columns onto the forest's columns.
        class_ids = tree.classes.map { |c| base_classes.index(c) }
        # predict probabilities.
        probs = Numo::DFloat.zeros(n_samples, n_classes)
        tree_probs = tree.predict_proba(x)
        class_ids.each_with_index { |i, j| probs[true, i] = tree_probs[true, j] }
        probs
      end
    end
  end
end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/estimator'
5
+ require 'rumale/base/regressor'
6
+ require 'rumale/tree/decision_tree_regressor'
7
+ require 'rumale/ensemble/value'
8
+
9
module Rumale
  module Ensemble
    # RandomForestRegressor is a class that implements random forest for regression.
    #
    # Each tree is trained on a bootstrap sample (drawn with replacement) of the training data,
    # and the forest predicts the mean of the values predicted by its trees.
    #
    # @example
    #   require 'rumale/ensemble/random_forest_regressor'
    #
    #   estimator =
    #     Rumale::Ensemble::RandomForestRegressor.new(
    #       n_estimators: 10, criterion: 'mse', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
    #   estimator.fit(training_samples, training_values)
    #   results = estimator.predict(testing_samples)
    #
    class RandomForestRegressor < ::Rumale::Base::Estimator
      include ::Rumale::Base::Regressor

      # Return the set of estimators.
      # @return [Array<DecisionTreeRegressor>]
      attr_reader :estimators

      # Return the importance for each feature.
      # @return [Numo::DFloat] (size: n_features)
      attr_reader :feature_importances

      # Return the random generator for random selection of feature index.
      # @return [Random]
      attr_reader :rng

      # Create a new regressor with random forest.
      #
      # @param n_estimators [Integer] The number of decision trees for constructing random forest.
      # @param criterion [String] The function to evaluate splitting point. Supported criteria are 'mse' and 'mae'.
      # @param max_depth [Integer] The maximum depth of the tree.
      #   If nil is given, decision tree grows without concern for depth.
      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
      #   If nil is given, number of leaves is not limited.
      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
      # @param max_features [Integer] The number of features to consider when searching optimal split point.
      #   If nil is given, split process considers 'Math.sqrt(n_features)' features.
      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
      #   If nil is given, the methods do not execute in parallel.
      #   If zero or less is given, it becomes equal to the number of processors.
      #   This parameter is ignored if the Parallel gem is not loaded.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      #   It is used to randomly determine the order of features when deciding splitting point.
      def initialize(n_estimators: 10,
                     criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
                     max_features: nil, n_jobs: nil, random_seed: nil)
        super()
        @params = {
          n_estimators: n_estimators,
          criterion: criterion,
          max_depth: max_depth,
          max_leaf_nodes: max_leaf_nodes,
          min_samples_leaf: min_samples_leaf,
          max_features: max_features,
          n_jobs: n_jobs,
          random_seed: random_seed || srand
        }
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
      # @return [RandomForestRegressor] The learned regressor itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_target_value_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        # Initialize some variables.
        n_features = x.shape[1]
        # NOTE: the resolved value is written back into @params, so a later call to fit
        # reuses it (clamped to the new n_features) instead of recomputing the default.
        @params[:max_features] = Math.sqrt(n_features).to_i if @params[:max_features].nil?
        @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
        # Draw the per-tree generators from a copy so that @rng itself is not advanced by fitting.
        sub_rng = @rng.dup
        rngs = Array.new(@params[:n_estimators]) { Random.new(sub_rng.rand(::Rumale::Ensemble::Value::SEED_BASE)) }
        # Construct forest.
        @estimators =
          if enable_parallel?
            parallel_map(@params[:n_estimators]) { |n| grow_tree(x, y, rngs[n]) }
          else
            Array.new(@params[:n_estimators]) { |n| grow_tree(x, y, rngs[n]) }
          end
        # The importances are already stored on the fitted trees, so they are summed directly.
        @feature_importances = @estimators.sum(&:feature_importances)
        # Normalize to unit sum; a forest without any informative split keeps all-zero
        # importances instead of turning into NaN through a division by zero.
        total_importance = @feature_importances.sum
        @feature_importances /= total_importance unless total_importance.zero?
        self
      end

      # Predict values for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
      def predict(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        # Average over the trees that were actually trained.
        n_estimators = @estimators.size
        if enable_parallel?
          parallel_map(n_estimators) { |n| @estimators[n].predict(x) }.sum / n_estimators
        else
          @estimators.sum { |tree| tree.predict(x) } / n_estimators
        end
      end

      # Return the index of the leaf that each sample reached.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to assign each leaf.
      # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
      def apply(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
      end

      private

      # Train one tree on a bootstrap sample (same size as the data, drawn with replacement using rng).
      def grow_tree(x, y, rng)
        n_samples = x.shape[0]
        bootstrap_ids = Array.new(n_samples) { rng.rand(0...n_samples) }
        # A 1-D target array has no second axis, so it is indexed by rows only.
        single_target = y.shape[1].nil?
        y_boot = single_target ? y[bootstrap_ids] : y[bootstrap_ids, true]
        plant_tree(rng.seed).fit(x[bootstrap_ids, true], y_boot)
      end

      # Build an unfitted decision tree that shares the forest's hyperparameters.
      def plant_tree(rnd_seed)
        ::Rumale::Tree::DecisionTreeRegressor.new(
          criterion: @params[:criterion], max_depth: @params[:max_depth],
          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
          max_features: @params[:max_features], random_seed: rnd_seed
        )
      end
    end
  end
end
@@ -0,0 +1,224 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/estimator'
5
+ require 'rumale/base/classifier'
6
+ require 'rumale/linear_model/logistic_regression'
7
+ require 'rumale/model_selection/stratified_k_fold'
8
+ require 'rumale/preprocessing/label_encoder'
9
+
10
module Rumale
  module Ensemble
    # StackingClassifier is a class that implements classifier with stacking method.
    #
    # The outputs of the base classifiers (meta features) are used as the input of a meta classifier.
    # During training, the meta features are produced out-of-fold with stratified k-fold cross validation.
    #
    # @example
    #   require 'rumale/ensemble/stacking_classifier'
    #
    #   estimators = {
    #     lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
    #     mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
    #     rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
    #   }
    #   meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
    #   classifier = Rumale::Ensemble::StackingClassifier.new(
    #     estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
    #   )
    #   classifier.fit(training_samples, training_labels)
    #   results = classifier.predict(testing_samples)
    #
    # *Reference*
    # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
    class StackingClassifier < ::Rumale::Base::Estimator
      include ::Rumale::Base::Classifier

      # Return the base classifiers.
      # @return [Hash<Symbol,Classifier>]
      attr_reader :estimators

      # Return the meta classifier.
      # @return [Classifier]
      attr_reader :meta_estimator

      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Return the method used by each base classifier.
      # @return [Hash<Symbol,Symbol>]
      attr_reader :stack_method

      # Create a new classifier with stacking method.
      #
      # @param estimators [Hash<Symbol,Classifier>] The base classifiers for extracting meta features.
      # @param meta_estimator [Classifier/Nil] The meta classifier that predicts class label.
      #   If nil is given, LogisticRegression is used.
      # @param n_splits [Integer] The number of folds for cross validation with stratified k-fold on meta feature extraction in training phase.
      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
      # @param stack_method [String] The method name of base classifier for using meta feature extraction.
      #   If 'auto' is given, it searches the callable method in the order 'predict_proba', 'decision_function', and 'predict'
      #   on each classifier.
      # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta classifier.
      # @param random_seed [Integer/Nil] The seed value using to initialize the random generator on cross validation.
      def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, stack_method: 'auto', passthrough: false,
                     random_seed: nil)
        super()
        @estimators = estimators
        @meta_estimator = meta_estimator || ::Rumale::LinearModel::LogisticRegression.new
        @params = {
          n_splits: n_splits,
          shuffle: shuffle,
          stack_method: stack_method,
          passthrough: passthrough,
          random_seed: random_seed || srand
        }
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [StackingClassifier] The learned classifier itself.
      def fit(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        n_samples, n_features = x.shape

        @encoder = ::Rumale::Preprocessing::LabelEncoder.new
        y_encoded = @encoder.fit_transform(y)
        @classes = Numo::NArray[*@encoder.classes]

        # training base classifiers with all training data.
        @estimators.each_value { |estimator| estimator.fit(x, y_encoded) }

        # detecting feature extraction method and its size of output for each base classifier.
        @stack_method = detect_stack_method
        @output_size = detect_output_size(n_features)

        # extracting meta features with base classifiers.
        n_components = @output_size.values.sum
        z = Numo::DFloat.zeros(n_samples, n_components)

        kf = ::Rumale::ModelSelection::StratifiedKFold.new(
          n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
        )

        kf.split(x, y_encoded).each do |train_ids, valid_ids|
          x_train = x[train_ids, true]
          y_train = y_encoded[train_ids]
          x_valid = x[valid_ids, true]
          each_meta_feature_slot do |name, f_position|
            # Marshal round-trip is used as a deep copy of our own estimator, so that refitting
            # on the fold does not disturb the estimator trained on all data.
            est_fold = Marshal.load(Marshal.dump(@estimators[name]))
            z[valid_ids, f_position] = est_fold.fit(x_train, y_train).public_send(@stack_method[name], x_valid)
          end
        end

        # concatenating original features.
        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]

        # training meta classifier.
        @meta_estimator.fit(z, y_encoded)

        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
      def decision_function(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        z = transform(x)
        @meta_estimator.decision_function(z)
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
      def predict(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        z = transform(x)
        # The meta classifier was trained on encoded labels, so decode them back to the original ones.
        Numo::Int32.cast(@encoder.inverse_transform(@meta_estimator.predict(z)))
      end

      # Predict probability for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
      def predict_proba(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        z = transform(x)
        @meta_estimator.predict_proba(z)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
      #   If passthrough is enabled, the original features are appended after the meta features.
      def transform(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)

        n_samples = x.shape[0]
        n_components = @output_size.values.sum
        z = Numo::DFloat.zeros(n_samples, n_components)
        each_meta_feature_slot do |name, f_position|
          z[true, f_position] = @estimators[name].public_send(@stack_method[name], x)
        end
        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
        z
      end

      # Fit the model with training data, and then transform them with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
      def fit_transform(x, y)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        y = ::Rumale::Validation.check_convert_label_array(y)
        ::Rumale::Validation.check_sample_size(x, y)

        fit(x, y).transform(x)
      end

      private

      # Candidate methods for extracting meta features, in order of preference.
      STACK_METHODS = %i[predict_proba decision_function predict].freeze

      private_constant :STACK_METHODS

      # Yield each base classifier name together with the column position (an Integer for a
      # single column, otherwise a Range) that its output occupies in the meta feature matrix.
      def each_meta_feature_slot
        f_start = 0
        @estimators.each_key do |name|
          f_last = f_start + @output_size[name]
          yield name, (@output_size[name] == 1 ? f_start : f_start...f_last)
          f_start = f_last
        end
      end

      # Decide, for each base classifier, the method used for extracting meta features.
      def detect_stack_method
        if @params[:stack_method] == 'auto'
          @estimators.each_key.with_object({}) do |name, obj|
            obj[name] = STACK_METHODS.detect do |m|
              @estimators[name].respond_to?(m)
            end
          end
        else
          @estimators.each_key.with_object({}) { |name, obj| obj[name] = @params[:stack_method].to_sym }
        end
      end

      # Probe each fitted base classifier with two random samples to learn how many columns its output has.
      def detect_output_size(n_features)
        x_dummy = Numo::DFloat.new(2, n_features).rand
        @estimators.each_key.with_object({}) do |name, obj|
          output_dummy = @estimators[name].public_send(@stack_method[name], x_dummy)
          obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
        end
      end
    end
  end
end