rumale-ensemble 0.24.0

@@ -0,0 +1,296 @@
+ # frozen_string_literal: true
+
+ require 'rumale/validation'
+ require 'rumale/base/estimator'
+ require 'rumale/base/classifier'
+ require 'rumale/tree/gradient_tree_regressor'
+ require 'rumale/ensemble/value'
+
+ module Rumale
+   module Ensemble
+     # GradientBoostingClassifier is a class that implements gradient tree boosting for classification.
+     # The class uses the negative binomial log-likelihood as the loss function.
+     # For multiclass classification problems, it uses the one-vs-the-rest strategy.
+     #
+     # @example
+     #   require 'rumale/ensemble/gradient_boosting_classifier'
+     #
+     #   estimator =
+     #     Rumale::Ensemble::GradientBoostingClassifier.new(
+     #       n_estimators: 100, learning_rate: 0.3, reg_lambda: 0.001, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     # *Reference*
+     # - Friedman, J H., "Greedy Function Approximation: A Gradient Boosting Machine," Annals of Statistics, 29 (5), pp. 1189--1232, 2001.
+     # - Friedman, J H., "Stochastic Gradient Boosting," Computational Statistics and Data Analysis, 38 (4), pp. 367--378, 2002.
+     # - Chen, T., and Guestrin, C., "XGBoost: A Scalable Tree Boosting System," Proc. KDD'16, pp. 785--794, 2016.
+     #
+     class GradientBoostingClassifier < ::Rumale::Base::Estimator # rubocop:disable Metrics/ClassLength
+       include ::Rumale::Base::Classifier
+
+       # Return the set of estimators.
+       # @return [Array<GradientTreeRegressor>] or [Array<Array<GradientTreeRegressor>>]
+       attr_reader :estimators
+
+       # Return the class labels.
+       # @return [Numo::Int32] (size: n_classes)
+       attr_reader :classes
+
+       # Return the importance for each feature.
+       # The feature importances are calculated based on the number of times the feature is used for splitting.
+       # @return [Numo::DFloat] (size: n_features)
+       attr_reader :feature_importances
+
+       # Return the random generator for random selection of feature index.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new classifier with gradient tree boosting.
+       #
+       # @param n_estimators [Integer] The number of trees for constructing the classifier.
+       # @param learning_rate [Float] The boosting learning rate.
+       # @param reg_lambda [Float] The L2 regularization term on weight.
+       # @param subsample [Float] The subsampling ratio of the training samples.
+       # @param max_depth [Integer] The maximum depth of the tree.
+       #   If nil is given, the decision tree grows without concern for depth.
+       # @param max_leaf_nodes [Integer] The maximum number of leaves on the decision tree.
+       #   If nil is given, the number of leaves is not limited.
+       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
+       # @param max_features [Integer] The number of features to consider when searching for the optimal split point.
+       #   If nil is given, the split process considers all features.
+       # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+       #   If nil is given, the methods do not execute in parallel.
+       #   If zero or less is given, it becomes equal to the number of processors.
+       #   This parameter is ignored if the Parallel gem is not loaded.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       #   It is used to randomly determine the order of features when deciding the split point.
+       def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
+                      max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
+                      max_features: nil, n_jobs: nil, random_seed: nil)
+         super()
+         @params = {
+           n_estimators: n_estimators,
+           learning_rate: learning_rate,
+           reg_lambda: reg_lambda,
+           subsample: subsample,
+           max_depth: max_depth,
+           max_leaf_nodes: max_leaf_nodes,
+           min_samples_leaf: min_samples_leaf,
+           max_features: max_features,
+           n_jobs: n_jobs,
+           random_seed: random_seed || srand
+         }
+         @rng = Random.new(@params[:random_seed])
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [GradientBoostingClassifier] The learned classifier itself.
+       def fit(x, y)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+         y = ::Rumale::Validation.check_convert_label_array(y)
+         ::Rumale::Validation.check_sample_size(x, y)
+
+         # initialize some variables.
+         n_features = x.shape[1]
+         @params[:max_features] = n_features if @params[:max_features].nil?
+         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
+         @classes = Numo::Int32[*y.to_a.uniq.sort]
+         n_classes = @classes.size
+         # train estimator.
+         if n_classes > 2
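+           # one-vs-the-rest: train a separate base prediction and boosted ensemble for each class.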
+           @base_predictions = multiclass_base_predictions(y)
+           @estimators = multiclass_estimators(x, y)
+         else
+           negative_label = y.to_a.uniq.min
+           bin_y = Numo::DFloat.cast(y.ne(negative_label)) * 2 - 1
+           y_mean = bin_y.mean
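+           # the prior log-odds is the constant prediction that minimizes the binomial deviance (Friedman, 2001).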
+           @base_predictions = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
+           @estimators = partial_fit(x, bin_y, @base_predictions)
+         end
+         # calculate feature importances.
+         @feature_importances = if n_classes > 2
+                                  multiclass_feature_importances
+                                else
+                                  @estimators.sum(&:feature_importances)
+                                end
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+       def decision_function(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         n_classes = @classes.size
+         if n_classes > 2
+           multiclass_scores(x)
+         else
+           @estimators.sum { |tree| tree.predict(x) } + @base_predictions
+         end
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         n_samples = x.shape[0]
+         probs = predict_proba(x)
+         Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[probs[n, true].max_index] })
+       end
+
+       # Predict probability for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+
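+         # in the multiclass (one-vs-the-rest) case, normalize the per-class sigmoid scores so they sum to one for each sample.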
+         return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
+
+         n_samples, = x.shape
+         probs = Numo::DFloat.zeros(n_samples, 2)
+         probs[true, 1] = proba
+         probs[true, 0] = 1.0 - proba
+         probs
+       end
+
+       # Return the index of the leaf that each sample reached.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_samples, n_estimators, n_classes]) Leaf index for each sample.
+       def apply(x)
+         x = ::Rumale::Validation.check_convert_sample_array(x)
+
+         n_classes = @classes.size
+         leaf_ids = if n_classes > 2
+                      Array.new(n_classes) { |n| @estimators[n].map { |tree| tree.apply(x) } }
+                    else
+                      @estimators.map { |tree| tree.apply(x) }
+                    end
+         Numo::Int32[*leaf_ids].transpose.dup
+       end
+
+       private
+
+       def partial_fit(x, y, init_pred)
+         # initialize some variables.
+         estimators = []
+         n_samples = x.shape[0]
+         n_sub_samples = [n_samples, [(n_samples * @params[:subsample]).to_i, 1].max].min
+         whole_ids = Array.new(n_samples) { |v| v }
+         y_pred = Numo::DFloat.ones(n_samples) * init_pred
+         sub_rng = @rng.dup
+         # grow trees.
+         @params[:n_estimators].times do |_t|
+           # subsampling
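+           # (stochastic gradient boosting, Friedman 2002: when subsample < 1.0, each tree is fit on a random subset of the training samples)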
+           ids = whole_ids.sample(n_sub_samples, random: sub_rng)
+           x_sub = x[ids, true]
+           y_sub = y[ids]
+           y_pred_sub = y_pred[ids]
+           # train tree
+           g = gradient(y_sub, y_pred_sub)
+           h = hessian(y_sub, y_pred_sub)
+           tree = plant_tree(sub_rng)
+           tree.fit(x_sub, y_sub, g, h)
+           estimators.push(tree)
+           # update
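+           # the learning rate is applied inside each tree (shrinkage_rate in plant_tree), so predictions are accumulated without an extra factor.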
+           y_pred += tree.predict(x)
+         end
+         estimators
+       end
+
+       # for debug
+       #
+       # def loss(y_true, y_pred)
+       #   # y_true in {-1, 1}
+       #   Numo::NMath.log(1.0 + Numo::NMath.exp(-2.0 * y_true * y_pred)).mean
+       # end
+
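+       # The gradient and hessian below are the first and second derivatives of the
+       # binomial deviance in the debug comment above, L(y, F) = log(1 + exp(-2 * y * F)) with y in {-1, 1}:
+       #   dL/dF   = -2 * y / (1 + exp(2 * y * F))
+       #   d2L/dF2 = 4 * exp(2 * y * F) / (1 + exp(2 * y * F))**2 = |g| * (2 - |g|)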
+       def gradient(y_true, y_pred)
+         # y in {-1, 1}
+         -2.0 * y_true / (1.0 + Numo::NMath.exp(2.0 * y_true * y_pred))
+       end
+
+       def hessian(y_true, y_pred)
+         abs_response = gradient(y_true, y_pred).abs
+         abs_response * (2.0 - abs_response)
+       end
+
+       def plant_tree(sub_rng)
+         ::Rumale::Tree::GradientTreeRegressor.new(
+           reg_lambda: @params[:reg_lambda], shrinkage_rate: @params[:learning_rate],
+           max_depth: @params[:max_depth],
+           max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+           max_features: @params[:max_features], random_seed: sub_rng.rand(::Rumale::Ensemble::Value::SEED_BASE)
+         )
+       end
+
+       def multiclass_base_predictions(y)
+         n_classes = @classes.size
+         b = if enable_parallel?
+               parallel_map(n_classes) do |n|
+                 bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+                 y_mean = bin_y.mean
+                 0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+               end
+             else
+               Array.new(n_classes) do |n|
+                 bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+                 y_mean = bin_y.mean
+                 0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+               end
+             end
+         Numo::DFloat.asarray(b)
+       end
+
+       def multiclass_estimators(x, y)
+         n_classes = @classes.size
+         if enable_parallel?
+           parallel_map(n_classes) do |n|
+             bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+             partial_fit(x, bin_y, @base_predictions[n])
+           end
+         else
+           Array.new(n_classes) do |n|
+             bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+             partial_fit(x, bin_y, @base_predictions[n])
+           end
+         end
+       end
+
+       def multiclass_feature_importances
+         n_classes = @classes.size
+         if enable_parallel?
+           parallel_map(n_classes) { |n| @estimators[n].sum(&:feature_importances) }.sum
+         else
+           Array.new(n_classes) { |n| @estimators[n].sum(&:feature_importances) }.sum
+         end
+       end
+
+       def multiclass_scores(x)
+         n_classes = @classes.size
+         s = if enable_parallel?
+               parallel_map(n_classes) do |n|
+                 @estimators[n].sum { |tree| tree.predict(x) }
+               end
+             else
+               Array.new(n_classes) do |n|
+                 @estimators[n].sum { |tree| tree.predict(x) }
+               end
+             end
+         Numo::DFloat.asarray(s).transpose + @base_predictions
+       end
+     end
+   end
+ end
@@ -0,0 +1,223 @@
+ # frozen_string_literal: true
+
+ require 'rumale/validation'
+ require 'rumale/base/estimator'
+ require 'rumale/base/regressor'
+ require 'rumale/tree/gradient_tree_regressor'
+ require 'rumale/ensemble/value'
+
+ module Rumale
+   module Ensemble
+     # GradientBoostingRegressor is a class that implements gradient tree boosting for regression.
+     # The class uses the L2 loss as the loss function.
+     #
+     # @example
+     #   require 'rumale/ensemble/gradient_boosting_regressor'
+     #
+     #   estimator =
+     #     Rumale::Ensemble::GradientBoostingRegressor.new(
+     #       n_estimators: 100, learning_rate: 0.3, reg_lambda: 0.001, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     # *Reference*
+     # - Friedman, J H., "Greedy Function Approximation: A Gradient Boosting Machine," Annals of Statistics, 29 (5), pp. 1189--1232, 2001.
+     # - Friedman, J H., "Stochastic Gradient Boosting," Computational Statistics and Data Analysis, 38 (4), pp. 367--378, 2002.
+     # - Chen, T., and Guestrin, C., "XGBoost: A Scalable Tree Boosting System," Proc. KDD'16, pp. 785--794, 2016.
+     #
+     class GradientBoostingRegressor < ::Rumale::Base::Estimator
+       include ::Rumale::Base::Regressor
+
+       # Return the set of estimators.
+       # @return [Array<GradientTreeRegressor>] or [Array<Array<GradientTreeRegressor>>]
+       attr_reader :estimators
+
+       # Return the importance for each feature.
+       # The feature importances are calculated based on the number of times the feature is used for splitting.
+       # @return [Numo::DFloat] (size: n_features)
+       attr_reader :feature_importances
+
+       # Return the random generator for random selection of feature index.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new regressor with gradient tree boosting.
+       #
+       # @param n_estimators [Integer] The number of trees for constructing the regressor.
+       # @param learning_rate [Float] The boosting learning rate.
+       # @param reg_lambda [Float] The L2 regularization term on weight.
+       # @param subsample [Float] The subsampling ratio of the training samples.
+       # @param max_depth [Integer] The maximum depth of the tree.
+       #   If nil is given, the decision tree grows without concern for depth.
+       # @param max_leaf_nodes [Integer] The maximum number of leaves on the decision tree.
+       #   If nil is given, the number of leaves is not limited.
+       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
+       # @param max_features [Integer] The number of features to consider when searching for the optimal split point.
+       #   If nil is given, the split process considers all features.
+       # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+       #   If nil is given, the methods do not execute in parallel.
+       #   If zero or less is given, it becomes equal to the number of processors.
+       #   This parameter is ignored if the Parallel gem is not loaded.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       #   It is used to randomly determine the order of features when deciding the split point.
+       def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
+                      max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
+                      max_features: nil, n_jobs: nil, random_seed: nil)
+         super()
+         @params = {
+           n_estimators: n_estimators,
+           learning_rate: learning_rate,
+           reg_lambda: reg_lambda,
+           subsample: subsample,
+           max_depth: max_depth,
+           max_leaf_nodes: max_leaf_nodes,
+           min_samples_leaf: min_samples_leaf,
+           max_features: max_features,
+           n_jobs: n_jobs,
+           random_seed: random_seed || srand
+         }
+         @rng = Random.new(@params[:random_seed])
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples] or [n_samples, n_outputs]) The target values to be used for fitting the model.
+       # @return [GradientBoostingRegressor] The learned regressor itself.
+       def fit(x, y)
+         # initialize some variables.
+         n_features = x.shape[1]
+         @params[:max_features] = n_features if @params[:max_features].nil?
+         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
+         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+         # train regressor.
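+         # the target mean is the constant prediction that minimizes the L2 loss.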
+         @base_predictions = n_outputs > 1 ? y.mean(0) : y.mean
+         @estimators = if n_outputs > 1
+                         multivar_estimators(x, y)
+                       else
+                         partial_fit(x, y, @base_predictions)
+                       end
+         # calculate feature importances.
+         @feature_importances = if n_outputs > 1
+                                  multivar_feature_importances
+                                else
+                                  @estimators.sum(&:feature_importances)
+                                end
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples] or [n_samples, n_outputs]) Predicted values per sample.
+       def predict(x)
+         n_outputs = @estimators.first.is_a?(Array) ? @estimators.size : 1
+         if n_outputs > 1
+           multivar_predict(x)
+         elsif enable_parallel?
+           parallel_map(@params[:n_estimators]) { |n| @estimators[n].predict(x) }.sum + @base_predictions
+         else
+           @estimators.sum { |tree| tree.predict(x) } + @base_predictions
+         end
+       end
+
+       # Return the index of the leaf that each sample reached.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for each sample.
+       def apply(x)
+         n_outputs = @estimators.first.is_a?(Array) ? @estimators.size : 1
+         leaf_ids = if n_outputs > 1
+                      Array.new(n_outputs) { |n| @estimators[n].map { |tree| tree.apply(x) } }
+                    else
+                      @estimators.map { |tree| tree.apply(x) }
+                    end
+         Numo::Int32[*leaf_ids].transpose.dup
+       end
+
+       private
+
+       def partial_fit(x, y, init_pred)
+         # initialize some variables.
+         estimators = []
+         n_samples = x.shape[0]
+         n_sub_samples = [n_samples, [(n_samples * @params[:subsample]).to_i, 1].max].min
+         whole_ids = Array.new(n_samples) { |v| v }
+         y_pred = Numo::DFloat.ones(n_samples) * init_pred
+         sub_rng = @rng.dup
+         # grow trees.
+         @params[:n_estimators].times do |_t|
+           # subsampling
+           ids = whole_ids.sample(n_sub_samples, random: sub_rng)
+           x_sub = x[ids, true]
+           y_sub = y[ids]
+           y_pred_sub = y_pred[ids]
+           # train tree
+           g = gradient(y_sub, y_pred_sub)
+           h = hessian(n_sub_samples)
+           tree = plant_tree(sub_rng)
+           tree.fit(x_sub, y_sub, g, h)
+           estimators.push(tree)
+           # update
+           y_pred += tree.predict(x)
+         end
+         estimators
+       end
+
+       # for debug
+       #
+       # def loss(y_true, y_pred)
+       #   ((y_true - y_pred)**2).mean
+       # end
+
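+       # The gradient and (constant) hessian below correspond to the squared error loss
+       # 0.5 * (y - F)**2 (the debug loss above omits the 0.5 factor):
+       #   dL/dF = F - y,  d2L/dF2 = 1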
+       def gradient(y_true, y_pred)
+         y_pred - y_true
+       end
+
+       def hessian(n_samples)
+         Numo::DFloat.ones(n_samples)
+       end
+
+       def plant_tree(sub_rng)
+         ::Rumale::Tree::GradientTreeRegressor.new(
+           reg_lambda: @params[:reg_lambda], shrinkage_rate: @params[:learning_rate],
+           max_depth: @params[:max_depth],
+           max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+           max_features: @params[:max_features], random_seed: sub_rng.rand(::Rumale::Ensemble::Value::SEED_BASE)
+         )
+       end
+
+       def multivar_estimators(x, y)
+         n_outputs = y.shape[1]
+         if enable_parallel?
+           parallel_map(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+         else
+           Array.new(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+         end
+       end
+
+       def multivar_feature_importances
+         n_outputs = @estimators.size
+         if enable_parallel?
+           parallel_map(n_outputs) { |n| @estimators[n].sum(&:feature_importances) }.sum
+         else
+           Array.new(n_outputs) { |n| @estimators[n].sum(&:feature_importances) }.sum
+         end
+       end
+
+       def multivar_predict(x)
+         n_outputs = @estimators.size
+         pred = if enable_parallel?
+                  parallel_map(n_outputs) do |n|
+                    @estimators[n].sum { |tree| tree.predict(x) }
+                  end
+                else
+                  Array.new(n_outputs) do |n|
+                    @estimators[n].sum { |tree| tree.predict(x) }
+                  end
+                end
+         Numo::DFloat.asarray(pred).transpose + @base_predictions
+       end
+     end
+   end
+ end