gbrl 1.0.0.dev1__tar.gz → 1.0.0.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gbrl-1.0.0.dev1/gbrl.egg-info → gbrl-1.0.0.dev3}/PKG-INFO +1 -1
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/README.md +16 -1
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/fitter.cpp +4 -2
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/gbrl.cpp +55 -31
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/gbrl.h +3 -1
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/gbrl_binding.cpp +14 -14
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/loss.cpp +2 -2
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/math_ops.cpp +13 -13
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/node.cpp +5 -5
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/optimizer.cpp +7 -7
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/optimizer.h +5 -5
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/predictor.cpp +3 -3
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/scheduler.h +1 -1
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/split_candidate_generator.cpp +5 -5
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/types.cpp +10 -10
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/utils.cpp +1 -1
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_fitter.cu +6 -6
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_loss.cu +4 -3
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_predictor.cu +4 -4
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_types.cu +2 -2
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3/gbrl.egg-info}/PKG-INFO +1 -1
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/pyproject.toml +1 -1
- gbrl-1.0.0.dev3/setup.py +261 -0
- gbrl-1.0.0.dev1/setup.py +0 -178
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/LICENSE +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/MANIFEST.in +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/fitter.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/loss.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/main.cpp +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/math_ops.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/node.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/predictor.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/scheduler.cpp +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/split_candidate_generator.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/types.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cpp/utils.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_fitter.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_loss.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_predictor.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_preprocess.cu +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_preprocess.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_types.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_utils.cu +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl/src/cuda/cuda_utils.h +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl.egg-info/SOURCES.txt +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl.egg-info/dependency_links.txt +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl.egg-info/requires.txt +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/gbrl.egg-info/top_level.txt +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/setup.cfg +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/tests/test_gbt_multi.py +0 -0
- {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev3}/tests/test_gbt_single.py +0 -0
|
@@ -10,9 +10,24 @@ GBRL is a Python-based GBT library designed and optimized for reinforcement lear
|
|
|
10
10
|
|
|
11
11
|
## Getting started
|
|
12
12
|
|
|
13
|
+
### Dependencies
|
|
14
|
+
llvm
|
|
15
|
+
openmp
|
|
16
|
+
|
|
17
|
+
#### MAC OS
|
|
18
|
+
|
|
19
|
+
Make sure to run:
|
|
20
|
+
```
|
|
21
|
+
brew install libomp
|
|
22
|
+
brew install llvm
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Xcode command line tools should also be installed.
|
|
26
|
+
|
|
27
|
+
### Installation
|
|
13
28
|
```
|
|
14
29
|
pip install gbrl
|
|
15
|
-
```
|
|
30
|
+
```
|
|
16
31
|
|
|
17
32
|
For GPU support, GBRL looks for the `CUDA_PATH` or `CUDA_HOME` environment variables. If neither is found, GBRL will automatically compile for CPU only.
|
|
18
33
|
|
|
@@ -335,8 +335,10 @@ int Fitter::fit_oblivious_tree(dataSet *dataset, ensembleData *edata, ensembleMe
|
|
|
335
335
|
int *root_sample_indices = new int[n_samples];
|
|
336
336
|
std::iota(root_sample_indices, root_sample_indices + n_samples, 0);
|
|
337
337
|
|
|
338
|
-
|
|
339
|
-
|
|
338
|
+
int max_n_leaves = 1 << metadata->max_depth;
|
|
339
|
+
|
|
340
|
+
std::vector<TreeNode*> tree_nodes(max_n_leaves);
|
|
341
|
+
std::vector<TreeNode*> child_tree_nodes(max_n_leaves);
|
|
340
342
|
TreeNode *rootNode = new TreeNode(root_sample_indices, n_samples, metadata->n_num_features, metadata->n_cat_features, metadata->output_dim, metadata->policy_dim, depth, 0);
|
|
341
343
|
tree_nodes[0] = rootNode;
|
|
342
344
|
|
|
@@ -81,7 +81,7 @@ GBRL::GBRL(int output_dim, int policy_dim, int max_depth, int min_data_in_leaf,
|
|
|
81
81
|
GBRL::GBRL(const std::string& filename){
|
|
82
82
|
int status = this->loadFromFile(filename);
|
|
83
83
|
if (status != 0){
|
|
84
|
-
std::cerr << "Error loading
|
|
84
|
+
std::cerr << "Error loading . " << filename << std::endl;
|
|
85
85
|
throw std::runtime_error("File load error");
|
|
86
86
|
}
|
|
87
87
|
}
|
|
@@ -127,6 +127,8 @@ GBRL::~GBRL() {
|
|
|
127
127
|
this->metadata = nullptr;
|
|
128
128
|
}
|
|
129
129
|
|
|
130
|
+
|
|
131
|
+
|
|
130
132
|
void GBRL::to_device(deviceType device){
|
|
131
133
|
if (device == this->device){
|
|
132
134
|
std::cout << "GBRL device is already " << deviceTypeToString(device) << std::endl;
|
|
@@ -134,11 +136,19 @@ void GBRL::to_device(deviceType device){
|
|
|
134
136
|
}
|
|
135
137
|
#ifndef USE_CUDA
|
|
136
138
|
if (device == gpu)
|
|
137
|
-
std::cerr << "GBRL was not compiled for GPU
|
|
139
|
+
std::cerr << "GBRL was not compiled for GPU. Using cpu device" << std::endl;
|
|
138
140
|
this->edata = ensemble_data_alloc(this->metadata);
|
|
139
141
|
this->device = cpu;
|
|
140
142
|
return;
|
|
141
143
|
#else
|
|
144
|
+
|
|
145
|
+
if (device == gpu){
|
|
146
|
+
bool is_valid = valid_device();
|
|
147
|
+
if (!is_valid){
|
|
148
|
+
std::cerr << "No GPU device found. Using cpu device" << std::endl;
|
|
149
|
+
device = cpu;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
142
152
|
if (this->device == unspecified){
|
|
143
153
|
if (device == cpu){
|
|
144
154
|
this->edata = ensemble_data_alloc(this->metadata);
|
|
@@ -146,7 +156,6 @@ void GBRL::to_device(deviceType device){
|
|
|
146
156
|
} else {
|
|
147
157
|
this->edata = ensemble_data_alloc_cuda(this->metadata);
|
|
148
158
|
this->device = gpu;
|
|
149
|
-
return;
|
|
150
159
|
}
|
|
151
160
|
} else if (this->device == cpu && device == gpu){
|
|
152
161
|
ensembleData* edata_gpu = ensemble_data_copy_cpu_gpu(this->metadata, this->edata);
|
|
@@ -160,7 +169,7 @@ void GBRL::to_device(deviceType device){
|
|
|
160
169
|
this->device = cpu;
|
|
161
170
|
}
|
|
162
171
|
if (this->device == gpu && this->metadata->use_cv){
|
|
163
|
-
std::cout << "Cannot use control variates with GPU
|
|
172
|
+
std::cout << "Cannot use control variates with GPU. Setting use_cv to False." << std::endl;
|
|
164
173
|
this->metadata->use_cv = false;
|
|
165
174
|
}
|
|
166
175
|
#endif
|
|
@@ -170,7 +179,7 @@ void GBRL::to_device(deviceType device){
|
|
|
170
179
|
void GBRL::set_bias(float *bias, const int output_dim){
|
|
171
180
|
if (output_dim != this->metadata->output_dim)
|
|
172
181
|
{
|
|
173
|
-
std::cerr << "Given bias vector has different dimensions than expect
|
|
182
|
+
std::cerr << "Given bias vector has different dimensions than expect. " << " Given: " << output_dim << " expected: " << this->metadata->output_dim << std::endl;
|
|
174
183
|
throw std::runtime_error("Incompatible dimensions");
|
|
175
184
|
return;
|
|
176
185
|
}
|
|
@@ -183,7 +192,7 @@ void GBRL::set_bias(float *bias, const int output_dim){
|
|
|
183
192
|
}
|
|
184
193
|
|
|
185
194
|
float* GBRL::get_bias(){
|
|
186
|
-
// returns a copy
|
|
195
|
+
// returns a copy. The caller must deallocate the returned float pointer!
|
|
187
196
|
#ifdef USE_CUDA
|
|
188
197
|
if (this->device == gpu){
|
|
189
198
|
float *bias = new float[this->metadata->output_dim];
|
|
@@ -208,7 +217,7 @@ float* GBRL::predict(const float *obs, const char *categorical_obs, const int n_
|
|
|
208
217
|
}
|
|
209
218
|
if (n_num_features != metadata->n_num_features || n_cat_features != metadata->n_cat_features){
|
|
210
219
|
delete[] preds;
|
|
211
|
-
std::cerr << "Error
|
|
220
|
+
std::cerr << "Error. Cannot use ensemble with this dataset. Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features.";
|
|
212
221
|
throw std::runtime_error("Incompatible dataset");
|
|
213
222
|
}
|
|
214
223
|
|
|
@@ -219,7 +228,7 @@ float* GBRL::predict(const float *obs, const char *categorical_obs, const int n_
|
|
|
219
228
|
if (this->device == gpu){
|
|
220
229
|
if (this->cuda_opt == nullptr){
|
|
221
230
|
this->cuda_opt = deepCopySGDOptimizerVectorToGPU(this->opts);
|
|
222
|
-
this->n_cuda_opts = this->opts.size();
|
|
231
|
+
this->n_cuda_opts = static_cast<int>(this->opts.size());
|
|
223
232
|
}
|
|
224
233
|
predict_cuda(&dataset, preds, this->metadata, this->edata, this->cuda_opt, this->n_cuda_opts, start_tree_idx, stop_tree_idx);
|
|
225
234
|
|
|
@@ -241,7 +250,7 @@ void GBRL::predict(const float *obs, const char *categorical_obs, float *start_p
|
|
|
241
250
|
this->metadata->n_cat_features = n_cat_features;
|
|
242
251
|
}
|
|
243
252
|
if (n_num_features != metadata->n_num_features || n_cat_features != metadata->n_cat_features){
|
|
244
|
-
std::cerr << "Error
|
|
253
|
+
std::cerr << "Error. Cannot use ensemble with this dataset. Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features.";
|
|
245
254
|
throw std::runtime_error("Incompatible dataset");
|
|
246
255
|
return;
|
|
247
256
|
}
|
|
@@ -252,7 +261,7 @@ void GBRL::predict(const float *obs, const char *categorical_obs, float *start_p
|
|
|
252
261
|
if (this->device == gpu){
|
|
253
262
|
if (this->cuda_opt == nullptr){
|
|
254
263
|
this->cuda_opt = deepCopySGDOptimizerVectorToGPU(this->opts);
|
|
255
|
-
this->n_cuda_opts = this->opts.size();
|
|
264
|
+
this->n_cuda_opts = static_cast<int>(this->opts.size());
|
|
256
265
|
}
|
|
257
266
|
predict_cuda(&dataset, start_preds, this->metadata, this->edata, this->cuda_opt, this->n_cuda_opts, start_tree_idx, stop_tree_idx);
|
|
258
267
|
}
|
|
@@ -278,7 +287,7 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
|
|
|
278
287
|
float stop_lr, int T,
|
|
279
288
|
float beta_1, float beta_2, float eps = 1.0e-8, float shrinkage = 1.0e-5){
|
|
280
289
|
if (this->opts.size() >= 2){
|
|
281
|
-
std::cerr << "Already set two optimizers
|
|
290
|
+
std::cerr << "Already set two optimizers. This is the limit." << std::endl;
|
|
282
291
|
throw std::runtime_error("Optimizer Limit Reached");
|
|
283
292
|
return;
|
|
284
293
|
}
|
|
@@ -289,7 +298,7 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
|
|
|
289
298
|
if (algo == Adam){
|
|
290
299
|
#ifdef USE_CUDA
|
|
291
300
|
if (this->device == gpu){
|
|
292
|
-
std::cerr << "The Adam optimizer has cpu support only
|
|
301
|
+
std::cerr << "The Adam optimizer has cpu support only." << std::endl;
|
|
293
302
|
throw std::runtime_error("Incompatible GPU optimizer");
|
|
294
303
|
return;
|
|
295
304
|
}
|
|
@@ -299,7 +308,7 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
|
|
|
299
308
|
} else if (scheduler_func == Linear){
|
|
300
309
|
opt = new AdamOptimizer(scheduler_func, init_lr, stop_lr, T, beta_1, beta_2, eps);
|
|
301
310
|
} else {
|
|
302
|
-
std::cerr << "Unrecoginized scheduler func
|
|
311
|
+
std::cerr << "Unrecoginized scheduler func." << std::endl;
|
|
303
312
|
throw std::runtime_error("Unrecognized scheduler func");
|
|
304
313
|
opt = nullptr;
|
|
305
314
|
return;
|
|
@@ -307,10 +316,10 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
|
|
|
307
316
|
|
|
308
317
|
if (this->opts.size() == 0){
|
|
309
318
|
opt->set_indices(0, this->metadata->policy_dim);
|
|
310
|
-
std::cout << "Setting policy optimizer
|
|
319
|
+
std::cout << "Setting policy optimizer." << std::endl;
|
|
311
320
|
} else {
|
|
312
321
|
opt->set_indices(this->metadata->policy_dim, this->metadata->output_dim);
|
|
313
|
-
std::cout << "Setting value optimizer
|
|
322
|
+
std::cout << "Setting value optimizer. Warning cannot set more optimizers" << std::endl;
|
|
314
323
|
}
|
|
315
324
|
|
|
316
325
|
this->opts.push_back(opt);
|
|
@@ -321,7 +330,7 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
|
|
|
321
330
|
} else if (scheduler_func == Linear){
|
|
322
331
|
opt = new SGDOptimizer(scheduler_func, init_lr, stop_lr, T);
|
|
323
332
|
} else {
|
|
324
|
-
std::cerr << "Unrecoginized scheduler func
|
|
333
|
+
std::cerr << "Unrecoginized scheduler func." << std::endl;
|
|
325
334
|
throw std::runtime_error("Unrecoginized scheduler func");
|
|
326
335
|
opt = nullptr;
|
|
327
336
|
return;
|
|
@@ -461,7 +470,7 @@ float GBRL::_fit_sl_gpu(dataSet *dataset, float *targets, const int n_iterations
|
|
|
461
470
|
|
|
462
471
|
if (this->cuda_opt == nullptr){
|
|
463
472
|
this->cuda_opt = deepCopySGDOptimizerVectorToGPU(this->opts);
|
|
464
|
-
this->n_cuda_opts = this->opts.size();
|
|
473
|
+
this->n_cuda_opts = static_cast<int>(this->opts.size());
|
|
465
474
|
}
|
|
466
475
|
|
|
467
476
|
err = cudaMalloc((void**)&device_memory_block, alloc_size);
|
|
@@ -568,7 +577,7 @@ void GBRL::fit(const float *obs, const char *categorical_obs, float *grads, cons
|
|
|
568
577
|
this->metadata->n_cat_features = n_cat_features;
|
|
569
578
|
}
|
|
570
579
|
if (n_num_features != metadata->n_num_features || n_cat_features != metadata->n_cat_features){
|
|
571
|
-
std::cerr << "Error
|
|
580
|
+
std::cerr << "Error. Cannot use ensemble with this dataset. Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features.";
|
|
572
581
|
throw std::runtime_error("Incompatible dataset");
|
|
573
582
|
return;
|
|
574
583
|
}
|
|
@@ -590,14 +599,14 @@ float GBRL::fit_sl(float *obs, char *categorical_obs, float *targets, int iterat
|
|
|
590
599
|
}
|
|
591
600
|
|
|
592
601
|
if (n_num_features != metadata->n_num_features || n_cat_features != metadata->n_cat_features){
|
|
593
|
-
std::cerr << "Error
|
|
602
|
+
std::cerr << "Error. Cannot use ensemble with this dataset. Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features.";
|
|
594
603
|
throw std::runtime_error("Incompatible dataset");
|
|
595
604
|
return -INFINITY;
|
|
596
605
|
}
|
|
597
606
|
|
|
598
607
|
for (auto& algo: this->opts){
|
|
599
608
|
if (algo->getAlgo() == Adam){
|
|
600
|
-
std::cerr << "Adam optimizer not supported in fit_sl function
|
|
609
|
+
std::cerr << "Adam optimizer not supported in fit_sl function. Use SGD" << std::endl;
|
|
601
610
|
throw std::runtime_error("Unsupported optimizer");
|
|
602
611
|
return 0.0;
|
|
603
612
|
}
|
|
@@ -688,7 +697,7 @@ int GBRL::saveToFile(const std::string& filename){
|
|
|
688
697
|
|
|
689
698
|
save_ensemble_data(file, this->edata, this->metadata, this->device);
|
|
690
699
|
|
|
691
|
-
int num_opts = this->opts.size();
|
|
700
|
+
int num_opts = static_cast<int>(this->opts.size());
|
|
692
701
|
file.write(reinterpret_cast<char*>(&num_opts), sizeof(int));
|
|
693
702
|
|
|
694
703
|
for (int i = 0; i < num_opts; ++i){
|
|
@@ -701,7 +710,7 @@ int GBRL::saveToFile(const std::string& filename){
|
|
|
701
710
|
}
|
|
702
711
|
|
|
703
712
|
if (!file.good()) {
|
|
704
|
-
std::cerr << "Error occurred at writing time
|
|
713
|
+
std::cerr << "Error occurred at writing time." << std::endl;
|
|
705
714
|
throw std::runtime_error("Writing to file error");
|
|
706
715
|
return -1;
|
|
707
716
|
}
|
|
@@ -727,7 +736,7 @@ int GBRL::loadFromFile(const std::string& filename){
|
|
|
727
736
|
this->metadata->use_cv = static_cast<bool>(byte);
|
|
728
737
|
|
|
729
738
|
if (!file.good()) {
|
|
730
|
-
std::cerr << "Error occurred while reading the file
|
|
739
|
+
std::cerr << "Error occurred while reading the file." << std::endl;
|
|
731
740
|
throw std::runtime_error("Reading file error");
|
|
732
741
|
return -1;
|
|
733
742
|
}
|
|
@@ -755,7 +764,7 @@ int GBRL::loadFromFile(const std::string& filename){
|
|
|
755
764
|
|
|
756
765
|
|
|
757
766
|
if (file.fail()) {
|
|
758
|
-
std::cerr << "Error occurred at file closing time
|
|
767
|
+
std::cerr << "Error occurred at file closing time." << std::endl;
|
|
759
768
|
throw std::runtime_error("File closing error");
|
|
760
769
|
return -1;
|
|
761
770
|
}
|
|
@@ -811,6 +820,21 @@ void GBRL::print_tree(int tree_idx){
|
|
|
811
820
|
#endif
|
|
812
821
|
}
|
|
813
822
|
|
|
823
|
+
#ifdef USE_CUDA
|
|
824
|
+
bool valid_device(){
|
|
825
|
+
int device_count = 0;
|
|
826
|
+
cudaError_t error = cudaGetDeviceCount(&device_count);
|
|
827
|
+
|
|
828
|
+
if (error != cudaSuccess) {
|
|
829
|
+
std::cout << "CUDA error when querying device count: " << cudaGetErrorString(error) << std::endl;
|
|
830
|
+
return false;
|
|
831
|
+
}
|
|
832
|
+
if (device_count == 0)
|
|
833
|
+
return false;
|
|
834
|
+
return true;
|
|
835
|
+
}
|
|
836
|
+
#endif
|
|
837
|
+
|
|
814
838
|
#ifdef USE_GRAPHVIZ
|
|
815
839
|
void GBRL::plot_tree(int tree_idx, const std::string &filename){
|
|
816
840
|
ensembleData *edata_cpu = this->edata;
|
|
@@ -820,7 +844,7 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
|
|
|
820
844
|
}
|
|
821
845
|
#endif
|
|
822
846
|
if (tree_idx >= this->metadata->n_trees){
|
|
823
|
-
std::cerr << "ERROR - Tree idx: " << tree_idx << " > " << this->metadata->n_trees - 1 << " maximum index
|
|
847
|
+
std::cerr << "ERROR - Tree idx: " << tree_idx << " > " << this->metadata->n_trees - 1 << " maximum index." << std::endl;
|
|
824
848
|
throw std::runtime_error("Invalid tree index");
|
|
825
849
|
return;
|
|
826
850
|
}
|
|
@@ -855,7 +879,7 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
|
|
|
855
879
|
|
|
856
880
|
if (nodesMap.find(nodeIndex) == nodesMap.end()) { // Check if the root node already exists
|
|
857
881
|
std::strcpy(buffer, std::to_string(nodeIndex).c_str());
|
|
858
|
-
parentNode = agnode(g, buffer,
|
|
882
|
+
parentNode = agnode(g, buffer, true);
|
|
859
883
|
std::string nodeLabel = (is_numeric) ? std::to_string(feature_idx) + ", value > " + std::to_string(feature_value) : std::to_string(feature_idx + this->metadata->n_num_features) + ", value == " + std::string(categorical_value);
|
|
860
884
|
std::strcpy(buffer, nodeLabel.c_str());
|
|
861
885
|
// parentNode = agnode(g, buffer, TRUE); // Create root node or get it if already exists
|
|
@@ -878,7 +902,7 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
|
|
|
878
902
|
|
|
879
903
|
if (nodesMap.find(nodeIndex) == nodesMap.end()) {
|
|
880
904
|
std::strcpy(buffer, std::to_string(nodeIndex).c_str());
|
|
881
|
-
currentNode = agnode(g, buffer,
|
|
905
|
+
currentNode = agnode(g, buffer, true);
|
|
882
906
|
std::string nodeLabel = is_numeric ? std::to_string(feature_idx) + ", value > " + std::to_string(feature_value) : std::to_string(feature_idx + this->metadata->n_num_features) + ", value == " + std::string(categorical_value) ;
|
|
883
907
|
// std::cout << "printing child node: " << nodeIndex << " with label: " << nodeLabel << std::endl;
|
|
884
908
|
std::strcpy(buffer, nodeLabel.c_str());
|
|
@@ -893,8 +917,8 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
|
|
|
893
917
|
|
|
894
918
|
if (edgesSet.find(edgeKey) == edgesSet.end()) {
|
|
895
919
|
std::strcpy(buffer, edgeLabel.c_str());
|
|
896
|
-
edge = agedge(g, parentNode, currentNode, buffer,
|
|
897
|
-
agsafeset(edge, (char*)"label", buffer, (char*)"");
|
|
920
|
+
edge = agedge(g, parentNode, currentNode, buffer, true);
|
|
921
|
+
agsafeset(edge, (char*)"label", buffer, (char*)"");
|
|
898
922
|
edgesSet.insert(edgeKey);
|
|
899
923
|
}
|
|
900
924
|
|
|
@@ -907,8 +931,8 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
|
|
|
907
931
|
std::string leafLabel = "val = " + VectoString(edata_cpu->values + leaf_idx*this->metadata->output_dim, this->metadata->output_dim);
|
|
908
932
|
std::string uniqueLeafLabel = leafLabel + "_idx_" + std::to_string(leaf_idx);
|
|
909
933
|
std::strcpy(buffer, uniqueLeafLabel.c_str());
|
|
910
|
-
currentNode = agnode(g, buffer,
|
|
911
|
-
edge = agedge(g, parentNode, currentNode, NULL,
|
|
934
|
+
currentNode = agnode(g, buffer, true);
|
|
935
|
+
edge = agedge(g, parentNode, currentNode, NULL, true);
|
|
912
936
|
|
|
913
937
|
agsafeset(currentNode, (char*)"label", buffer, (char*)"");
|
|
914
938
|
agsafeset(currentNode, (char*)"shape", (char*)"box", (char*)"");
|
|
@@ -88,7 +88,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
88
88
|
}
|
|
89
89
|
py::buffer_info info_grads = grads.request();
|
|
90
90
|
float* grads_ptr = static_cast<float*>(info_grads.ptr);
|
|
91
|
-
int n_samples = info_grads.shape[0];
|
|
91
|
+
int n_samples = static_cast<int>(info_grads.shape[0]);
|
|
92
92
|
|
|
93
93
|
const float* obs_ptr = nullptr;
|
|
94
94
|
int n_num_features = 0;
|
|
@@ -98,7 +98,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
98
98
|
throw std::runtime_error("Arrays must be C-contiguous");
|
|
99
99
|
py::buffer_info info_obs = obs_array.request();
|
|
100
100
|
obs_ptr = static_cast<const float*>(info_obs.ptr);
|
|
101
|
-
n_num_features = info_obs.shape[1];
|
|
101
|
+
n_num_features = static_cast<int>(info_obs.shape[1]);
|
|
102
102
|
}
|
|
103
103
|
const char* cat_obs_ptr = nullptr;
|
|
104
104
|
int n_cat_features = 0;
|
|
@@ -108,7 +108,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
108
108
|
throw std::runtime_error("Arrays must be C-contiguous");
|
|
109
109
|
py::buffer_info info_categorical_obs = py_array.request();
|
|
110
110
|
cat_obs_ptr = static_cast<const char*>(info_categorical_obs.ptr);
|
|
111
|
-
n_cat_features = info_categorical_obs.shape[1];
|
|
111
|
+
n_cat_features = static_cast<int>(info_categorical_obs.shape[1]);
|
|
112
112
|
}
|
|
113
113
|
|
|
114
114
|
py::gil_scoped_release release;
|
|
@@ -130,7 +130,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
130
130
|
throw std::runtime_error("Arrays must be C-contiguous");
|
|
131
131
|
py::buffer_info info_obs = obs_array.request();
|
|
132
132
|
obs_ptr = static_cast<float*>(info_obs.ptr);
|
|
133
|
-
n_num_features = info_obs.shape[1];
|
|
133
|
+
n_num_features = static_cast<int>(info_obs.shape[1]);
|
|
134
134
|
}
|
|
135
135
|
|
|
136
136
|
char* cat_obs_ptr = nullptr;
|
|
@@ -141,13 +141,13 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
141
141
|
throw std::runtime_error("Arrays must be C-contiguous");
|
|
142
142
|
py::buffer_info info_categorical_obs = py_array.request();
|
|
143
143
|
cat_obs_ptr = static_cast<char*>(info_categorical_obs.ptr);
|
|
144
|
-
n_cat_features = info_categorical_obs.shape[1];
|
|
144
|
+
n_cat_features = static_cast<int>(info_categorical_obs.shape[1]);
|
|
145
145
|
}
|
|
146
146
|
|
|
147
147
|
py::gil_scoped_release release;
|
|
148
148
|
py::buffer_info info_targets = targets.request();
|
|
149
149
|
float* targets_ptr = static_cast<float*>(info_targets.ptr);
|
|
150
|
-
int n_samples = info_targets.shape[0];
|
|
150
|
+
int n_samples = static_cast<int>(info_targets.shape[0]);
|
|
151
151
|
return self.fit_sl(obs_ptr, cat_obs_ptr, targets_ptr, iterations, n_samples, n_num_features, n_cat_features, shuffle, loss_type);
|
|
152
152
|
}, py::arg("obs"),
|
|
153
153
|
py::arg("categorical_obs"),
|
|
@@ -164,7 +164,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
164
164
|
|
|
165
165
|
py::buffer_info info = bias.request();
|
|
166
166
|
float* bias_ptr = static_cast<float*>(info.ptr);
|
|
167
|
-
int output_dim = len(bias);
|
|
167
|
+
int output_dim = static_cast<int>(len(bias));
|
|
168
168
|
|
|
169
169
|
self.set_bias(bias_ptr, output_dim);
|
|
170
170
|
}, "Set GBRL model bias");
|
|
@@ -199,8 +199,8 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
199
199
|
throw std::runtime_error("Arrays must be C-contiguous");
|
|
200
200
|
py::buffer_info info_obs = obs_array.request();
|
|
201
201
|
obs_ptr = static_cast<const float*>(info_obs.ptr);
|
|
202
|
-
n_num_features = info_obs.shape[1];
|
|
203
|
-
n_samples = info_obs.shape[0];
|
|
202
|
+
n_num_features = static_cast<int>(info_obs.shape[1]);
|
|
203
|
+
n_samples = static_cast<int>(info_obs.shape[0]);
|
|
204
204
|
}
|
|
205
205
|
int n_cat_features = 0;
|
|
206
206
|
const char *cat_obs_ptr = nullptr;
|
|
@@ -211,8 +211,8 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
211
211
|
|
|
212
212
|
py::buffer_info info_categorical_obs = py_array.request();
|
|
213
213
|
cat_obs_ptr = static_cast<const char*>(info_categorical_obs.ptr);
|
|
214
|
-
n_cat_features = info_categorical_obs.shape[1];
|
|
215
|
-
n_samples = info_categorical_obs.shape[0];
|
|
214
|
+
n_cat_features = static_cast<int>(info_categorical_obs.shape[1]);
|
|
215
|
+
n_samples = static_cast<int>(info_categorical_obs.shape[0]);
|
|
216
216
|
}
|
|
217
217
|
|
|
218
218
|
py::gil_scoped_release release;
|
|
@@ -235,7 +235,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
235
235
|
throw std::runtime_error("Arrays must be C-contiguous");
|
|
236
236
|
py::buffer_info info_obs = obs_array.request();
|
|
237
237
|
obs_ptr = static_cast<const float*>(info_obs.ptr);
|
|
238
|
-
n_num_features = info_obs.shape[1];
|
|
238
|
+
n_num_features = static_cast<int>(info_obs.shape[1]);
|
|
239
239
|
}
|
|
240
240
|
int n_cat_features = 0;
|
|
241
241
|
const char *cat_obs_ptr = nullptr;
|
|
@@ -245,12 +245,12 @@ PYBIND11_MODULE(gbrl_cpp, m) {
|
|
|
245
245
|
throw std::runtime_error("Arrays must be C-contiguous");
|
|
246
246
|
py::buffer_info info_categorical_obs = py_array.request();
|
|
247
247
|
cat_obs_ptr = static_cast<const char*>(info_categorical_obs.ptr);
|
|
248
|
-
n_cat_features = info_categorical_obs.shape[1];
|
|
248
|
+
n_cat_features = static_cast<int>(info_categorical_obs.shape[1]);
|
|
249
249
|
}
|
|
250
250
|
py::gil_scoped_release release;
|
|
251
251
|
py::buffer_info info_preds = start_preds.request();
|
|
252
252
|
float* preds_ptr = static_cast<float*>(info_preds.ptr);
|
|
253
|
-
int n_samples = info_preds.shape[0];
|
|
253
|
+
int n_samples = static_cast<int>(info_preds.shape[0]);
|
|
254
254
|
self.predict(obs_ptr, cat_obs_ptr, preds_ptr, n_samples, n_num_features, n_cat_features, start_tree_idx, stop_tree_idx);
|
|
255
255
|
}, py::arg("obs"), py::arg("categorical_obs"), py::arg("start_preds"), py::arg("start_tree_idx")=0, py::arg("stop_tree_idx")=0, "Predict using the model");
|
|
256
256
|
// saveToFile method
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
#include "loss.h"
|
|
7
7
|
|
|
8
8
|
float MultiRMSE::get_loss_and_gradients(const float *raw_preds, const float *raw_targets, float *raw_grads, const int n_samples, const int output_dim){
|
|
9
|
-
float count_recip = 1.
|
|
9
|
+
float count_recip = 1.0f / static_cast<float>(n_samples);
|
|
10
10
|
const int n_threads = static_cast<int>(omp_get_max_threads());
|
|
11
11
|
int n_elements = n_samples*output_dim;
|
|
12
12
|
int elements_per_thread = n_elements / n_threads;
|
|
@@ -34,7 +34,7 @@ float MultiRMSE::get_loss_and_gradients(const float *raw_preds, const float *raw
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
float MultiRMSE::get_loss(const float *raw_preds, const float *raw_targets, const int n_samples, const int output_dim){
|
|
37
|
-
float count_recip = 1.
|
|
37
|
+
float count_recip = 1.0f / static_cast<float>(n_samples);
|
|
38
38
|
const int n_threads = static_cast<int>(omp_get_max_threads());
|
|
39
39
|
int samples_per_thread = n_samples / n_threads;
|
|
40
40
|
int row;
|
|
@@ -73,14 +73,14 @@ void divide_mat_by_vec_inplace(float *mat, const float *vec, const int n_samples
|
|
|
73
73
|
#pragma omp simd
|
|
74
74
|
for (int i = start_idx; i < end_idx; ++i) {
|
|
75
75
|
int col = i % n_cols;
|
|
76
|
-
mat[i] /= (vec[col] + 1e-
|
|
76
|
+
mat[i] /= (vec[col] + 1e-8f);
|
|
77
77
|
}
|
|
78
78
|
}
|
|
79
79
|
} else {
|
|
80
80
|
#pragma omp simd
|
|
81
81
|
for (int i = 0; i < n_elements; ++i) {
|
|
82
82
|
int col = i % n_cols;
|
|
83
|
-
mat[i] /= (vec[col] + 1e-
|
|
83
|
+
mat[i] /= (vec[col] + 1e-8f);
|
|
84
84
|
}
|
|
85
85
|
}
|
|
86
86
|
}
|
|
@@ -139,10 +139,10 @@ void multiply_mat_by_scalar(float *mat, float scalar, const int n_samples, const
|
|
|
139
139
|
float* calculate_mean(const float *mat, const int n_samples, const int n_cols, const int par_th){
|
|
140
140
|
int n_elements = n_samples * n_cols;
|
|
141
141
|
float *mean = new float[n_cols];
|
|
142
|
-
float n_samples_recip = 1.
|
|
142
|
+
float n_samples_recip = 1.0f / static_cast<float>(n_samples);
|
|
143
143
|
#pragma omp simd
|
|
144
144
|
for (int d = 0; d < n_cols; ++d)
|
|
145
|
-
mean[d] = 0;
|
|
145
|
+
mean[d] = 0.0f;
|
|
146
146
|
int n_threads = calculate_num_threads(n_elements, par_th);
|
|
147
147
|
if (n_threads > 1){
|
|
148
148
|
omp_set_num_threads(n_threads);
|
|
@@ -150,7 +150,7 @@ float* calculate_mean(const float *mat, const int n_samples, const int n_cols, c
|
|
|
150
150
|
float *thread_mean = new float[n_threads*n_cols];
|
|
151
151
|
#pragma omp simd
|
|
152
152
|
for (int d = 0; d < n_threads*n_cols; ++d)
|
|
153
|
-
thread_mean[d] = 0;
|
|
153
|
+
thread_mean[d] = 0.0f;
|
|
154
154
|
#pragma omp parallel
|
|
155
155
|
{
|
|
156
156
|
int thread_id = omp_get_thread_num();
|
|
@@ -244,9 +244,9 @@ float* calculate_row_covariance(const float *mat_l, const float *mat_r, const in
|
|
|
244
244
|
float *cov = new float[n_cols];
|
|
245
245
|
#pragma omp simd
|
|
246
246
|
for (int d = 0; d < n_cols; ++d)
|
|
247
|
-
cov[d] = 0;
|
|
247
|
+
cov[d] = 0.0f;
|
|
248
248
|
|
|
249
|
-
float n_samples_recip = 1.
|
|
249
|
+
float n_samples_recip = 1.0f / (static_cast<float>(n_samples) - 1.0f);
|
|
250
250
|
|
|
251
251
|
int n_threads = calculate_num_threads(n_elements, par_th);
|
|
252
252
|
if (n_threads > 1){
|
|
@@ -255,7 +255,7 @@ float* calculate_row_covariance(const float *mat_l, const float *mat_r, const in
|
|
|
255
255
|
float *thread_cov = new float[n_threads*n_cols];
|
|
256
256
|
#pragma omp simd
|
|
257
257
|
for (int d = 0; d < n_threads*n_cols; ++d)
|
|
258
|
-
thread_cov[d] = 0;
|
|
258
|
+
thread_cov[d] = 0.0f;
|
|
259
259
|
#pragma omp parallel
|
|
260
260
|
{
|
|
261
261
|
int thread_id = omp_get_thread_num();
|
|
@@ -291,7 +291,7 @@ float* calculate_row_covariance(const float *mat_l, const float *mat_r, const in
|
|
|
291
291
|
float* calculate_var_and_center(float *mat, const float *mean, const int n_samples, const int n_cols, const int par_th){
|
|
292
292
|
int n_elements = n_samples * n_cols;
|
|
293
293
|
float *var = new float[n_cols];
|
|
294
|
-
float n_samples_recip = 1.
|
|
294
|
+
float n_samples_recip = 1.0f / (static_cast<float>(n_samples) - 1.0f);
|
|
295
295
|
float value;
|
|
296
296
|
|
|
297
297
|
#pragma omp simd
|
|
@@ -305,7 +305,7 @@ float* calculate_var_and_center(float *mat, const float *mean, const int n_sampl
|
|
|
305
305
|
float *thread_var = new float[n_threads*n_cols];
|
|
306
306
|
#pragma omp simd
|
|
307
307
|
for (int d = 0; d < n_threads*n_cols; ++d)
|
|
308
|
-
thread_var[d] = 0;
|
|
308
|
+
thread_var[d] = 0.0f;
|
|
309
309
|
#pragma omp parallel
|
|
310
310
|
{
|
|
311
311
|
int thread_id = omp_get_thread_num();
|
|
@@ -345,7 +345,7 @@ float* calculate_var_and_center(float *mat, const float *mean, const int n_sampl
|
|
|
345
345
|
float* calculate_std_and_center(float *mat, const float *mean, const int n_samples, const int n_cols, const int par_th){
|
|
346
346
|
int n_elements = n_samples * n_cols;
|
|
347
347
|
float *var = new float[n_cols];
|
|
348
|
-
float n_samples_recip = 1.
|
|
348
|
+
float n_samples_recip = 1.0f / (static_cast<float>(n_samples) - 1.0f);
|
|
349
349
|
float value;
|
|
350
350
|
|
|
351
351
|
#pragma omp simd
|
|
@@ -472,12 +472,12 @@ void set_zero_mat(float *mat, const int size, const int par_th){
|
|
|
472
472
|
int end_idx = (thread_id == n_threads - 1) ? size : start_idx + elements_per_thread;
|
|
473
473
|
#pragma omp simd
|
|
474
474
|
for (int i = start_idx; i < end_idx; ++i)
|
|
475
|
-
mat[i] = 0;
|
|
475
|
+
mat[i] = 0.0f;
|
|
476
476
|
}
|
|
477
477
|
} else {
|
|
478
478
|
#pragma omp simd
|
|
479
479
|
for (int i = 0; i < size; ++i)
|
|
480
|
-
mat[i] = 0;
|
|
480
|
+
mat[i] = 0.0f;
|
|
481
481
|
}
|
|
482
482
|
}
|
|
483
483
|
|
|
@@ -163,7 +163,7 @@ float TreeNode::getSplitScore(dataSet *dataset, const float parent_score, scoreF
|
|
|
163
163
|
return this->splitScoreCosineCategorical(dataset->categorical_obs, dataset->build_grads, dataset->norm_grads, parent_score, split_candidate, min_data_in_leaf);
|
|
164
164
|
}
|
|
165
165
|
default: {
|
|
166
|
-
std::cerr << "Unknown scoreFunc
|
|
166
|
+
std::cerr << "Unknown scoreFunc." << std::endl;
|
|
167
167
|
return -INFINITY;
|
|
168
168
|
}
|
|
169
169
|
}
|
|
@@ -225,7 +225,7 @@ float TreeNode::splitScoreCosine(const float *obs, const float *grads, const flo
|
|
|
225
225
|
|
|
226
226
|
|
|
227
227
|
float left_count_f = static_cast<float>(left_count), right_count_f = static_cast<float>(right_count);
|
|
228
|
-
float left_count_recip = 1.
|
|
228
|
+
float left_count_recip = 1.0f / left_count_f, right_count_recip = 1.0f / right_count_f;
|
|
229
229
|
#pragma omp simd
|
|
230
230
|
for (int d = 0; d < n_cols; ++d){
|
|
231
231
|
left_mean[d] *= left_count_recip;
|
|
@@ -299,7 +299,7 @@ float TreeNode::splitScoreCosineCategorical(const char *obs, const float *grads,
|
|
|
299
299
|
|
|
300
300
|
|
|
301
301
|
float left_count_f = static_cast<float>(left_count), right_count_f = static_cast<float>(right_count);
|
|
302
|
-
float left_count_recip = 1.
|
|
302
|
+
float left_count_recip = 1.0f / left_count_f, right_count_recip = 1.0f / right_count_f;
|
|
303
303
|
#pragma omp simd
|
|
304
304
|
for (int d = 0; d < n_cols; ++d){
|
|
305
305
|
left_mean[d] *= left_count_recip;
|
|
@@ -360,7 +360,7 @@ float TreeNode::splitScoreL2(const float *obs, const float *grads, const float p
|
|
|
360
360
|
}
|
|
361
361
|
|
|
362
362
|
float left_count_f = static_cast<float>(left_count), right_count_f = static_cast<float>(right_count);
|
|
363
|
-
float left_count_recip = 1.
|
|
363
|
+
float left_count_recip = 1.0f / left_count, right_count_recip = 1.0f / right_count_f;
|
|
364
364
|
#pragma omp simd
|
|
365
365
|
for (int d = 0; d < n_cols; ++d){
|
|
366
366
|
left_mean[d] *= left_count_recip;
|
|
@@ -415,7 +415,7 @@ float TreeNode::splitScoreL2Categorical(const char *obs, const float *grads, con
|
|
|
415
415
|
}
|
|
416
416
|
|
|
417
417
|
float left_count_f = static_cast<float>(left_count), right_count_f = static_cast<float>(right_count);
|
|
418
|
-
float left_count_recip = 1.
|
|
418
|
+
float left_count_recip = 1.0f / left_count, right_count_recip = 1.0f / right_count_f;
|
|
419
419
|
#pragma omp simd
|
|
420
420
|
for (int d = 0; d < n_cols; ++d){
|
|
421
421
|
left_mean[d] *= left_count_recip;
|