gbrl 1.0.0.dev1__tar.gz → 1.0.0.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. {gbrl-1.0.0.dev1/gbrl.egg-info → gbrl-1.0.0.dev2}/PKG-INFO +1 -1
  2. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/README.md +16 -1
  3. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/fitter.cpp +4 -2
  4. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/gbrl.cpp +55 -31
  5. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/gbrl.h +3 -1
  6. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/gbrl_binding.cpp +14 -14
  7. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/loss.cpp +2 -2
  8. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/math_ops.cpp +13 -13
  9. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/node.cpp +5 -5
  10. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/optimizer.cpp +7 -7
  11. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/optimizer.h +5 -5
  12. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/predictor.cpp +3 -3
  13. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/scheduler.h +1 -1
  14. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/split_candidate_generator.cpp +5 -5
  15. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/types.cpp +10 -10
  16. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/utils.cpp +1 -1
  17. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_fitter.cu +6 -6
  18. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_loss.cu +4 -3
  19. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_predictor.cu +4 -4
  20. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_types.cu +2 -2
  21. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2/gbrl.egg-info}/PKG-INFO +1 -1
  22. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/pyproject.toml +1 -1
  23. gbrl-1.0.0.dev2/setup.py +268 -0
  24. gbrl-1.0.0.dev1/setup.py +0 -178
  25. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/LICENSE +0 -0
  26. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/MANIFEST.in +0 -0
  27. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/fitter.h +0 -0
  28. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/loss.h +0 -0
  29. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/main.cpp +0 -0
  30. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/math_ops.h +0 -0
  31. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/node.h +0 -0
  32. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/predictor.h +0 -0
  33. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/scheduler.cpp +0 -0
  34. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/split_candidate_generator.h +0 -0
  35. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/types.h +0 -0
  36. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cpp/utils.h +0 -0
  37. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_fitter.h +0 -0
  38. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_loss.h +0 -0
  39. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_predictor.h +0 -0
  40. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_preprocess.cu +0 -0
  41. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_preprocess.h +0 -0
  42. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_types.h +0 -0
  43. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_utils.cu +0 -0
  44. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl/src/cuda/cuda_utils.h +0 -0
  45. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl.egg-info/SOURCES.txt +0 -0
  46. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl.egg-info/dependency_links.txt +0 -0
  47. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl.egg-info/requires.txt +0 -0
  48. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/gbrl.egg-info/top_level.txt +0 -0
  49. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/setup.cfg +0 -0
  50. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/tests/test_gbt_multi.py +0 -0
  51. {gbrl-1.0.0.dev1 → gbrl-1.0.0.dev2}/tests/test_gbt_single.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gbrl
3
- Version: 1.0.0.dev1
3
+ Version: 1.0.0.dev2
4
4
  Summary: Gradient Boosted Trees for RL
5
5
  Author-email: Benjamin Fuhrer <bfuhrer@nvidia.com>, Chen Tesslr <ctessler@nvidia.com>, Gal Dalal <galal@nvidia.com>
6
6
  License-File: LICENSE
@@ -10,9 +10,24 @@ GBRL is a Python-based GBT library designed and optimized for reinforcement lear
10
10
 
11
11
  ## Getting started
12
12
 
13
+ ### Dependencies
14
+ llvm
15
+ openmp
16
+
17
+ #### MAC OS
18
+
19
+ Make sure to run:
20
+ ```
21
+ brew install libomp
22
+ brew install llvm
23
+ ```
24
+
25
+ Xcode command line tools should be installed.
26
+
27
+ ### Installation
13
28
  ```
14
29
  pip install gbrl
15
- ```
30
+ ```
16
31
 
17
32
  For GPU support GBRL looks for `CUDA_PATH` or `CUDA_HOME` environment variables. Unless found, GBRL will automatically compile only for CPU.
18
33
 
@@ -335,8 +335,10 @@ int Fitter::fit_oblivious_tree(dataSet *dataset, ensembleData *edata, ensembleMe
335
335
  int *root_sample_indices = new int[n_samples];
336
336
  std::iota(root_sample_indices, root_sample_indices + n_samples, 0);
337
337
 
338
- std::vector<TreeNode*> tree_nodes(1 << metadata->max_depth);
339
- std::vector<TreeNode*> child_tree_nodes(1 << metadata->max_depth);
338
+ int max_n_leaves = 1 << metadata->max_depth;
339
+
340
+ std::vector<TreeNode*> tree_nodes(max_n_leaves);
341
+ std::vector<TreeNode*> child_tree_nodes(max_n_leaves);
340
342
  TreeNode *rootNode = new TreeNode(root_sample_indices, n_samples, metadata->n_num_features, metadata->n_cat_features, metadata->output_dim, metadata->policy_dim, depth, 0);
341
343
  tree_nodes[0] = rootNode;
342
344
 
@@ -81,7 +81,7 @@ GBRL::GBRL(int output_dim, int policy_dim, int max_depth, int min_data_in_leaf,
81
81
  GBRL::GBRL(const std::string& filename){
82
82
  int status = this->loadFromFile(filename);
83
83
  if (status != 0){
84
- std::cerr << "Error loading ! " << filename << std::endl;
84
+ std::cerr << "Error loading . " << filename << std::endl;
85
85
  throw std::runtime_error("File load error");
86
86
  }
87
87
  }
@@ -127,6 +127,8 @@ GBRL::~GBRL() {
127
127
  this->metadata = nullptr;
128
128
  }
129
129
 
130
+
131
+
130
132
  void GBRL::to_device(deviceType device){
131
133
  if (device == this->device){
132
134
  std::cout << "GBRL device is already " << deviceTypeToString(device) << std::endl;
@@ -134,11 +136,19 @@ void GBRL::to_device(deviceType device){
134
136
  }
135
137
  #ifndef USE_CUDA
136
138
  if (device == gpu)
137
- std::cerr << "GBRL was not compiled for GPU! using cpu device!" << std::endl;
139
+ std::cerr << "GBRL was not compiled for GPU. Using cpu device" << std::endl;
138
140
  this->edata = ensemble_data_alloc(this->metadata);
139
141
  this->device = cpu;
140
142
  return;
141
143
  #else
144
+
145
+ if (device == gpu){
146
+ bool is_valid = valid_device();
147
+ if (!is_valid){
148
+ std::cerr << "No GPU device found. Using cpu device" << std::endl;
149
+ device = cpu;
150
+ }
151
+ }
142
152
  if (this->device == unspecified){
143
153
  if (device == cpu){
144
154
  this->edata = ensemble_data_alloc(this->metadata);
@@ -146,7 +156,6 @@ void GBRL::to_device(deviceType device){
146
156
  } else {
147
157
  this->edata = ensemble_data_alloc_cuda(this->metadata);
148
158
  this->device = gpu;
149
- return;
150
159
  }
151
160
  } else if (this->device == cpu && device == gpu){
152
161
  ensembleData* edata_gpu = ensemble_data_copy_cpu_gpu(this->metadata, this->edata);
@@ -160,7 +169,7 @@ void GBRL::to_device(deviceType device){
160
169
  this->device = cpu;
161
170
  }
162
171
  if (this->device == gpu && this->metadata->use_cv){
163
- std::cout << "Cannot use control variates with GPU! Setting use_cv to False!" << std::endl;
172
+ std::cout << "Cannot use control variates with GPU. Setting use_cv to False." << std::endl;
164
173
  this->metadata->use_cv = false;
165
174
  }
166
175
  #endif
@@ -170,7 +179,7 @@ void GBRL::to_device(deviceType device){
170
179
  void GBRL::set_bias(float *bias, const int output_dim){
171
180
  if (output_dim != this->metadata->output_dim)
172
181
  {
173
- std::cerr << "Given bias vector has different dimensions than expect! " << " Given: " << output_dim << " expected: " << this->metadata->output_dim << std::endl;
182
+ std::cerr << "Given bias vector has different dimensions than expect. " << " Given: " << output_dim << " expected: " << this->metadata->output_dim << std::endl;
174
183
  throw std::runtime_error("Incompatible dimensions");
175
184
  return;
176
185
  }
@@ -183,7 +192,7 @@ void GBRL::set_bias(float *bias, const int output_dim){
183
192
  }
184
193
 
185
194
  float* GBRL::get_bias(){
186
- // returns a copy! must deallocated new float pointer!
195
+ // returns a copy. must deallocated new float pointer!
187
196
  #ifdef USE_CUDA
188
197
  if (this->device == gpu){
189
198
  float *bias = new float[this->metadata->output_dim];
@@ -208,7 +217,7 @@ float* GBRL::predict(const float *obs, const char *categorical_obs, const int n_
208
217
  }
209
218
  if (n_num_features != metadata->n_num_features || n_cat_features != metadata->n_cat_features){
210
219
  delete[] preds;
211
- std::cerr << "Error! Cannot use ensemble with this dataset! Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features!";
220
+ std::cerr << "Error. Cannot use ensemble with this dataset. Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features.";
212
221
  throw std::runtime_error("Incompatible dataset");
213
222
  }
214
223
 
@@ -219,7 +228,7 @@ float* GBRL::predict(const float *obs, const char *categorical_obs, const int n_
219
228
  if (this->device == gpu){
220
229
  if (this->cuda_opt == nullptr){
221
230
  this->cuda_opt = deepCopySGDOptimizerVectorToGPU(this->opts);
222
- this->n_cuda_opts = this->opts.size();
231
+ this->n_cuda_opts = static_cast<int>(this->opts.size());
223
232
  }
224
233
  predict_cuda(&dataset, preds, this->metadata, this->edata, this->cuda_opt, this->n_cuda_opts, start_tree_idx, stop_tree_idx);
225
234
 
@@ -241,7 +250,7 @@ void GBRL::predict(const float *obs, const char *categorical_obs, float *start_p
241
250
  this->metadata->n_cat_features = n_cat_features;
242
251
  }
243
252
  if (n_num_features != metadata->n_num_features || n_cat_features != metadata->n_cat_features){
244
- std::cerr << "Error! Cannot use ensemble with this dataset! Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features!";
253
+ std::cerr << "Error. Cannot use ensemble with this dataset. Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features.";
245
254
  throw std::runtime_error("Incompatible dataset");
246
255
  return;
247
256
  }
@@ -252,7 +261,7 @@ void GBRL::predict(const float *obs, const char *categorical_obs, float *start_p
252
261
  if (this->device == gpu){
253
262
  if (this->cuda_opt == nullptr){
254
263
  this->cuda_opt = deepCopySGDOptimizerVectorToGPU(this->opts);
255
- this->n_cuda_opts = this->opts.size();
264
+ this->n_cuda_opts = static_cast<int>(this->opts.size());
256
265
  }
257
266
  predict_cuda(&dataset, start_preds, this->metadata, this->edata, this->cuda_opt, this->n_cuda_opts, start_tree_idx, stop_tree_idx);
258
267
  }
@@ -278,7 +287,7 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
278
287
  float stop_lr, int T,
279
288
  float beta_1, float beta_2, float eps = 1.0e-8, float shrinkage = 1.0e-5){
280
289
  if (this->opts.size() >= 2){
281
- std::cerr << "Already set two optimizers! This is the limit!" << std::endl;
290
+ std::cerr << "Already set two optimizers. This is the limit." << std::endl;
282
291
  throw std::runtime_error("Optimizer Limit Reached");
283
292
  return;
284
293
  }
@@ -289,7 +298,7 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
289
298
  if (algo == Adam){
290
299
  #ifdef USE_CUDA
291
300
  if (this->device == gpu){
292
- std::cerr << "The Adam optimizer has cpu support only!" << std::endl;
301
+ std::cerr << "The Adam optimizer has cpu support only." << std::endl;
293
302
  throw std::runtime_error("Incompatible GPU optimizer");
294
303
  return;
295
304
  }
@@ -299,7 +308,7 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
299
308
  } else if (scheduler_func == Linear){
300
309
  opt = new AdamOptimizer(scheduler_func, init_lr, stop_lr, T, beta_1, beta_2, eps);
301
310
  } else {
302
- std::cerr << "Unrecoginized scheduler func!" << std::endl;
311
+ std::cerr << "Unrecoginized scheduler func." << std::endl;
303
312
  throw std::runtime_error("Unrecognized scheduler func");
304
313
  opt = nullptr;
305
314
  return;
@@ -307,10 +316,10 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
307
316
 
308
317
  if (this->opts.size() == 0){
309
318
  opt->set_indices(0, this->metadata->policy_dim);
310
- std::cout << "Setting policy optimizer!" << std::endl;
319
+ std::cout << "Setting policy optimizer." << std::endl;
311
320
  } else {
312
321
  opt->set_indices(this->metadata->policy_dim, this->metadata->output_dim);
313
- std::cout << "Setting value optimizer! Warning cannot set more optimizers" << std::endl;
322
+ std::cout << "Setting value optimizer. Warning cannot set more optimizers" << std::endl;
314
323
  }
315
324
 
316
325
  this->opts.push_back(opt);
@@ -321,7 +330,7 @@ void GBRL::set_optimizer(optimizerAlgo algo, schedulerFunc scheduler_func, float
321
330
  } else if (scheduler_func == Linear){
322
331
  opt = new SGDOptimizer(scheduler_func, init_lr, stop_lr, T);
323
332
  } else {
324
- std::cerr << "Unrecoginized scheduler func!" << std::endl;
333
+ std::cerr << "Unrecoginized scheduler func." << std::endl;
325
334
  throw std::runtime_error("Unrecoginized scheduler func");
326
335
  opt = nullptr;
327
336
  return;
@@ -461,7 +470,7 @@ float GBRL::_fit_sl_gpu(dataSet *dataset, float *targets, const int n_iterations
461
470
 
462
471
  if (this->cuda_opt == nullptr){
463
472
  this->cuda_opt = deepCopySGDOptimizerVectorToGPU(this->opts);
464
- this->n_cuda_opts = this->opts.size();
473
+ this->n_cuda_opts = static_cast<int>(this->opts.size());
465
474
  }
466
475
 
467
476
  err = cudaMalloc((void**)&device_memory_block, alloc_size);
@@ -568,7 +577,7 @@ void GBRL::fit(const float *obs, const char *categorical_obs, float *grads, cons
568
577
  this->metadata->n_cat_features = n_cat_features;
569
578
  }
570
579
  if (n_num_features != metadata->n_num_features || n_cat_features != metadata->n_cat_features){
571
- std::cerr << "Error! Cannot use ensemble with this dataset! Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features!";
580
+ std::cerr << "Error. Cannot use ensemble with this dataset. Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features.";
572
581
  throw std::runtime_error("Incompatible dataset");
573
582
  return;
574
583
  }
@@ -590,14 +599,14 @@ float GBRL::fit_sl(float *obs, char *categorical_obs, float *targets, int iterat
590
599
  }
591
600
 
592
601
  if (n_num_features != metadata->n_num_features || n_cat_features != metadata->n_cat_features){
593
- std::cerr << "Error! Cannot use ensemble with this dataset! Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features!";
602
+ std::cerr << "Error. Cannot use ensemble with this dataset. Excepted input with " << metadata->n_num_features << " numerical features followed by " << metadata->n_cat_features << " categorical features, but received " << n_num_features << " numerical features and " << n_cat_features << " categorical features.";
594
603
  throw std::runtime_error("Incompatible dataset");
595
604
  return -INFINITY;
596
605
  }
597
606
 
598
607
  for (auto& algo: this->opts){
599
608
  if (algo->getAlgo() == Adam){
600
- std::cerr << "Adam optimizer not supported in fit_sl function! Use SGD" << std::endl;
609
+ std::cerr << "Adam optimizer not supported in fit_sl function. Use SGD" << std::endl;
601
610
  throw std::runtime_error("Unsupported optimizer");
602
611
  return 0.0;
603
612
  }
@@ -688,7 +697,7 @@ int GBRL::saveToFile(const std::string& filename){
688
697
 
689
698
  save_ensemble_data(file, this->edata, this->metadata, this->device);
690
699
 
691
- int num_opts = this->opts.size();
700
+ int num_opts = static_cast<int>(this->opts.size());
692
701
  file.write(reinterpret_cast<char*>(&num_opts), sizeof(int));
693
702
 
694
703
  for (int i = 0; i < num_opts; ++i){
@@ -701,7 +710,7 @@ int GBRL::saveToFile(const std::string& filename){
701
710
  }
702
711
 
703
712
  if (!file.good()) {
704
- std::cerr << "Error occurred at writing time!" << std::endl;
713
+ std::cerr << "Error occurred at writing time." << std::endl;
705
714
  throw std::runtime_error("Writing to file error");
706
715
  return -1;
707
716
  }
@@ -727,7 +736,7 @@ int GBRL::loadFromFile(const std::string& filename){
727
736
  this->metadata->use_cv = static_cast<bool>(byte);
728
737
 
729
738
  if (!file.good()) {
730
- std::cerr << "Error occurred while reading the file!" << std::endl;
739
+ std::cerr << "Error occurred while reading the file." << std::endl;
731
740
  throw std::runtime_error("Reading file error");
732
741
  return -1;
733
742
  }
@@ -755,7 +764,7 @@ int GBRL::loadFromFile(const std::string& filename){
755
764
 
756
765
 
757
766
  if (file.fail()) {
758
- std::cerr << "Error occurred at file closing time!" << std::endl;
767
+ std::cerr << "Error occurred at file closing time." << std::endl;
759
768
  throw std::runtime_error("File closing error");
760
769
  return -1;
761
770
  }
@@ -811,6 +820,21 @@ void GBRL::print_tree(int tree_idx){
811
820
  #endif
812
821
  }
813
822
 
823
+ #ifdef USE_CUDA
824
+ bool valid_device(){
825
+ int device_count = 0;
826
+ cudaError_t error = cudaGetDeviceCount(&device_count);
827
+
828
+ if (error != cudaSuccess) {
829
+ std::cout << "CUDA error when querying device count: " << cudaGetErrorString(error) << std::endl;
830
+ return false;
831
+ }
832
+ if (device_count == 0)
833
+ return false;
834
+ return true;
835
+ }
836
+ #endif
837
+
814
838
  #ifdef USE_GRAPHVIZ
815
839
  void GBRL::plot_tree(int tree_idx, const std::string &filename){
816
840
  ensembleData *edata_cpu = this->edata;
@@ -820,7 +844,7 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
820
844
  }
821
845
  #endif
822
846
  if (tree_idx >= this->metadata->n_trees){
823
- std::cerr << "ERROR - Tree idx: " << tree_idx << " > " << this->metadata->n_trees - 1 << " maximum index!" << std::endl;
847
+ std::cerr << "ERROR - Tree idx: " << tree_idx << " > " << this->metadata->n_trees - 1 << " maximum index." << std::endl;
824
848
  throw std::runtime_error("Invalid tree index");
825
849
  return;
826
850
  }
@@ -855,7 +879,7 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
855
879
 
856
880
  if (nodesMap.find(nodeIndex) == nodesMap.end()) { // Check if the root node already exists
857
881
  std::strcpy(buffer, std::to_string(nodeIndex).c_str());
858
- parentNode = agnode(g, buffer, TRUE);
882
+ parentNode = agnode(g, buffer, true);
859
883
  std::string nodeLabel = (is_numeric) ? std::to_string(feature_idx) + ", value > " + std::to_string(feature_value) : std::to_string(feature_idx + this->metadata->n_num_features) + ", value == " + std::string(categorical_value);
860
884
  std::strcpy(buffer, nodeLabel.c_str());
861
885
  // parentNode = agnode(g, buffer, TRUE); // Create root node or get it if already exists
@@ -878,7 +902,7 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
878
902
 
879
903
  if (nodesMap.find(nodeIndex) == nodesMap.end()) {
880
904
  std::strcpy(buffer, std::to_string(nodeIndex).c_str());
881
- currentNode = agnode(g, buffer, TRUE);
905
+ currentNode = agnode(g, buffer, true);
882
906
  std::string nodeLabel = is_numeric ? std::to_string(feature_idx) + ", value > " + std::to_string(feature_value) : std::to_string(feature_idx + this->metadata->n_num_features) + ", value == " + std::string(categorical_value) ;
883
907
  // std::cout << "printing child node: " << nodeIndex << " with label: " << nodeLabel << std::endl;
884
908
  std::strcpy(buffer, nodeLabel.c_str());
@@ -893,8 +917,8 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
893
917
 
894
918
  if (edgesSet.find(edgeKey) == edgesSet.end()) {
895
919
  std::strcpy(buffer, edgeLabel.c_str());
896
- edge = agedge(g, parentNode, currentNode, buffer, TRUE);
897
- agsafeset(edge, (char*)"label", buffer, (char*)"");
920
+ edge = agedge(g, parentNode, currentNode, buffer, true);
921
+ agsafeset(edge, (char*)"label", buffer, (char*)"");
898
922
  edgesSet.insert(edgeKey);
899
923
  }
900
924
 
@@ -907,8 +931,8 @@ void GBRL::plot_tree(int tree_idx, const std::string &filename){
907
931
  std::string leafLabel = "val = " + VectoString(edata_cpu->values + leaf_idx*this->metadata->output_dim, this->metadata->output_dim);
908
932
  std::string uniqueLeafLabel = leafLabel + "_idx_" + std::to_string(leaf_idx);
909
933
  std::strcpy(buffer, uniqueLeafLabel.c_str());
910
- currentNode = agnode(g, buffer, TRUE);
911
- edge = agedge(g, parentNode, currentNode, NULL, TRUE);
934
+ currentNode = agnode(g, buffer, true);
935
+ edge = agedge(g, parentNode, currentNode, NULL, true);
912
936
 
913
937
  agsafeset(currentNode, (char*)"label", buffer, (char*)"");
914
938
  agsafeset(currentNode, (char*)"shape", (char*)"box", (char*)"");
@@ -68,7 +68,9 @@ class GBRL {
68
68
  };
69
69
 
70
70
 
71
-
71
+ #ifdef USE_CUDA
72
+ bool valid_device();
73
+ #endif
72
74
 
73
75
 
74
76
  #endif
@@ -88,7 +88,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
88
88
  }
89
89
  py::buffer_info info_grads = grads.request();
90
90
  float* grads_ptr = static_cast<float*>(info_grads.ptr);
91
- int n_samples = info_grads.shape[0];
91
+ int n_samples = static_cast<int>(info_grads.shape[0]);
92
92
 
93
93
  const float* obs_ptr = nullptr;
94
94
  int n_num_features = 0;
@@ -98,7 +98,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
98
98
  throw std::runtime_error("Arrays must be C-contiguous");
99
99
  py::buffer_info info_obs = obs_array.request();
100
100
  obs_ptr = static_cast<const float*>(info_obs.ptr);
101
- n_num_features = info_obs.shape[1];
101
+ n_num_features = static_cast<int>(info_obs.shape[1]);
102
102
  }
103
103
  const char* cat_obs_ptr = nullptr;
104
104
  int n_cat_features = 0;
@@ -108,7 +108,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
108
108
  throw std::runtime_error("Arrays must be C-contiguous");
109
109
  py::buffer_info info_categorical_obs = py_array.request();
110
110
  cat_obs_ptr = static_cast<const char*>(info_categorical_obs.ptr);
111
- n_cat_features = info_categorical_obs.shape[1];
111
+ n_cat_features = static_cast<int>(info_categorical_obs.shape[1]);
112
112
  }
113
113
 
114
114
  py::gil_scoped_release release;
@@ -130,7 +130,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
130
130
  throw std::runtime_error("Arrays must be C-contiguous");
131
131
  py::buffer_info info_obs = obs_array.request();
132
132
  obs_ptr = static_cast<float*>(info_obs.ptr);
133
- n_num_features = info_obs.shape[1];
133
+ n_num_features = static_cast<int>(info_obs.shape[1]);
134
134
  }
135
135
 
136
136
  char* cat_obs_ptr = nullptr;
@@ -141,13 +141,13 @@ PYBIND11_MODULE(gbrl_cpp, m) {
141
141
  throw std::runtime_error("Arrays must be C-contiguous");
142
142
  py::buffer_info info_categorical_obs = py_array.request();
143
143
  cat_obs_ptr = static_cast<char*>(info_categorical_obs.ptr);
144
- n_cat_features = info_categorical_obs.shape[1];
144
+ n_cat_features = static_cast<int>(info_categorical_obs.shape[1]);
145
145
  }
146
146
 
147
147
  py::gil_scoped_release release;
148
148
  py::buffer_info info_targets = targets.request();
149
149
  float* targets_ptr = static_cast<float*>(info_targets.ptr);
150
- int n_samples = info_targets.shape[0];
150
+ int n_samples = static_cast<int>(info_targets.shape[0]);
151
151
  return self.fit_sl(obs_ptr, cat_obs_ptr, targets_ptr, iterations, n_samples, n_num_features, n_cat_features, shuffle, loss_type);
152
152
  }, py::arg("obs"),
153
153
  py::arg("categorical_obs"),
@@ -164,7 +164,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
164
164
 
165
165
  py::buffer_info info = bias.request();
166
166
  float* bias_ptr = static_cast<float*>(info.ptr);
167
- int output_dim = len(bias);
167
+ int output_dim = static_cast<int>(len(bias));
168
168
 
169
169
  self.set_bias(bias_ptr, output_dim);
170
170
  }, "Set GBRL model bias");
@@ -199,8 +199,8 @@ PYBIND11_MODULE(gbrl_cpp, m) {
199
199
  throw std::runtime_error("Arrays must be C-contiguous");
200
200
  py::buffer_info info_obs = obs_array.request();
201
201
  obs_ptr = static_cast<const float*>(info_obs.ptr);
202
- n_num_features = info_obs.shape[1];
203
- n_samples = info_obs.shape[0];
202
+ n_num_features = static_cast<int>(info_obs.shape[1]);
203
+ n_samples = static_cast<int>(info_obs.shape[0]);
204
204
  }
205
205
  int n_cat_features = 0;
206
206
  const char *cat_obs_ptr = nullptr;
@@ -211,8 +211,8 @@ PYBIND11_MODULE(gbrl_cpp, m) {
211
211
 
212
212
  py::buffer_info info_categorical_obs = py_array.request();
213
213
  cat_obs_ptr = static_cast<const char*>(info_categorical_obs.ptr);
214
- n_cat_features = info_categorical_obs.shape[1];
215
- n_samples = info_categorical_obs.shape[0];
214
+ n_cat_features = static_cast<int>(info_categorical_obs.shape[1]);
215
+ n_samples = static_cast<int>(info_categorical_obs.shape[0]);
216
216
  }
217
217
 
218
218
  py::gil_scoped_release release;
@@ -235,7 +235,7 @@ PYBIND11_MODULE(gbrl_cpp, m) {
235
235
  throw std::runtime_error("Arrays must be C-contiguous");
236
236
  py::buffer_info info_obs = obs_array.request();
237
237
  obs_ptr = static_cast<const float*>(info_obs.ptr);
238
- n_num_features = info_obs.shape[1];
238
+ n_num_features = static_cast<int>(info_obs.shape[1]);
239
239
  }
240
240
  int n_cat_features = 0;
241
241
  const char *cat_obs_ptr = nullptr;
@@ -245,12 +245,12 @@ PYBIND11_MODULE(gbrl_cpp, m) {
245
245
  throw std::runtime_error("Arrays must be C-contiguous");
246
246
  py::buffer_info info_categorical_obs = py_array.request();
247
247
  cat_obs_ptr = static_cast<const char*>(info_categorical_obs.ptr);
248
- n_cat_features = info_categorical_obs.shape[1];
248
+ n_cat_features = static_cast<int>(info_categorical_obs.shape[1]);
249
249
  }
250
250
  py::gil_scoped_release release;
251
251
  py::buffer_info info_preds = start_preds.request();
252
252
  float* preds_ptr = static_cast<float*>(info_preds.ptr);
253
- int n_samples = info_preds.shape[0];
253
+ int n_samples = static_cast<int>(info_preds.shape[0]);
254
254
  self.predict(obs_ptr, cat_obs_ptr, preds_ptr, n_samples, n_num_features, n_cat_features, start_tree_idx, stop_tree_idx);
255
255
  }, py::arg("obs"), py::arg("categorical_obs"), py::arg("start_preds"), py::arg("start_tree_idx")=0, py::arg("stop_tree_idx")=0, "Predict using the model");
256
256
  // saveToFile method
@@ -6,7 +6,7 @@
6
6
  #include "loss.h"
7
7
 
8
8
  float MultiRMSE::get_loss_and_gradients(const float *raw_preds, const float *raw_targets, float *raw_grads, const int n_samples, const int output_dim){
9
- float count_recip = 1.0 / static_cast<float>(n_samples);
9
+ float count_recip = 1.0f / static_cast<float>(n_samples);
10
10
  const int n_threads = static_cast<int>(omp_get_max_threads());
11
11
  int n_elements = n_samples*output_dim;
12
12
  int elements_per_thread = n_elements / n_threads;
@@ -34,7 +34,7 @@ float MultiRMSE::get_loss_and_gradients(const float *raw_preds, const float *raw
34
34
  }
35
35
 
36
36
  float MultiRMSE::get_loss(const float *raw_preds, const float *raw_targets, const int n_samples, const int output_dim){
37
- float count_recip = 1.0 / static_cast<float>(n_samples);
37
+ float count_recip = 1.0f / static_cast<float>(n_samples);
38
38
  const int n_threads = static_cast<int>(omp_get_max_threads());
39
39
  int samples_per_thread = n_samples / n_threads;
40
40
  int row;
@@ -73,14 +73,14 @@ void divide_mat_by_vec_inplace(float *mat, const float *vec, const int n_samples
73
73
  #pragma omp simd
74
74
  for (int i = start_idx; i < end_idx; ++i) {
75
75
  int col = i % n_cols;
76
- mat[i] /= (vec[col] + 1e-8);
76
+ mat[i] /= (vec[col] + 1e-8f);
77
77
  }
78
78
  }
79
79
  } else {
80
80
  #pragma omp simd
81
81
  for (int i = 0; i < n_elements; ++i) {
82
82
  int col = i % n_cols;
83
- mat[i] /= (vec[col] + 1e-8);
83
+ mat[i] /= (vec[col] + 1e-8f);
84
84
  }
85
85
  }
86
86
  }
@@ -139,10 +139,10 @@ void multiply_mat_by_scalar(float *mat, float scalar, const int n_samples, const
139
139
  float* calculate_mean(const float *mat, const int n_samples, const int n_cols, const int par_th){
140
140
  int n_elements = n_samples * n_cols;
141
141
  float *mean = new float[n_cols];
142
- float n_samples_recip = 1.0 / static_cast<float>(n_samples);
142
+ float n_samples_recip = 1.0f / static_cast<float>(n_samples);
143
143
  #pragma omp simd
144
144
  for (int d = 0; d < n_cols; ++d)
145
- mean[d] = 0;
145
+ mean[d] = 0.0f;
146
146
  int n_threads = calculate_num_threads(n_elements, par_th);
147
147
  if (n_threads > 1){
148
148
  omp_set_num_threads(n_threads);
@@ -150,7 +150,7 @@ float* calculate_mean(const float *mat, const int n_samples, const int n_cols, c
150
150
  float *thread_mean = new float[n_threads*n_cols];
151
151
  #pragma omp simd
152
152
  for (int d = 0; d < n_threads*n_cols; ++d)
153
- thread_mean[d] = 0;
153
+ thread_mean[d] = 0.0f;
154
154
  #pragma omp parallel
155
155
  {
156
156
  int thread_id = omp_get_thread_num();
@@ -244,9 +244,9 @@ float* calculate_row_covariance(const float *mat_l, const float *mat_r, const in
244
244
  float *cov = new float[n_cols];
245
245
  #pragma omp simd
246
246
  for (int d = 0; d < n_cols; ++d)
247
- cov[d] = 0;
247
+ cov[d] = 0.0f;
248
248
 
249
- float n_samples_recip = 1.0 / (static_cast<float>(n_samples) - 1);
249
+ float n_samples_recip = 1.0f / (static_cast<float>(n_samples) - 1.0f);
250
250
 
251
251
  int n_threads = calculate_num_threads(n_elements, par_th);
252
252
  if (n_threads > 1){
@@ -255,7 +255,7 @@ float* calculate_row_covariance(const float *mat_l, const float *mat_r, const in
255
255
  float *thread_cov = new float[n_threads*n_cols];
256
256
  #pragma omp simd
257
257
  for (int d = 0; d < n_threads*n_cols; ++d)
258
- thread_cov[d] = 0;
258
+ thread_cov[d] = 0.0f;
259
259
  #pragma omp parallel
260
260
  {
261
261
  int thread_id = omp_get_thread_num();
@@ -291,7 +291,7 @@ float* calculate_row_covariance(const float *mat_l, const float *mat_r, const in
291
291
  float* calculate_var_and_center(float *mat, const float *mean, const int n_samples, const int n_cols, const int par_th){
292
292
  int n_elements = n_samples * n_cols;
293
293
  float *var = new float[n_cols];
294
- float n_samples_recip = 1.0 / (static_cast<float>(n_samples) - 1);
294
+ float n_samples_recip = 1.0f / (static_cast<float>(n_samples) - 1.0f);
295
295
  float value;
296
296
 
297
297
  #pragma omp simd
@@ -305,7 +305,7 @@ float* calculate_var_and_center(float *mat, const float *mean, const int n_sampl
305
305
  float *thread_var = new float[n_threads*n_cols];
306
306
  #pragma omp simd
307
307
  for (int d = 0; d < n_threads*n_cols; ++d)
308
- thread_var[d] = 0;
308
+ thread_var[d] = 0.0f;
309
309
  #pragma omp parallel
310
310
  {
311
311
  int thread_id = omp_get_thread_num();
@@ -345,7 +345,7 @@ float* calculate_var_and_center(float *mat, const float *mean, const int n_sampl
345
345
  float* calculate_std_and_center(float *mat, const float *mean, const int n_samples, const int n_cols, const int par_th){
346
346
  int n_elements = n_samples * n_cols;
347
347
  float *var = new float[n_cols];
348
- float n_samples_recip = 1.0 / (static_cast<float>(n_samples) - 1);
348
+ float n_samples_recip = 1.0f / (static_cast<float>(n_samples) - 1.0f);
349
349
  float value;
350
350
 
351
351
  #pragma omp simd
@@ -472,12 +472,12 @@ void set_zero_mat(float *mat, const int size, const int par_th){
472
472
  int end_idx = (thread_id == n_threads - 1) ? size : start_idx + elements_per_thread;
473
473
  #pragma omp simd
474
474
  for (int i = start_idx; i < end_idx; ++i)
475
- mat[i] = 0;
475
+ mat[i] = 0.0f;
476
476
  }
477
477
  } else {
478
478
  #pragma omp simd
479
479
  for (int i = 0; i < size; ++i)
480
- mat[i] = 0;
480
+ mat[i] = 0.0f;
481
481
  }
482
482
  }
483
483
 
@@ -163,7 +163,7 @@ float TreeNode::getSplitScore(dataSet *dataset, const float parent_score, scoreF
163
163
  return this->splitScoreCosineCategorical(dataset->categorical_obs, dataset->build_grads, dataset->norm_grads, parent_score, split_candidate, min_data_in_leaf);
164
164
  }
165
165
  default: {
166
- std::cerr << "Unknown scoreFunc!" << std::endl;
166
+ std::cerr << "Unknown scoreFunc." << std::endl;
167
167
  return -INFINITY;
168
168
  }
169
169
  }
@@ -225,7 +225,7 @@ float TreeNode::splitScoreCosine(const float *obs, const float *grads, const flo
225
225
 
226
226
 
227
227
  float left_count_f = static_cast<float>(left_count), right_count_f = static_cast<float>(right_count);
228
- float left_count_recip = 1.0 / left_count_f, right_count_recip = 1.0 / right_count_f;
228
+ float left_count_recip = 1.0f / left_count_f, right_count_recip = 1.0f / right_count_f;
229
229
  #pragma omp simd
230
230
  for (int d = 0; d < n_cols; ++d){
231
231
  left_mean[d] *= left_count_recip;
@@ -299,7 +299,7 @@ float TreeNode::splitScoreCosineCategorical(const char *obs, const float *grads,
299
299
 
300
300
 
301
301
  float left_count_f = static_cast<float>(left_count), right_count_f = static_cast<float>(right_count);
302
- float left_count_recip = 1.0 / left_count_f, right_count_recip = 1.0 / right_count_f;
302
+ float left_count_recip = 1.0f / left_count_f, right_count_recip = 1.0f / right_count_f;
303
303
  #pragma omp simd
304
304
  for (int d = 0; d < n_cols; ++d){
305
305
  left_mean[d] *= left_count_recip;
@@ -360,7 +360,7 @@ float TreeNode::splitScoreL2(const float *obs, const float *grads, const float p
360
360
  }
361
361
 
362
362
  float left_count_f = static_cast<float>(left_count), right_count_f = static_cast<float>(right_count);
363
- float left_count_recip = 1.0 / left_count, right_count_recip = 1.0 / right_count_f;
363
+ float left_count_recip = 1.0f / left_count, right_count_recip = 1.0f / right_count_f;
364
364
  #pragma omp simd
365
365
  for (int d = 0; d < n_cols; ++d){
366
366
  left_mean[d] *= left_count_recip;
@@ -415,7 +415,7 @@ float TreeNode::splitScoreL2Categorical(const char *obs, const float *grads, con
415
415
  }
416
416
 
417
417
  float left_count_f = static_cast<float>(left_count), right_count_f = static_cast<float>(right_count);
418
- float left_count_recip = 1.0 / left_count, right_count_recip = 1.0 / right_count_f;
418
+ float left_count_recip = 1.0f / left_count, right_count_recip = 1.0f / right_count_f;
419
419
  #pragma omp simd
420
420
  for (int d = 0; d < n_cols; ++d){
421
421
  left_mean[d] *= left_count_recip;