gbrl 1.0.0.dev3__tar.gz → 1.0.0.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/LICENSE +3 -3
  2. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/MANIFEST.in +0 -0
  3. {gbrl-1.0.0.dev3/gbrl.egg-info → gbrl-1.0.0.dev4}/PKG-INFO +2 -2
  4. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/README.md +98 -106
  5. gbrl-1.0.0.dev4/gbrl/__init__.py +9 -0
  6. gbrl-1.0.0.dev4/gbrl/ac_gbrl.py +581 -0
  7. gbrl-1.0.0.dev4/gbrl/config.py +3 -0
  8. gbrl-1.0.0.dev4/gbrl/gbrl_wrapper.py +446 -0
  9. gbrl-1.0.0.dev4/gbrl/gbt.py +216 -0
  10. gbrl-1.0.0.dev4/gbrl/src/cpp/config.h +9 -0
  11. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/fitter.cpp +9 -5
  12. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/fitter.h +2 -2
  13. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/gbrl.cpp +24 -20
  14. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/gbrl.h +5 -4
  15. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/gbrl_binding.cpp +4 -4
  16. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/loss.cpp +2 -2
  17. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/node.cpp +0 -1
  18. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/types.h +9 -0
  19. gbrl-1.0.0.dev4/gbrl/src/cpp/utils.cpp +63 -0
  20. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/utils.h +6 -0
  21. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_loss.cu +1 -1
  22. gbrl-1.0.0.dev4/gbrl/utils.py +60 -0
  23. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4/gbrl.egg-info}/PKG-INFO +2 -2
  24. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl.egg-info/SOURCES.txt +7 -0
  25. gbrl-1.0.0.dev4/gbrl.egg-info/top_level.txt +1 -0
  26. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/pyproject.toml +2 -3
  27. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/setup.py +12 -9
  28. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/tests/test_gbt_multi.py +155 -195
  29. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/tests/test_gbt_single.py +84 -222
  30. gbrl-1.0.0.dev3/gbrl/src/cpp/utils.cpp +0 -30
  31. gbrl-1.0.0.dev3/gbrl.egg-info/top_level.txt +0 -1
  32. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/loss.h +0 -0
  33. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/main.cpp +0 -0
  34. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/math_ops.cpp +0 -0
  35. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/math_ops.h +0 -0
  36. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/node.h +0 -0
  37. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/optimizer.cpp +0 -0
  38. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/optimizer.h +0 -0
  39. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/predictor.cpp +0 -0
  40. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/predictor.h +0 -0
  41. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/scheduler.cpp +0 -0
  42. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/scheduler.h +0 -0
  43. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/split_candidate_generator.cpp +0 -0
  44. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/split_candidate_generator.h +0 -0
  45. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cpp/types.cpp +0 -0
  46. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_fitter.cu +0 -0
  47. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_fitter.h +0 -0
  48. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_loss.h +0 -0
  49. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_predictor.cu +0 -0
  50. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_predictor.h +0 -0
  51. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_preprocess.cu +0 -0
  52. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_preprocess.h +0 -0
  53. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_types.cu +0 -0
  54. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_types.h +0 -0
  55. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_utils.cu +0 -0
  56. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl/src/cuda/cuda_utils.h +0 -0
  57. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl.egg-info/dependency_links.txt +0 -0
  58. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/gbrl.egg-info/requires.txt +0 -0
  59. {gbrl-1.0.0.dev3 → gbrl-1.0.0.dev4}/setup.cfg +0 -0
@@ -4,7 +4,6 @@ NVIDIA License
4
4
 
5
5
  “Licensor” means any person or entity that distributes its Work.
6
6
  “Work” means (a) the original work of authorship made available under this license, which may include software, documentation, or other files, and (b) any additions to or derivative works thereof that are made available under this license.
7
- “NVIDIA Processors” means any central processing unit (CPU), graphics processing unit (GPU), field-programmable gate array (FPGA), application-specific integrated circuit (ASIC) or any combination thereof designed, made, sold, or provided by NVIDIA or its affiliates.
8
7
  The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this license, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work.
9
8
  Works are “made available” under this license by including in or with the Work either (a) a copyright notice referencing the applicability of this license to the Work, or (b) a copy of this license.
10
9
 
@@ -18,7 +17,7 @@ Works are “made available” under this license by including in or with the Wo
18
17
 
19
18
  3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this license (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself.
20
19
 
21
- 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially and with NVIDIA Processors. Notwithstanding the foregoing, NVIDIA Corporation and its affiliates may use the Work and any derivative works commercially. As used herein, “non-commercially” means for research or evaluation purposes only.
20
+ 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. Notwithstanding the foregoing, NVIDIA Corporation and its affiliates may use the Work and any derivative works commercially. As used herein, “non-commercially” means for research or evaluation purposes only.
22
21
 
23
22
  3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this license from such Licensor (including the grant in Section 2.1) will terminate immediately.
24
23
 
@@ -33,4 +32,5 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YO
33
32
 
34
33
  5. Limitation of Liability.
35
34
 
36
- EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
35
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
36
+
File without changes
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gbrl
3
- Version: 1.0.0.dev3
3
+ Version: 1.0.0.dev4
4
4
  Summary: Gradient Boosted Trees for RL
5
- Author-email: Benjamin Fuhrer <bfuhrer@nvidia.com>, Chen Tesslr <ctessler@nvidia.com>, Gal Dalal <galal@nvidia.com>
5
+ Author-email: Benjamin Fuhrer <bfuhrer@nvidia.com>, Chen Tessler <ctessler@nvidia.com>, Gal Dalal <galal@nvidia.com>
6
6
  License-File: LICENSE
7
7
  Requires-Dist: pybind11==2.11.1
8
8
  Requires-Dist: numpy
@@ -1,106 +1,98 @@
1
- # Gradient Boosting Reinforcement Learning (GBRL)
2
- GBRL is a Python-based GBT library designed and optimized for reinforcement learning (RL). GBRL is implemented in C++/CUDA aimed to seamlessly integrate within popular RL libraries.
3
-
4
- ### Key Features:
5
- - GBTs Tailored for RL: GBRL adapts the power of Gradient Boosting Trees to the unique challenges of RL environments, including non-stationarity and delayed feedback.
6
- - Optimized Actor-Critic Architecture: GBRL features a shared tree-based structure for policy and value functions. This significantly reduces memory and computational overhead, enabling it to tackle complex, high-dimensional RL problems.
7
- - Hardware Acceleration: GBRL leverages CUDA for hardware-accelerated computation, ensuring efficiency and speed.
8
- - Seamless Integration: GBRL is designed for easy integration with popular RL libraries, making it readily accessible for practitioners.
9
-
10
-
11
- ## Getting started
12
-
13
- ### Dependencies
14
- llvm
15
- openmp
16
-
17
- #### MAC OS
18
-
19
- Make sure to run:
20
- ```
21
- brew install libomp
22
- brew install llvm
23
- ```
24
-
25
- xcode command line tools should be installed installed
26
-
27
- ### Installation
28
- ```
29
- pip install gbrl
30
- ```
31
-
32
- For GPU support GBRL looks for `CUDA_PATH` or `CUDA_HOME` environment variables. Unless found, GBRL will automatically compile only for CPU.
33
-
34
- Verify that GPU is visible by running
35
- ```
36
- import gbrl
37
-
38
- gbrl.cuda_available()
39
- ```
40
-
41
- *OPTIONAL*
42
- For tree visualization make sure graphviz is installed before compilation.
43
-
44
- ## Usage Example in an RL library
45
- ```
46
- import torch as th
47
- from gbrl import ActorCriticGBRL
48
- from stable_baselines3.common.distributions import CategoricalDistribution
49
-
50
- ### initialize model ###
51
- # define tree structure parameters
52
- tree_struct = {'max_depth': 4, 'min_data_in_leaf': 0, 'n_bins': 256, 'grow_policy': 'oblivious'}
53
- # define gbrl parameters
54
- gbrl_params = {'control_variates': False, 'split_score_func': 'cosine', 'generator_type': 'Quantile'}
55
- # actor and critic optimizers
56
- policy_optimizer = {'algo': 'SGD', 'lr': 0.05}
57
- value_optimizer = {'algo': 'SGD', lr: '0.25'}
58
-
59
- # Given action_space instance of gym.spaces.Space
60
-
61
- model = ActorCriticGBRL(tree_struct=tree_struct, output_dim=action_space.n, pg_optimizer=policy_optimizer, value_optimizer=value_optimizer, shared_tree_struct=True, gbrl_params=gbrl_params, device='cuda')
62
- model.init_model()
63
-
64
- action_dist = CategoricalDistribution(action_space.n)
65
- ### Training loop ###
66
- logits, values = model(obs) # returns numpy arrays
67
- logits = th.tensor(logits, requires_grad=True)
68
- values = th.tensor(values, requires_grad=True)
69
-
70
- distribution = action_dist.proba_distribution(logits)
71
- log_prob = distribution.log_prob(actions)
72
-
73
- loss = ... calculate loss according to RL algorithm
74
-
75
- loss.backward()
76
-
77
- self.policy.fit(rollout_data., theta_grad, values_grad)
78
-
79
- # calculate grads (GBRL fits numpy arrays)
80
- logits_grads = logits.grad.detach().cpu().numpy() * len(obs) # gradients w.r.t to the *each sample
81
- value_grads = values.grad.detach().cpu().numpy() * len(obs)
82
-
83
- model.fit(obs, logits_grads, value_grads)
84
- ```
85
-
86
- ## Current Supported Features
87
- ### Tree Fitting
88
- - Greedy (Depth-wise) tree building - (CPU/GPU)
89
- - Oblivious (Symmetric) tree building - (CPU/GPU)
90
- - L2 split score - (CPU/GPU)
91
- - Cosine split score - (CPU/GPU)
92
- - Uniform based candidate generation - (CPU/GPU)
93
- - Quantile based candidate generation - (CPU/GPU)
94
- - Supervised learning fitting / Multi-iteration fitting - (CPU/GPU)
95
- - MultiRMSE loss (only)
96
- - Categorical Inputs
97
- ### GBT Inference
98
- - SGD optimizer - (CPU/GPU)
99
- - ADAM optimizer - (CPU only)
100
- - Control Variates (gradient variance reduction technique) - (CPU only)
101
- - Shared Tree for policy and value function - (CPU/GPU)
102
- - Linear and constant learning rate scheduler - (CPU/GPU only constant)
103
- - Support for up to two different optimizers (e.g, policy/value) - **(CPU/GPU if both are SGD)
104
-
105
- ## Citing
106
-
1
+ # Gradient Boosting Reinforcement Learning (GBRL)
2
+ GBRL is a Python-based GBT library designed and optimized for reinforcement learning (RL). GBRL is implemented in C++/CUDA and aims to integrate seamlessly with popular RL libraries.
3
+
4
+ ### Key Features:
5
+ - GBTs Tailored for RL: GBRL adapts the power of Gradient Boosting Trees to the unique challenges of RL environments, including non-stationarity and delayed feedback.
6
+ - Optimized Actor-Critic Architecture: GBRL features a shared tree-based structure for policy and value functions. This significantly reduces memory and computational overhead, enabling it to tackle complex, high-dimensional RL problems.
7
+ - Hardware Acceleration: GBRL leverages CUDA for hardware-accelerated computation, ensuring efficiency and speed.
8
+ - Seamless Integration: GBRL is designed for easy integration with popular RL libraries, making it readily accessible for practitioners.
9
+
10
+
11
+ ## Getting started
12
+
13
+ ### Dependencies
14
+ llvm
15
+ openmp
16
+
17
+ #### MAC OS
18
+
19
+ Make sure to run:
20
+ ```
21
+ brew install libomp
22
+ brew install llvm
23
+ ```
24
+
25
+ Xcode command line tools should be installed
26
+
27
+ ### Installation
28
+ ```
29
+ pip install gbrl
30
+ ```
31
+
32
+ For GPU support GBRL looks for `CUDA_PATH` or `CUDA_HOME` environment variables. Unless found, GBRL will automatically compile only for CPU.
33
+
34
+ Verify that GPU is visible by running
35
+ ```
36
+ import gbrl
37
+
38
+ gbrl.cuda_available()
39
+ ```
40
+
41
+ *OPTIONAL*
42
+ For tree visualization make sure graphviz is installed before compilation.
43
+
44
+ ## Usage Example in an RL library
45
+ ```
46
+ import torch as th
47
+ from gbrl import ActorCriticGBRL
48
+ from stable_baselines3.common.distributions import CategoricalDistribution
49
+
50
+ ### initialize model ###
51
+ # define tree structure parameters
52
+ tree_struct = {'max_depth': 4, 'min_data_in_leaf': 0, 'n_bins': 256, 'grow_policy': 'oblivious'}
53
+ # define gbrl parameters
54
+ gbrl_params = {'control_variates': False, 'split_score_func': 'cosine', 'generator_type': 'Quantile'}
55
+ # actor and critic optimizers
56
+ policy_optimizer = {'algo': 'SGD', 'lr': 0.05}
57
+ value_optimizer = {'algo': 'SGD', 'lr': 0.25}
58
+
59
+ # Given action_space instance of gym.spaces.Space
60
+
61
+ model = ActorCriticGBRL(tree_struct=tree_struct, output_dim=action_space.n, pg_optimizer=policy_optimizer, value_optimizer=value_optimizer, shared_tree_struct=True, gbrl_params=gbrl_params, device='cuda')
62
+ model.init_model()
63
+
64
+ action_dist = CategoricalDistribution(action_space.n)
65
+ ### Training loop ###
66
+ logits, values = model(obs, requires_grad=True) # returns numpy arrays
67
+
68
+ distribution = action_dist.proba_distribution(logits)
69
+ log_prob = distribution.log_prob(actions)
70
+
71
+ loss = ... calculate loss according to RL algorithm
72
+
73
+ loss.backward()
74
+
75
+ model.step(obs)
76
+ ```
77
+
78
+ ## Current Supported Features
79
+ ### Tree Fitting
80
+ - Greedy (Depth-wise) tree building - (CPU/GPU)
81
+ - Oblivious (Symmetric) tree building - (CPU/GPU)
82
+ - L2 split score - (CPU/GPU)
83
+ - Cosine split score - (CPU/GPU)
84
+ - Uniform based candidate generation - (CPU/GPU)
85
+ - Quantile based candidate generation - (CPU/GPU)
86
+ - Supervised learning fitting / Multi-iteration fitting - (CPU/GPU)
87
+ - MultiRMSE loss (only)
88
+ - Categorical Inputs
89
+ ### GBT Inference
90
+ - SGD optimizer - (CPU/GPU)
91
+ - ADAM optimizer - (CPU only)
92
+ - Control Variates (gradient variance reduction technique) - (CPU only)
93
+ - Shared Tree for policy and value function - (CPU/GPU)
94
+ - Linear and constant learning rate scheduler - (CPU/GPU only constant)
95
+ - Support for up to two different optimizers (e.g., policy/value) - **(CPU/GPU if both are SGD)**
96
+
97
+ ## Citing
98
+
@@ -0,0 +1,9 @@
1
+ __version__ = "1.0.0"
2
+
3
+ from .ac_gbrl import (ActorCritic, GaussianActor, ContinuousCritic,
4
+ DiscreteCritic, ParametricActor)
5
+ from .gbt import GradientBoostingTrees
6
+ from .gbrl_cpp import GBRL
7
+
8
+ cuda_available = GBRL.cuda_available
9
+