cusrl 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. cusrl-1.0.0/.github/workflows/python-app.yml +47 -0
  2. cusrl-1.0.0/.gitignore +183 -0
  3. cusrl-1.0.0/.pre-commit-config.yaml +28 -0
  4. cusrl-1.0.0/PKG-INFO +109 -0
  5. cusrl-1.0.0/README.md +78 -0
  6. cusrl-1.0.0/cusrl/__init__.py +107 -0
  7. cusrl-1.0.0/cusrl/environment/__init__.py +11 -0
  8. cusrl-1.0.0/cusrl/environment/gym.py +157 -0
  9. cusrl-1.0.0/cusrl/environment/isaaclab.py +133 -0
  10. cusrl-1.0.0/cusrl/hook/__init__.py +58 -0
  11. cusrl-1.0.0/cusrl/hook/advantage.py +100 -0
  12. cusrl-1.0.0/cusrl/hook/condition.py +57 -0
  13. cusrl-1.0.0/cusrl/hook/gae.py +143 -0
  14. cusrl-1.0.0/cusrl/hook/gradient.py +48 -0
  15. cusrl-1.0.0/cusrl/hook/initialization.py +94 -0
  16. cusrl-1.0.0/cusrl/hook/lr_schedule.py +178 -0
  17. cusrl-1.0.0/cusrl/hook/normalization.py +194 -0
  18. cusrl-1.0.0/cusrl/hook/on_policy.py +35 -0
  19. cusrl-1.0.0/cusrl/hook/ppo.py +77 -0
  20. cusrl-1.0.0/cusrl/hook/representation.py +132 -0
  21. cusrl-1.0.0/cusrl/hook/rnd.py +66 -0
  22. cusrl-1.0.0/cusrl/hook/schedule.py +114 -0
  23. cusrl-1.0.0/cusrl/hook/smoothness.py +75 -0
  24. cusrl-1.0.0/cusrl/hook/statistics.py +28 -0
  25. cusrl-1.0.0/cusrl/hook/symmetry.py +233 -0
  26. cusrl-1.0.0/cusrl/hook/value.py +158 -0
  27. cusrl-1.0.0/cusrl/launch/export.py +43 -0
  28. cusrl-1.0.0/cusrl/launch/play.py +45 -0
  29. cusrl-1.0.0/cusrl/launch/train.py +62 -0
  30. cusrl-1.0.0/cusrl/logger/__init__.py +5 -0
  31. cusrl-1.0.0/cusrl/logger/make_factory.py +18 -0
  32. cusrl-1.0.0/cusrl/logger/tensorboard_logger.py +28 -0
  33. cusrl-1.0.0/cusrl/logger/wandb_logger.py +68 -0
  34. cusrl-1.0.0/cusrl/module/__init__.py +39 -0
  35. cusrl-1.0.0/cusrl/module/actor.py +203 -0
  36. cusrl-1.0.0/cusrl/module/attention.py +614 -0
  37. cusrl-1.0.0/cusrl/module/bijector.py +115 -0
  38. cusrl-1.0.0/cusrl/module/cnn.py +75 -0
  39. cusrl-1.0.0/cusrl/module/critic.py +73 -0
  40. cusrl-1.0.0/cusrl/module/distribution.py +263 -0
  41. cusrl-1.0.0/cusrl/module/inference.py +57 -0
  42. cusrl-1.0.0/cusrl/module/mlp.py +63 -0
  43. cusrl-1.0.0/cusrl/module/module.py +182 -0
  44. cusrl-1.0.0/cusrl/module/normalization.py +59 -0
  45. cusrl-1.0.0/cusrl/module/rnn.py +167 -0
  46. cusrl-1.0.0/cusrl/module/sequential.py +70 -0
  47. cusrl-1.0.0/cusrl/module/simba.py +70 -0
  48. cusrl-1.0.0/cusrl/preset/__init__.py +5 -0
  49. cusrl-1.0.0/cusrl/preset/ppo.py +216 -0
  50. cusrl-1.0.0/cusrl/sampler/__init__.py +11 -0
  51. cusrl-1.0.0/cusrl/sampler/mini_batch_sampler.py +78 -0
  52. cusrl-1.0.0/cusrl/template/__init__.py +27 -0
  53. cusrl-1.0.0/cusrl/template/actor_critic.py +321 -0
  54. cusrl-1.0.0/cusrl/template/agent.py +259 -0
  55. cusrl-1.0.0/cusrl/template/buffer.py +271 -0
  56. cusrl-1.0.0/cusrl/template/environment.py +208 -0
  57. cusrl-1.0.0/cusrl/template/hook.py +244 -0
  58. cusrl-1.0.0/cusrl/template/logger.py +76 -0
  59. cusrl-1.0.0/cusrl/template/optimizer.py +68 -0
  60. cusrl-1.0.0/cusrl/template/player.py +114 -0
  61. cusrl-1.0.0/cusrl/template/trainer.py +290 -0
  62. cusrl-1.0.0/cusrl/template/trial.py +103 -0
  63. cusrl-1.0.0/cusrl/utils/__init__.py +30 -0
  64. cusrl-1.0.0/cusrl/utils/cli.py +59 -0
  65. cusrl-1.0.0/cusrl/utils/config.py +75 -0
  66. cusrl-1.0.0/cusrl/utils/distributed.py +146 -0
  67. cusrl-1.0.0/cusrl/utils/export.py +98 -0
  68. cusrl-1.0.0/cusrl/utils/helper.py +122 -0
  69. cusrl-1.0.0/cusrl/utils/metrics.py +72 -0
  70. cusrl-1.0.0/cusrl/utils/nest.py +82 -0
  71. cusrl-1.0.0/cusrl/utils/normalizer.py +276 -0
  72. cusrl-1.0.0/cusrl/utils/recurrent.py +163 -0
  73. cusrl-1.0.0/cusrl/utils/timing.py +63 -0
  74. cusrl-1.0.0/cusrl/utils/typing.py +45 -0
  75. cusrl-1.0.0/cusrl/utils/video.py +21 -0
  76. cusrl-1.0.0/cusrl/zoo/__init__.py +8 -0
  77. cusrl-1.0.0/cusrl/zoo/experiment.py +105 -0
  78. cusrl-1.0.0/cusrl/zoo/gym/__init__.py +2 -0
  79. cusrl-1.0.0/cusrl/zoo/gym/box2d.py +63 -0
  80. cusrl-1.0.0/cusrl/zoo/gym/classic_control.py +142 -0
  81. cusrl-1.0.0/cusrl/zoo/isaaclab/__init__.py +2 -0
  82. cusrl-1.0.0/cusrl/zoo/isaaclab/classic.py +69 -0
  83. cusrl-1.0.0/cusrl/zoo/isaaclab/locomotion.py +93 -0
  84. cusrl-1.0.0/cusrl/zoo/registry.py +70 -0
  85. cusrl-1.0.0/cusrl.egg-info/PKG-INFO +109 -0
  86. cusrl-1.0.0/cusrl.egg-info/SOURCES.txt +112 -0
  87. cusrl-1.0.0/cusrl.egg-info/dependency_links.txt +1 -0
  88. cusrl-1.0.0/cusrl.egg-info/requires.txt +27 -0
  89. cusrl-1.0.0/cusrl.egg-info/top_level.txt +1 -0
  90. cusrl-1.0.0/cusrl_test/__init__.py +11 -0
  91. cusrl-1.0.0/cusrl_test/test_agent_export.py +31 -0
  92. cusrl-1.0.0/cusrl_test/test_agent_ppo_basic.py +55 -0
  93. cusrl-1.0.0/cusrl_test/test_agent_ppo_hook.py +51 -0
  94. cusrl-1.0.0/cusrl_test/test_agent_state_dict.py +102 -0
  95. cusrl-1.0.0/cusrl_test/test_env_gym.py +28 -0
  96. cusrl-1.0.0/cusrl_test/test_hook_activation.py +38 -0
  97. cusrl-1.0.0/cusrl_test/test_hook_normalization.py +102 -0
  98. cusrl-1.0.0/cusrl_test/test_hook_representation.py +25 -0
  99. cusrl-1.0.0/cusrl_test/test_hook_rnd.py +20 -0
  100. cusrl-1.0.0/cusrl_test/test_hook_smoothness.py +11 -0
  101. cusrl-1.0.0/cusrl_test/test_hook_symmetry.py +46 -0
  102. cusrl-1.0.0/cusrl_test/test_module_attention.py +87 -0
  103. cusrl-1.0.0/cusrl_test/test_module_cnn.py +39 -0
  104. cusrl-1.0.0/cusrl_test/test_module_inference.py +40 -0
  105. cusrl-1.0.0/cusrl_test/test_module_rnn.py +87 -0
  106. cusrl-1.0.0/cusrl_test/test_module_sequential.py +117 -0
  107. cusrl-1.0.0/cusrl_test/test_module_simba.py +35 -0
  108. cusrl-1.0.0/cusrl_test/test_template_player.py +10 -0
  109. cusrl-1.0.0/cusrl_test/test_util_normalizer.py +66 -0
  110. cusrl-1.0.0/cusrl_test/test_util_timing.py +25 -0
  111. cusrl-1.0.0/cusrl_test/utils.py +134 -0
  112. cusrl-1.0.0/pyproject.toml +64 -0
  113. cusrl-1.0.0/requirements.txt +5 -0
  114. cusrl-1.0.0/setup.cfg +4 -0
@@ -0,0 +1,47 @@
1
+ # This workflow will install Python dependencies, run tests and lint across a matrix of Python versions
2
+ # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3
+
4
+ name: Python application
5
+
6
+ on:
7
+ push:
8
+ branches: [ "main" ]
9
+ pull_request:
10
+ branches: [ "main" ]
11
+
12
+ permissions:
13
+ contents: read
14
+
15
+ jobs:
16
+ build:
17
+
18
+ runs-on: ubuntu-latest
19
+ strategy:
20
+ matrix:
21
+ python-version: [ "3.10", "3.11", "3.12", "3.13" ]
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ uses: actions/setup-python@v3
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+ - name: Display Python version
29
+ run: python -c "import sys; print(sys.version)"
30
+ - name: Install dependencies
31
+ run: |
32
+ rm ~/.cache/pip -rf
33
+ python -m pip install --upgrade pip
34
+ pip install flake8 pytest
35
+ pip install --no-cache-dir .[all]
36
+ - name: Lint with flake8
37
+ run: |
38
+ # stop the build if there are Python syntax errors or undefined names
39
+ flake8 . --count --select=E9,F63,F7,F82 --extend-ignore=E203 --show-source --statistics
40
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
41
+ flake8 . --count --exit-zero --extend-ignore=E203 --max-complexity=10 --max-line-length=127 --statistics
42
+ - name: Display device and autocast availability
43
+ run: |
44
+ python -c "import cusrl; print(cusrl.device(), cusrl.utils.is_autocast_available())"
45
+ - name: Test with pytest
46
+ run: |
47
+ pytest
cusrl-1.0.0/.gitignore ADDED
@@ -0,0 +1,183 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
176
+ # IDE
177
+ .idea/
178
+ .vscode/
179
+ .zed/
180
+
181
+ # Logs
182
+ logs/
183
+ wandb/
@@ -0,0 +1,28 @@
1
+ # See https://pre-commit.com for more information
2
+ # See https://pre-commit.com/hooks.html for more hooks
3
+ repos:
4
+ - repo: https://github.com/psf/black
5
+ rev: 25.1.0
6
+ hooks:
7
+ - id: black
8
+ - repo: https://github.com/codespell-project/codespell
9
+ rev: v2.4.1
10
+ hooks:
11
+ - id: codespell
12
+ additional_dependencies: [ tomli ]
13
+ - repo: https://github.com/pycqa/isort
14
+ rev: 6.0.1
15
+ hooks:
16
+ - id: isort
17
+ - repo: https://github.com/pre-commit/pre-commit-hooks
18
+ rev: v5.0.0
19
+ hooks:
20
+ - id: check-added-large-files
21
+ - id: check-yaml
22
+ - id: end-of-file-fixer
23
+ - id: trailing-whitespace
24
+ - repo: https://github.com/asottile/pyupgrade
25
+ rev: v3.20.0
26
+ hooks:
27
+ - id: pyupgrade
28
+ args: [ "--py310-plus" ]
cusrl-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,109 @@
1
+ Metadata-Version: 2.4
2
+ Name: cusrl
3
+ Version: 1.0.0
4
+ Summary: Customizable and modular RL algorithms implemented in PyTorch
5
+ Author-email: Chengrui Zhu <jewel@zju.edu.cn>
6
+ Keywords: reinforcement-learning,pytorch,rl
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: numpy>=1.26.0
10
+ Requires-Dist: torch>=2.4.0
11
+ Requires-Dist: objprint~=0.3.0
12
+ Requires-Dist: gymnasium>=1.1.0
13
+ Requires-Dist: pyyaml~=6.0.2
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest; extra == "dev"
16
+ Provides-Extra: onnx
17
+ Requires-Dist: onnx; extra == "onnx"
18
+ Requires-Dist: onnxruntime; extra == "onnx"
19
+ Requires-Dist: onnxscript; extra == "onnx"
20
+ Provides-Extra: tensorboard
21
+ Requires-Dist: tensorboard; extra == "tensorboard"
22
+ Provides-Extra: wandb
23
+ Requires-Dist: wandb; extra == "wandb"
24
+ Provides-Extra: all
25
+ Requires-Dist: pytest; extra == "all"
26
+ Requires-Dist: onnx; extra == "all"
27
+ Requires-Dist: onnxruntime; extra == "all"
28
+ Requires-Dist: onnxscript; extra == "all"
29
+ Requires-Dist: tensorboard; extra == "all"
30
+ Requires-Dist: wandb; extra == "all"
31
+
32
+ # CusRL: Customizable Reinforcement Learning
33
+
34
+ CusRL is a flexible and modular reinforcement learning framework that emphasizes customization.
35
+ Its clean and decoupled implementation allows researchers to easily integrate new components,
36
+ which is particularly useful for advancements in robotics learning.
37
+
38
+ > **Note:** This project is under **active development**, which means the interface is unstable
39
+ and breaking changes are likely to occur frequently.
40
+
41
+ ## Installation
42
+
43
+ Requires Python >= 3.10.
44
+
45
+ ```bash
46
+ git clone https://github.com/chengruiz/cusrl.git
47
+ # Minimal installation
48
+ pip install -e . --config-settings editable_mode=strict
49
+ # Install with all optional dependencies
50
+ pip install -e .[all] --config-settings editable_mode=strict
51
+ # For development, install pre-commit (assuming you have pre-commit installed)
52
+ pre-commit install
53
+ ```
54
+
55
+ ## Quick Start
56
+
57
+ Try to train a PPO agent with CusRL and evaluate it:
58
+
59
+ ```bash
60
+ python -m cusrl.launch.train -env MountainCar-v0 -alg ppo --logger tensorboard --seed 42
61
+ python -m cusrl.launch.play --checkpoint logs/MountainCar-v0:ppo
62
+ ```
63
+
64
+ Or, if you have [IsaacLab](https://github.com/isaac-sim/IsaacLab) installed:
65
+
66
+ ```bash
67
+ python -m cusrl.launch.train -env Isaac-Velocity-Rough-Anymal-C-v0 -alg ppo \
68
+ --logger tensorboard --environment-args="--headless"
69
+ python -m cusrl.launch.play --checkpoint logs/Isaac-Velocity-Rough-Anymal-C-v0:ppo
70
+ ```
71
+
72
+ Try distributed training:
73
+
74
+ ```bash
75
+ torchrun --nproc-per-node=2 -m cusrl.launch.train -env Isaac-Velocity-Rough-Anymal-C-v0 \
76
+ -alg ppo --logger tensorboard --environment-args="--headless"
77
+ ```
78
+
79
+ ## Highlights
80
+
81
+ CusRL provides a modular and extensible framework for RL with the following key features:
82
+
83
+ - **Modular Design**: Components are highly decoupled, allowing for easy customization and extension
84
+ - **Diverse Network Architectures**: Support for MLP, CNN, RNNs, Transformer and custom architectures
85
+ - **Modern Training Techniques**: Built-in support for distributed and mixed-precision training
86
+
87
+ CusRL is designed for researchers and practitioners who need a clean, extensible framework for implementing
88
+ and experimenting with reinforcement learning algorithms. The architecture emphasizes clean separation of
89
+ concerns, allowing users to modify specific components without disrupting the rest of the system.
90
+
91
+ ## Implemented Algorithms
92
+
93
+ - [Proximal Policy Optimization (PPO)](https://arxiv.org/abs/1707.06347) with recurrent policy support
94
+ - [Generalized Advantage Estimation (GAE)](https://arxiv.org/abs/1506.02438)
95
+ with [distinct lambda values](https://proceedings.neurips.cc/paper_files/paper/2022/hash/e95475f5fb8edb9075bf9e25670d4013-Abstract-Conference.html)
96
+ - [Preserving Outputs Precisely, while Adaptively Rescaling Targets (Pop-Art)](https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html)
97
+ - [Random Network Distillation (RND)](https://arxiv.org/abs/1810.12894)
98
+ - Symmetry Augmentations:
99
+ [Symmetry Loss](https://dl.acm.org/doi/abs/10.1145/3197517.3201397),
100
+ [Symmetric Architecture](https://dl.acm.org/doi/abs/10.1145/3359566.3360070),
101
+ [Symmetric Data Augmentation](https://ieeexplore.ieee.org/abstract/document/10611493)
102
+
103
+ ## Cite
104
+
105
+ If you find this framework useful for your research, please consider citing our work on legged locomotion:
106
+
107
+ - [Efficient Learning of A Unified Policy For Whole-body Manipulation and Locomotion Skills](https://www.arxiv.org/abs/2507.04229), Accepted by IROS 2025
108
+ - [Learning Accurate and Robust Velocity Tracking for Quadrupedal Robots](https://www.authorea.com/doi/full/10.22541/au.173321917.73583610), Accepted by JFR
109
+ - [Learning Safe Locomotion for Quadrupedal Robots by Derived-Action Optimization](https://ieeexplore.ieee.org/abstract/document/10802725), Published in IROS 2024
cusrl-1.0.0/README.md ADDED
@@ -0,0 +1,78 @@
1
+ # CusRL: Customizable Reinforcement Learning
2
+
3
+ CusRL is a flexible and modular reinforcement learning framework that emphasizes customization.
4
+ Its clean and decoupled implementation allows researchers to easily integrate new components,
5
+ which is particularly useful for advancements in robotics learning.
6
+
7
+ > **Note:** This project is under **active development**, which means the interface is unstable
8
+ and breaking changes are likely to occur frequently.
9
+
10
+ ## Installation
11
+
12
+ Requires Python >= 3.10.
13
+
14
+ ```bash
15
+ git clone https://github.com/chengruiz/cusrl.git
16
+ # Minimal installation
17
+ pip install -e . --config-settings editable_mode=strict
18
+ # Install with all optional dependencies
19
+ pip install -e .[all] --config-settings editable_mode=strict
20
+ # For development, install pre-commit (assuming you have pre-commit installed)
21
+ pre-commit install
22
+ ```
23
+
24
+ ## Quick Start
25
+
26
+ Try to train a PPO agent with CusRL and evaluate it:
27
+
28
+ ```bash
29
+ python -m cusrl.launch.train -env MountainCar-v0 -alg ppo --logger tensorboard --seed 42
30
+ python -m cusrl.launch.play --checkpoint logs/MountainCar-v0:ppo
31
+ ```
32
+
33
+ Or, if you have [IsaacLab](https://github.com/isaac-sim/IsaacLab) installed:
34
+
35
+ ```bash
36
+ python -m cusrl.launch.train -env Isaac-Velocity-Rough-Anymal-C-v0 -alg ppo \
37
+ --logger tensorboard --environment-args="--headless"
38
+ python -m cusrl.launch.play --checkpoint logs/Isaac-Velocity-Rough-Anymal-C-v0:ppo
39
+ ```
40
+
41
+ Try distributed training:
42
+
43
+ ```bash
44
+ torchrun --nproc-per-node=2 -m cusrl.launch.train -env Isaac-Velocity-Rough-Anymal-C-v0 \
45
+ -alg ppo --logger tensorboard --environment-args="--headless"
46
+ ```
47
+
48
+ ## Highlights
49
+
50
+ CusRL provides a modular and extensible framework for RL with the following key features:
51
+
52
+ - **Modular Design**: Components are highly decoupled, allowing for easy customization and extension
53
+ - **Diverse Network Architectures**: Support for MLP, CNN, RNNs, Transformer and custom architectures
54
+ - **Modern Training Techniques**: Built-in support for distributed and mixed-precision training
55
+
56
+ CusRL is designed for researchers and practitioners who need a clean, extensible framework for implementing
57
+ and experimenting with reinforcement learning algorithms. The architecture emphasizes clean separation of
58
+ concerns, allowing users to modify specific components without disrupting the rest of the system.
59
+
60
+ ## Implemented Algorithms
61
+
62
+ - [Proximal Policy Optimization (PPO)](https://arxiv.org/abs/1707.06347) with recurrent policy support
63
+ - [Generalized Advantage Estimation (GAE)](https://arxiv.org/abs/1506.02438)
64
+ with [distinct lambda values](https://proceedings.neurips.cc/paper_files/paper/2022/hash/e95475f5fb8edb9075bf9e25670d4013-Abstract-Conference.html)
65
+ - [Preserving Outputs Precisely, while Adaptively Rescaling Targets (Pop-Art)](https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html)
66
+ - [Random Network Distillation (RND)](https://arxiv.org/abs/1810.12894)
67
+ - Symmetry Augmentations:
68
+ [Symmetry Loss](https://dl.acm.org/doi/abs/10.1145/3197517.3201397),
69
+ [Symmetric Architecture](https://dl.acm.org/doi/abs/10.1145/3359566.3360070),
70
+ [Symmetric Data Augmentation](https://ieeexplore.ieee.org/abstract/document/10611493)
71
+
72
+ ## Cite
73
+
74
+ If you find this framework useful for your research, please consider citing our work on legged locomotion:
75
+
76
+ - [Efficient Learning of A Unified Policy For Whole-body Manipulation and Locomotion Skills](https://www.arxiv.org/abs/2507.04229), Accepted by IROS 2025
77
+ - [Learning Accurate and Robust Velocity Tracking for Quadrupedal Robots](https://www.authorea.com/doi/full/10.22541/au.173321917.73583610), Accepted by JFR
78
+ - [Learning Safe Locomotion for Quadrupedal Robots by Derived-Action Optimization](https://ieeexplore.ieee.org/abstract/document/10802725), Published in IROS 2024
@@ -0,0 +1,107 @@
1
+ from cusrl import environment, hook, logger, module, preset, sampler, template, utils, zoo
2
+ from cusrl.environment import make_gym_env, make_gym_vec, make_isaaclab_env
3
+ from cusrl.module import (
4
+ CNN,
5
+ MLP,
6
+ RNN,
7
+ Actor,
8
+ AdaptiveNormalDist,
9
+ Denormalization,
10
+ Distribution,
11
+ DistributionFactoryLike,
12
+ FeedForward,
13
+ InferenceModule,
14
+ LayerFactoryLike,
15
+ Module,
16
+ ModuleFactory,
17
+ ModuleFactoryLike,
18
+ MultiheadSelfAttention,
19
+ NormalDist,
20
+ Normalization,
21
+ OneHotCategoricalDist,
22
+ Sequential,
23
+ Simba,
24
+ TransformerEncoderLayer,
25
+ Value,
26
+ )
27
+ from cusrl.sampler import (
28
+ AutoMiniBatchSampler,
29
+ MiniBatchSampler,
30
+ TemporalMiniBatchSampler,
31
+ )
32
+ from cusrl.template import (
33
+ ActorCritic,
34
+ Agent,
35
+ Buffer,
36
+ Environment,
37
+ EnvironmentSpec,
38
+ Hook,
39
+ Logger,
40
+ LoggerFactory,
41
+ LoggerFactoryLike,
42
+ OptimizerFactory,
43
+ Player,
44
+ Sampler,
45
+ Trainer,
46
+ Trial,
47
+ )
48
+ from cusrl.utils import (
49
+ device,
50
+ set_global_seed,
51
+ )
52
+
53
+ __all__ = [
54
+ "hook",
55
+ "logger",
56
+ "module",
57
+ "preset",
58
+ "sampler",
59
+ "template",
60
+ "utils",
61
+ "zoo",
62
+ "environment",
63
+ "Actor",
64
+ "ActorCritic",
65
+ "AdaptiveNormalDist",
66
+ "Agent",
67
+ "AutoMiniBatchSampler",
68
+ "Buffer",
69
+ "CNN",
70
+ "Denormalization",
71
+ "Distribution",
72
+ "DistributionFactoryLike",
73
+ "Environment",
74
+ "EnvironmentSpec",
75
+ "FeedForward",
76
+ "Hook",
77
+ "InferenceModule",
78
+ "LayerFactoryLike",
79
+ "Logger",
80
+ "LoggerFactory",
81
+ "LoggerFactoryLike",
82
+ "MLP",
83
+ "MiniBatchSampler",
84
+ "Module",
85
+ "ModuleFactory",
86
+ "ModuleFactoryLike",
87
+ "MultiheadSelfAttention",
88
+ "NormalDist",
89
+ "Normalization",
90
+ "OneHotCategoricalDist",
91
+ "OptimizerFactory",
92
+ "Player",
93
+ "RNN",
94
+ "Sampler",
95
+ "Sequential",
96
+ "Simba",
97
+ "TemporalMiniBatchSampler",
98
+ "Trainer",
99
+ "TransformerEncoderLayer",
100
+ "Trial",
101
+ "Value",
102
+ "device",
103
+ "make_gym_env",
104
+ "make_gym_vec",
105
+ "make_isaaclab_env",
106
+ "set_global_seed",
107
+ ]
@@ -0,0 +1,11 @@
1
+ from .gym import GymEnvAdapter, GymVectorEnvAdapter, make_gym_env, make_gym_vec
2
+ from .isaaclab import IsaacLabEnvAdapter, make_isaaclab_env
3
+
4
+ __all__ = [
5
+ "GymEnvAdapter",
6
+ "GymVectorEnvAdapter",
7
+ "IsaacLabEnvAdapter",
8
+ "make_gym_env",
9
+ "make_gym_vec",
10
+ "make_isaaclab_env",
11
+ ]