cusrl 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cusrl-1.0.0/.github/workflows/python-app.yml +47 -0
- cusrl-1.0.0/.gitignore +183 -0
- cusrl-1.0.0/.pre-commit-config.yaml +28 -0
- cusrl-1.0.0/PKG-INFO +109 -0
- cusrl-1.0.0/README.md +78 -0
- cusrl-1.0.0/cusrl/__init__.py +107 -0
- cusrl-1.0.0/cusrl/environment/__init__.py +11 -0
- cusrl-1.0.0/cusrl/environment/gym.py +157 -0
- cusrl-1.0.0/cusrl/environment/isaaclab.py +133 -0
- cusrl-1.0.0/cusrl/hook/__init__.py +58 -0
- cusrl-1.0.0/cusrl/hook/advantage.py +100 -0
- cusrl-1.0.0/cusrl/hook/condition.py +57 -0
- cusrl-1.0.0/cusrl/hook/gae.py +143 -0
- cusrl-1.0.0/cusrl/hook/gradient.py +48 -0
- cusrl-1.0.0/cusrl/hook/initialization.py +94 -0
- cusrl-1.0.0/cusrl/hook/lr_schedule.py +178 -0
- cusrl-1.0.0/cusrl/hook/normalization.py +194 -0
- cusrl-1.0.0/cusrl/hook/on_policy.py +35 -0
- cusrl-1.0.0/cusrl/hook/ppo.py +77 -0
- cusrl-1.0.0/cusrl/hook/representation.py +132 -0
- cusrl-1.0.0/cusrl/hook/rnd.py +66 -0
- cusrl-1.0.0/cusrl/hook/schedule.py +114 -0
- cusrl-1.0.0/cusrl/hook/smoothness.py +75 -0
- cusrl-1.0.0/cusrl/hook/statistics.py +28 -0
- cusrl-1.0.0/cusrl/hook/symmetry.py +233 -0
- cusrl-1.0.0/cusrl/hook/value.py +158 -0
- cusrl-1.0.0/cusrl/launch/export.py +43 -0
- cusrl-1.0.0/cusrl/launch/play.py +45 -0
- cusrl-1.0.0/cusrl/launch/train.py +62 -0
- cusrl-1.0.0/cusrl/logger/__init__.py +5 -0
- cusrl-1.0.0/cusrl/logger/make_factory.py +18 -0
- cusrl-1.0.0/cusrl/logger/tensorboard_logger.py +28 -0
- cusrl-1.0.0/cusrl/logger/wandb_logger.py +68 -0
- cusrl-1.0.0/cusrl/module/__init__.py +39 -0
- cusrl-1.0.0/cusrl/module/actor.py +203 -0
- cusrl-1.0.0/cusrl/module/attention.py +614 -0
- cusrl-1.0.0/cusrl/module/bijector.py +115 -0
- cusrl-1.0.0/cusrl/module/cnn.py +75 -0
- cusrl-1.0.0/cusrl/module/critic.py +73 -0
- cusrl-1.0.0/cusrl/module/distribution.py +263 -0
- cusrl-1.0.0/cusrl/module/inference.py +57 -0
- cusrl-1.0.0/cusrl/module/mlp.py +63 -0
- cusrl-1.0.0/cusrl/module/module.py +182 -0
- cusrl-1.0.0/cusrl/module/normalization.py +59 -0
- cusrl-1.0.0/cusrl/module/rnn.py +167 -0
- cusrl-1.0.0/cusrl/module/sequential.py +70 -0
- cusrl-1.0.0/cusrl/module/simba.py +70 -0
- cusrl-1.0.0/cusrl/preset/__init__.py +5 -0
- cusrl-1.0.0/cusrl/preset/ppo.py +216 -0
- cusrl-1.0.0/cusrl/sampler/__init__.py +11 -0
- cusrl-1.0.0/cusrl/sampler/mini_batch_sampler.py +78 -0
- cusrl-1.0.0/cusrl/template/__init__.py +27 -0
- cusrl-1.0.0/cusrl/template/actor_critic.py +321 -0
- cusrl-1.0.0/cusrl/template/agent.py +259 -0
- cusrl-1.0.0/cusrl/template/buffer.py +271 -0
- cusrl-1.0.0/cusrl/template/environment.py +208 -0
- cusrl-1.0.0/cusrl/template/hook.py +244 -0
- cusrl-1.0.0/cusrl/template/logger.py +76 -0
- cusrl-1.0.0/cusrl/template/optimizer.py +68 -0
- cusrl-1.0.0/cusrl/template/player.py +114 -0
- cusrl-1.0.0/cusrl/template/trainer.py +290 -0
- cusrl-1.0.0/cusrl/template/trial.py +103 -0
- cusrl-1.0.0/cusrl/utils/__init__.py +30 -0
- cusrl-1.0.0/cusrl/utils/cli.py +59 -0
- cusrl-1.0.0/cusrl/utils/config.py +75 -0
- cusrl-1.0.0/cusrl/utils/distributed.py +146 -0
- cusrl-1.0.0/cusrl/utils/export.py +98 -0
- cusrl-1.0.0/cusrl/utils/helper.py +122 -0
- cusrl-1.0.0/cusrl/utils/metrics.py +72 -0
- cusrl-1.0.0/cusrl/utils/nest.py +82 -0
- cusrl-1.0.0/cusrl/utils/normalizer.py +276 -0
- cusrl-1.0.0/cusrl/utils/recurrent.py +163 -0
- cusrl-1.0.0/cusrl/utils/timing.py +63 -0
- cusrl-1.0.0/cusrl/utils/typing.py +45 -0
- cusrl-1.0.0/cusrl/utils/video.py +21 -0
- cusrl-1.0.0/cusrl/zoo/__init__.py +8 -0
- cusrl-1.0.0/cusrl/zoo/experiment.py +105 -0
- cusrl-1.0.0/cusrl/zoo/gym/__init__.py +2 -0
- cusrl-1.0.0/cusrl/zoo/gym/box2d.py +63 -0
- cusrl-1.0.0/cusrl/zoo/gym/classic_control.py +142 -0
- cusrl-1.0.0/cusrl/zoo/isaaclab/__init__.py +2 -0
- cusrl-1.0.0/cusrl/zoo/isaaclab/classic.py +69 -0
- cusrl-1.0.0/cusrl/zoo/isaaclab/locomotion.py +93 -0
- cusrl-1.0.0/cusrl/zoo/registry.py +70 -0
- cusrl-1.0.0/cusrl.egg-info/PKG-INFO +109 -0
- cusrl-1.0.0/cusrl.egg-info/SOURCES.txt +112 -0
- cusrl-1.0.0/cusrl.egg-info/dependency_links.txt +1 -0
- cusrl-1.0.0/cusrl.egg-info/requires.txt +27 -0
- cusrl-1.0.0/cusrl.egg-info/top_level.txt +1 -0
- cusrl-1.0.0/cusrl_test/__init__.py +11 -0
- cusrl-1.0.0/cusrl_test/test_agent_export.py +31 -0
- cusrl-1.0.0/cusrl_test/test_agent_ppo_basic.py +55 -0
- cusrl-1.0.0/cusrl_test/test_agent_ppo_hook.py +51 -0
- cusrl-1.0.0/cusrl_test/test_agent_state_dict.py +102 -0
- cusrl-1.0.0/cusrl_test/test_env_gym.py +28 -0
- cusrl-1.0.0/cusrl_test/test_hook_activation.py +38 -0
- cusrl-1.0.0/cusrl_test/test_hook_normalization.py +102 -0
- cusrl-1.0.0/cusrl_test/test_hook_representation.py +25 -0
- cusrl-1.0.0/cusrl_test/test_hook_rnd.py +20 -0
- cusrl-1.0.0/cusrl_test/test_hook_smoothness.py +11 -0
- cusrl-1.0.0/cusrl_test/test_hook_symmetry.py +46 -0
- cusrl-1.0.0/cusrl_test/test_module_attention.py +87 -0
- cusrl-1.0.0/cusrl_test/test_module_cnn.py +39 -0
- cusrl-1.0.0/cusrl_test/test_module_inference.py +40 -0
- cusrl-1.0.0/cusrl_test/test_module_rnn.py +87 -0
- cusrl-1.0.0/cusrl_test/test_module_sequential.py +117 -0
- cusrl-1.0.0/cusrl_test/test_module_simba.py +35 -0
- cusrl-1.0.0/cusrl_test/test_template_player.py +10 -0
- cusrl-1.0.0/cusrl_test/test_util_normalizer.py +66 -0
- cusrl-1.0.0/cusrl_test/test_util_timing.py +25 -0
- cusrl-1.0.0/cusrl_test/utils.py +134 -0
- cusrl-1.0.0/pyproject.toml +64 -0
- cusrl-1.0.0/requirements.txt +5 -0
- cusrl-1.0.0/setup.cfg +4 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# This workflow will install Python dependencies, run tests and lint across a matrix of Python versions
|
|
2
|
+
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
|
|
3
|
+
|
|
4
|
+
name: Python application
|
|
5
|
+
|
|
6
|
+
on:
|
|
7
|
+
push:
|
|
8
|
+
branches: [ "main" ]
|
|
9
|
+
pull_request:
|
|
10
|
+
branches: [ "main" ]
|
|
11
|
+
|
|
12
|
+
permissions:
|
|
13
|
+
contents: read
|
|
14
|
+
|
|
15
|
+
jobs:
|
|
16
|
+
build:
|
|
17
|
+
|
|
18
|
+
runs-on: ubuntu-latest
|
|
19
|
+
strategy:
|
|
20
|
+
matrix:
|
|
21
|
+
python-version: [ "3.10", "3.11", "3.12", "3.13" ]
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
25
|
+
uses: actions/setup-python@v3
|
|
26
|
+
with:
|
|
27
|
+
python-version: ${{ matrix.python-version }}
|
|
28
|
+
- name: Display Python version
|
|
29
|
+
run: python -c "import sys; print(sys.version)"
|
|
30
|
+
- name: Install dependencies
|
|
31
|
+
run: |
|
|
32
|
+
rm ~/.cache/pip -rf
|
|
33
|
+
python -m pip install --upgrade pip
|
|
34
|
+
pip install flake8 pytest
|
|
35
|
+
pip install --no-cache-dir .[all]
|
|
36
|
+
- name: Lint with flake8
|
|
37
|
+
run: |
|
|
38
|
+
# stop the build if there are Python syntax errors or undefined names
|
|
39
|
+
flake8 . --count --select=E9,F63,F7,F82 --extend-ignore=E203 --show-source --statistics
|
|
40
|
+
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
|
41
|
+
flake8 . --count --exit-zero --extend-ignore=E203 --max-complexity=10 --max-line-length=127 --statistics
|
|
42
|
+
- name: Display device and autocast availability
|
|
43
|
+
run: |
|
|
44
|
+
python -c "import cusrl; print(cusrl.device(), cusrl.utils.is_autocast_available())"
|
|
45
|
+
- name: Test with pytest
|
|
46
|
+
run: |
|
|
47
|
+
pytest
|
cusrl-1.0.0/.gitignore
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py,cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
|
|
110
|
+
# pdm
|
|
111
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
112
|
+
#pdm.lock
|
|
113
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
114
|
+
# in version control.
|
|
115
|
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
|
116
|
+
.pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
121
|
+
__pypackages__/
|
|
122
|
+
|
|
123
|
+
# Celery stuff
|
|
124
|
+
celerybeat-schedule
|
|
125
|
+
celerybeat.pid
|
|
126
|
+
|
|
127
|
+
# SageMath parsed files
|
|
128
|
+
*.sage.py
|
|
129
|
+
|
|
130
|
+
# Environments
|
|
131
|
+
.env
|
|
132
|
+
.venv
|
|
133
|
+
env/
|
|
134
|
+
venv/
|
|
135
|
+
ENV/
|
|
136
|
+
env.bak/
|
|
137
|
+
venv.bak/
|
|
138
|
+
|
|
139
|
+
# Spyder project settings
|
|
140
|
+
.spyderproject
|
|
141
|
+
.spyproject
|
|
142
|
+
|
|
143
|
+
# Rope project settings
|
|
144
|
+
.ropeproject
|
|
145
|
+
|
|
146
|
+
# mkdocs documentation
|
|
147
|
+
/site
|
|
148
|
+
|
|
149
|
+
# mypy
|
|
150
|
+
.mypy_cache/
|
|
151
|
+
.dmypy.json
|
|
152
|
+
dmypy.json
|
|
153
|
+
|
|
154
|
+
# Pyre type checker
|
|
155
|
+
.pyre/
|
|
156
|
+
|
|
157
|
+
# pytype static type analyzer
|
|
158
|
+
.pytype/
|
|
159
|
+
|
|
160
|
+
# Cython debug symbols
|
|
161
|
+
cython_debug/
|
|
162
|
+
|
|
163
|
+
# PyCharm
|
|
164
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
165
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
166
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
167
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
168
|
+
#.idea/
|
|
169
|
+
|
|
170
|
+
# Ruff stuff:
|
|
171
|
+
.ruff_cache/
|
|
172
|
+
|
|
173
|
+
# PyPI configuration file
|
|
174
|
+
.pypirc
|
|
175
|
+
|
|
176
|
+
# IDE
|
|
177
|
+
.idea/
|
|
178
|
+
.vscode/
|
|
179
|
+
.zed/
|
|
180
|
+
|
|
181
|
+
# Logs
|
|
182
|
+
logs/
|
|
183
|
+
wandb/
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# See https://pre-commit.com for more information
|
|
2
|
+
# See https://pre-commit.com/hooks.html for more hooks
|
|
3
|
+
repos:
|
|
4
|
+
- repo: https://github.com/psf/black
|
|
5
|
+
rev: 25.1.0
|
|
6
|
+
hooks:
|
|
7
|
+
- id: black
|
|
8
|
+
- repo: https://github.com/codespell-project/codespell
|
|
9
|
+
rev: v2.4.1
|
|
10
|
+
hooks:
|
|
11
|
+
- id: codespell
|
|
12
|
+
additional_dependencies: [ tomli ]
|
|
13
|
+
- repo: https://github.com/pycqa/isort
|
|
14
|
+
rev: 6.0.1
|
|
15
|
+
hooks:
|
|
16
|
+
- id: isort
|
|
17
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
18
|
+
rev: v5.0.0
|
|
19
|
+
hooks:
|
|
20
|
+
- id: check-added-large-files
|
|
21
|
+
- id: check-yaml
|
|
22
|
+
- id: end-of-file-fixer
|
|
23
|
+
- id: trailing-whitespace
|
|
24
|
+
- repo: https://github.com/asottile/pyupgrade
|
|
25
|
+
rev: v3.20.0
|
|
26
|
+
hooks:
|
|
27
|
+
- id: pyupgrade
|
|
28
|
+
args: [ "--py310-plus" ]
|
cusrl-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cusrl
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Customizable and modular RL algorithms implemented in PyTorch
|
|
5
|
+
Author-email: Chengrui Zhu <jewel@zju.edu.cn>
|
|
6
|
+
Keywords: reinforcement-learning,pytorch,rl
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: numpy>=1.26.0
|
|
10
|
+
Requires-Dist: torch>=2.4.0
|
|
11
|
+
Requires-Dist: objprint~=0.3.0
|
|
12
|
+
Requires-Dist: gymnasium>=1.1.0
|
|
13
|
+
Requires-Dist: pyyaml~=6.0.2
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest; extra == "dev"
|
|
16
|
+
Provides-Extra: onnx
|
|
17
|
+
Requires-Dist: onnx; extra == "onnx"
|
|
18
|
+
Requires-Dist: onnxruntime; extra == "onnx"
|
|
19
|
+
Requires-Dist: onnxscript; extra == "onnx"
|
|
20
|
+
Provides-Extra: tensorboard
|
|
21
|
+
Requires-Dist: tensorboard; extra == "tensorboard"
|
|
22
|
+
Provides-Extra: wandb
|
|
23
|
+
Requires-Dist: wandb; extra == "wandb"
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: pytest; extra == "all"
|
|
26
|
+
Requires-Dist: onnx; extra == "all"
|
|
27
|
+
Requires-Dist: onnxruntime; extra == "all"
|
|
28
|
+
Requires-Dist: onnxscript; extra == "all"
|
|
29
|
+
Requires-Dist: tensorboard; extra == "all"
|
|
30
|
+
Requires-Dist: wandb; extra == "all"
|
|
31
|
+
|
|
32
|
+
# CusRL: Customizable Reinforcement Learning
|
|
33
|
+
|
|
34
|
+
CusRL is a flexible and modular reinforcement learning framework that emphasizes customization.
|
|
35
|
+
Its clean and decoupled implementation allows researchers to easily integrate new components,
|
|
36
|
+
which is particularly useful for advancements in robotics learning.
|
|
37
|
+
|
|
38
|
+
> **Note:** This project is under **active development**, which means the interface is unstable
|
|
39
|
+
and breaking changes are likely to occur frequently.
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
Requires Python >= 3.10.
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
git clone https://github.com/chengruiz/cusrl.git
|
|
47
|
+
# Minimal installation
|
|
48
|
+
pip install -e . --config-settings editable_mode=strict
|
|
49
|
+
# Install with all optional dependencies
|
|
50
|
+
pip install -e .[all] --config-settings editable_mode=strict
|
|
51
|
+
# For development, install the pre-commit git hooks (assuming you have pre-commit installed)
|
|
52
|
+
pre-commit install
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Quick Start
|
|
56
|
+
|
|
57
|
+
Try to train a PPO agent with CusRL and evaluate it:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
python -m cusrl.launch.train -env MountainCar-v0 -alg ppo --logger tensorboard --seed 42
|
|
61
|
+
python -m cusrl.launch.play --checkpoint logs/MountainCar-v0:ppo
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Or if you have [IsaacLab](https://github.com/isaac-sim/IsaacLab) installed:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
python -m cusrl.launch.train -env Isaac-Velocity-Rough-Anymal-C-v0 -alg ppo \
|
|
68
|
+
--logger tensorboard --environment-args="--headless"
|
|
69
|
+
python -m cusrl.launch.play --checkpoint logs/Isaac-Velocity-Rough-Anymal-C-v0:ppo
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Try distributed training:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
torchrun --nproc-per-node=2 -m cusrl.launch.train -env Isaac-Velocity-Rough-Anymal-C-v0 \
|
|
76
|
+
-alg ppo --logger tensorboard --environment-args="--headless"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Highlights
|
|
80
|
+
|
|
81
|
+
CusRL provides a modular and extensible framework for RL with the following key features:
|
|
82
|
+
|
|
83
|
+
- **Modular Design**: Components are highly decoupled, allowing for easy customization and extension
|
|
84
|
+
- **Diverse Network Architectures**: Support for MLP, CNN, RNNs, Transformer and custom architectures
|
|
85
|
+
- **Modern Training Techniques**: Built-in support for distributed and mixed-precision training
|
|
86
|
+
|
|
87
|
+
CusRL is designed for researchers and practitioners who need a clean, extensible framework for implementing
|
|
88
|
+
and experimenting with reinforcement learning algorithms. The architecture emphasizes clean separation of
|
|
89
|
+
concerns, allowing users to modify specific components without disrupting the rest of the system.
|
|
90
|
+
|
|
91
|
+
## Implemented Algorithms
|
|
92
|
+
|
|
93
|
+
- [Proximal Policy Optimization (PPO)](https://arxiv.org/abs/1707.06347) with recurrent policy support
|
|
94
|
+
- [Generalized Advantage Estimation (GAE)](https://arxiv.org/abs/1506.02438)
|
|
95
|
+
with [distinct lambda values](https://proceedings.neurips.cc/paper_files/paper/2022/hash/e95475f5fb8edb9075bf9e25670d4013-Abstract-Conference.html)
|
|
96
|
+
- [Preserving Outputs Precisely, while Adaptively Rescaling Targets (Pop-Art)](https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html)
|
|
97
|
+
- [Random Network Distillation (RND)](https://arxiv.org/abs/1810.12894)
|
|
98
|
+
- Symmetry Augmentations:
|
|
99
|
+
[Symmetry Loss](https://dl.acm.org/doi/abs/10.1145/3197517.3201397),
|
|
100
|
+
[Symmetric Architecture](https://dl.acm.org/doi/abs/10.1145/3359566.3360070),
|
|
101
|
+
[Symmetric Data Augmentation](https://ieeexplore.ieee.org/abstract/document/10611493)
|
|
102
|
+
|
|
103
|
+
## Cite
|
|
104
|
+
|
|
105
|
+
If you find this framework useful for your research, please consider citing our work on legged locomotion:
|
|
106
|
+
|
|
107
|
+
- [Efficient Learning of A Unified Policy For Whole-body Manipulation and Locomotion Skills](https://www.arxiv.org/abs/2507.04229), Accepted by IROS 2025
|
|
108
|
+
- [Learning Accurate and Robust Velocity Tracking for Quadrupedal Robots](https://www.authorea.com/doi/full/10.22541/au.173321917.73583610), Accepted by JFR
|
|
109
|
+
- [Learning Safe Locomotion for Quadrupedal Robots by Derived-Action Optimization](https://ieeexplore.ieee.org/abstract/document/10802725), Published in IROS 2024
|
cusrl-1.0.0/README.md
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# CusRL: Customizable Reinforcement Learning
|
|
2
|
+
|
|
3
|
+
CusRL is a flexible and modular reinforcement learning framework that emphasizes customization.
|
|
4
|
+
Its clean and decoupled implementation allows researchers to easily integrate new components,
|
|
5
|
+
which is particularly useful for advancements in robotics learning.
|
|
6
|
+
|
|
7
|
+
> **Note:** This project is under **active development**, which means the interface is unstable
|
|
8
|
+
and breaking changes are likely to occur frequently.
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
Requires Python >= 3.10.
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
git clone https://github.com/chengruiz/cusrl.git
|
|
16
|
+
# Minimal installation
|
|
17
|
+
pip install -e . --config-settings editable_mode=strict
|
|
18
|
+
# Install with all optional dependencies
|
|
19
|
+
pip install -e .[all] --config-settings editable_mode=strict
|
|
20
|
+
# For development, install the pre-commit git hooks (assuming you have pre-commit installed)
|
|
21
|
+
pre-commit install
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
Try to train a PPO agent with CusRL and evaluate it:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
python -m cusrl.launch.train -env MountainCar-v0 -alg ppo --logger tensorboard --seed 42
|
|
30
|
+
python -m cusrl.launch.play --checkpoint logs/MountainCar-v0:ppo
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Or if you have [IsaacLab](https://github.com/isaac-sim/IsaacLab) installed:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
python -m cusrl.launch.train -env Isaac-Velocity-Rough-Anymal-C-v0 -alg ppo \
|
|
37
|
+
--logger tensorboard --environment-args="--headless"
|
|
38
|
+
python -m cusrl.launch.play --checkpoint logs/Isaac-Velocity-Rough-Anymal-C-v0:ppo
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Try distributed training:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
torchrun --nproc-per-node=2 -m cusrl.launch.train -env Isaac-Velocity-Rough-Anymal-C-v0 \
|
|
45
|
+
-alg ppo --logger tensorboard --environment-args="--headless"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Highlights
|
|
49
|
+
|
|
50
|
+
CusRL provides a modular and extensible framework for RL with the following key features:
|
|
51
|
+
|
|
52
|
+
- **Modular Design**: Components are highly decoupled, allowing for easy customization and extension
|
|
53
|
+
- **Diverse Network Architectures**: Support for MLP, CNN, RNNs, Transformer and custom architectures
|
|
54
|
+
- **Modern Training Techniques**: Built-in support for distributed and mixed-precision training
|
|
55
|
+
|
|
56
|
+
CusRL is designed for researchers and practitioners who need a clean, extensible framework for implementing
|
|
57
|
+
and experimenting with reinforcement learning algorithms. The architecture emphasizes clean separation of
|
|
58
|
+
concerns, allowing users to modify specific components without disrupting the rest of the system.
|
|
59
|
+
|
|
60
|
+
## Implemented Algorithms
|
|
61
|
+
|
|
62
|
+
- [Proximal Policy Optimization (PPO)](https://arxiv.org/abs/1707.06347) with recurrent policy support
|
|
63
|
+
- [Generalized Advantage Estimation (GAE)](https://arxiv.org/abs/1506.02438)
|
|
64
|
+
with [distinct lambda values](https://proceedings.neurips.cc/paper_files/paper/2022/hash/e95475f5fb8edb9075bf9e25670d4013-Abstract-Conference.html)
|
|
65
|
+
- [Preserving Outputs Precisely, while Adaptively Rescaling Targets (Pop-Art)](https://proceedings.neurips.cc/paper/2016/hash/5227b6aaf294f5f027273aebf16015f2-Abstract.html)
|
|
66
|
+
- [Random Network Distillation (RND)](https://arxiv.org/abs/1810.12894)
|
|
67
|
+
- Symmetry Augmentations:
|
|
68
|
+
[Symmetry Loss](https://dl.acm.org/doi/abs/10.1145/3197517.3201397),
|
|
69
|
+
[Symmetric Architecture](https://dl.acm.org/doi/abs/10.1145/3359566.3360070),
|
|
70
|
+
[Symmetric Data Augmentation](https://ieeexplore.ieee.org/abstract/document/10611493)
|
|
71
|
+
|
|
72
|
+
## Cite
|
|
73
|
+
|
|
74
|
+
If you find this framework useful for your research, please consider citing our work on legged locomotion:
|
|
75
|
+
|
|
76
|
+
- [Efficient Learning of A Unified Policy For Whole-body Manipulation and Locomotion Skills](https://www.arxiv.org/abs/2507.04229), Accepted by IROS 2025
|
|
77
|
+
- [Learning Accurate and Robust Velocity Tracking for Quadrupedal Robots](https://www.authorea.com/doi/full/10.22541/au.173321917.73583610), Accepted by JFR
|
|
78
|
+
- [Learning Safe Locomotion for Quadrupedal Robots by Derived-Action Optimization](https://ieeexplore.ieee.org/abstract/document/10802725), Published in IROS 2024
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from cusrl import environment, hook, logger, module, preset, sampler, template, utils, zoo
|
|
2
|
+
from cusrl.environment import make_gym_env, make_gym_vec, make_isaaclab_env
|
|
3
|
+
from cusrl.module import (
|
|
4
|
+
CNN,
|
|
5
|
+
MLP,
|
|
6
|
+
RNN,
|
|
7
|
+
Actor,
|
|
8
|
+
AdaptiveNormalDist,
|
|
9
|
+
Denormalization,
|
|
10
|
+
Distribution,
|
|
11
|
+
DistributionFactoryLike,
|
|
12
|
+
FeedForward,
|
|
13
|
+
InferenceModule,
|
|
14
|
+
LayerFactoryLike,
|
|
15
|
+
Module,
|
|
16
|
+
ModuleFactory,
|
|
17
|
+
ModuleFactoryLike,
|
|
18
|
+
MultiheadSelfAttention,
|
|
19
|
+
NormalDist,
|
|
20
|
+
Normalization,
|
|
21
|
+
OneHotCategoricalDist,
|
|
22
|
+
Sequential,
|
|
23
|
+
Simba,
|
|
24
|
+
TransformerEncoderLayer,
|
|
25
|
+
Value,
|
|
26
|
+
)
|
|
27
|
+
from cusrl.sampler import (
|
|
28
|
+
AutoMiniBatchSampler,
|
|
29
|
+
MiniBatchSampler,
|
|
30
|
+
TemporalMiniBatchSampler,
|
|
31
|
+
)
|
|
32
|
+
from cusrl.template import (
|
|
33
|
+
ActorCritic,
|
|
34
|
+
Agent,
|
|
35
|
+
Buffer,
|
|
36
|
+
Environment,
|
|
37
|
+
EnvironmentSpec,
|
|
38
|
+
Hook,
|
|
39
|
+
Logger,
|
|
40
|
+
LoggerFactory,
|
|
41
|
+
LoggerFactoryLike,
|
|
42
|
+
OptimizerFactory,
|
|
43
|
+
Player,
|
|
44
|
+
Sampler,
|
|
45
|
+
Trainer,
|
|
46
|
+
Trial,
|
|
47
|
+
)
|
|
48
|
+
from cusrl.utils import (
|
|
49
|
+
device,
|
|
50
|
+
set_global_seed,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
__all__ = [
|
|
54
|
+
"hook",
|
|
55
|
+
"logger",
|
|
56
|
+
"module",
|
|
57
|
+
"preset",
|
|
58
|
+
"sampler",
|
|
59
|
+
"template",
|
|
60
|
+
"utils",
|
|
61
|
+
"zoo",
|
|
62
|
+
"environment",
|
|
63
|
+
"Actor",
|
|
64
|
+
"ActorCritic",
|
|
65
|
+
"AdaptiveNormalDist",
|
|
66
|
+
"Agent",
|
|
67
|
+
"AutoMiniBatchSampler",
|
|
68
|
+
"Buffer",
|
|
69
|
+
"CNN",
|
|
70
|
+
"Denormalization",
|
|
71
|
+
"Distribution",
|
|
72
|
+
"DistributionFactoryLike",
|
|
73
|
+
"Environment",
|
|
74
|
+
"EnvironmentSpec",
|
|
75
|
+
"FeedForward",
|
|
76
|
+
"Hook",
|
|
77
|
+
"InferenceModule",
|
|
78
|
+
"LayerFactoryLike",
|
|
79
|
+
"Logger",
|
|
80
|
+
"LoggerFactory",
|
|
81
|
+
"LoggerFactoryLike",
|
|
82
|
+
"MLP",
|
|
83
|
+
"MiniBatchSampler",
|
|
84
|
+
"Module",
|
|
85
|
+
"ModuleFactory",
|
|
86
|
+
"ModuleFactoryLike",
|
|
87
|
+
"MultiheadSelfAttention",
|
|
88
|
+
"NormalDist",
|
|
89
|
+
"Normalization",
|
|
90
|
+
"OneHotCategoricalDist",
|
|
91
|
+
"OptimizerFactory",
|
|
92
|
+
"Player",
|
|
93
|
+
"RNN",
|
|
94
|
+
"Sampler",
|
|
95
|
+
"Sequential",
|
|
96
|
+
"Simba",
|
|
97
|
+
"TemporalMiniBatchSampler",
|
|
98
|
+
"Trainer",
|
|
99
|
+
"TransformerEncoderLayer",
|
|
100
|
+
"Trial",
|
|
101
|
+
"Value",
|
|
102
|
+
"device",
|
|
103
|
+
"make_gym_env",
|
|
104
|
+
"make_gym_vec",
|
|
105
|
+
"make_isaaclab_env",
|
|
106
|
+
"set_global_seed",
|
|
107
|
+
]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .gym import GymEnvAdapter, GymVectorEnvAdapter, make_gym_env, make_gym_vec
|
|
2
|
+
from .isaaclab import IsaacLabEnvAdapter, make_isaaclab_env
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"GymEnvAdapter",
|
|
6
|
+
"GymVectorEnvAdapter",
|
|
7
|
+
"IsaacLabEnvAdapter",
|
|
8
|
+
"make_gym_env",
|
|
9
|
+
"make_gym_vec",
|
|
10
|
+
"make_isaaclab_env",
|
|
11
|
+
]
|