platform-lander 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- platform_lander-0.1.0/LICENSE +22 -0
- platform_lander-0.1.0/MANIFEST.in +12 -0
- platform_lander-0.1.0/PKG-INFO +197 -0
- platform_lander-0.1.0/README.md +153 -0
- platform_lander-0.1.0/examples/demo.py +70 -0
- platform_lander-0.1.0/pyproject.toml +41 -0
- platform_lander-0.1.0/setup.cfg +4 -0
- platform_lander-0.1.0/src/platform_lander/__init__.py +12 -0
- platform_lander-0.1.0/src/platform_lander/core.py +88 -0
- platform_lander-0.1.0/src/platform_lander/platform_lander.py +834 -0
- platform_lander-0.1.0/src/platform_lander/spaces.py +136 -0
- platform_lander-0.1.0/src/platform_lander.egg-info/PKG-INFO +197 -0
- platform_lander-0.1.0/src/platform_lander.egg-info/SOURCES.txt +15 -0
- platform_lander-0.1.0/src/platform_lander.egg-info/dependency_links.txt +1 -0
- platform_lander-0.1.0/src/platform_lander.egg-info/requires.txt +9 -0
- platform_lander-0.1.0/src/platform_lander.egg-info/top_level.txt +1 -0
- platform_lander-0.1.0/tests/test_platform_lander.py +152 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
The MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2016 OpenAI
|
|
4
|
+
Copyright (c) 2022 Farama Foundation
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include LICENSE
|
|
3
|
+
recursive-include src/platform_lander *.py
|
|
4
|
+
recursive-include tests *.py
|
|
5
|
+
recursive-include examples *.py
|
|
6
|
+
prune runs
|
|
7
|
+
global-exclude __pycache__/*
|
|
8
|
+
global-exclude *.py[cod]
|
|
9
|
+
global-exclude .DS_Store
|
|
10
|
+
global-exclude *.pt
|
|
11
|
+
global-exclude *.csv
|
|
12
|
+
global-exclude *.log
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: platform_lander
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Standalone reusable-booster landing environment for reinforcement learning.
|
|
5
|
+
Author: Andriy Burkov
|
|
6
|
+
License: The MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2016 OpenAI
|
|
9
|
+
Copyright (c) 2022 Farama Foundation
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in
|
|
19
|
+
all copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
27
|
+
THE SOFTWARE.
|
|
28
|
+
|
|
29
|
+
Project-URL: Homepage, https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander
|
|
30
|
+
Project-URL: Source, https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander
|
|
31
|
+
Project-URL: Repository, https://github.com/aburkov/theDRLbook
|
|
32
|
+
Keywords: reinforcement-learning,rl,box2d,lander,environment
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
38
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
39
|
+
Requires-Python: >=3.10
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
Provides-Extra: test
|
|
42
|
+
Provides-Extra: train
|
|
43
|
+
License-File: LICENSE
|
|
44
|
+
|
|
45
|
+
# Platform Lander
|
|
46
|
+
|
|
47
|
+
A standalone reusable-booster landing environment based on Gymnasium LunarLander v3 physics, but without importing Gymnasium. The task is to land a SpaceX-style booster upright on a moving floating platform. Missing the platform and falling into the ocean, or contacting the platform in a non-vertical position, terminates the episode as failure.
|
|
48
|
+
|
|
49
|
+
## Install
|
|
50
|
+
|
|
51
|
+
After the package has been published to PyPI:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install platform_lander
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Before the PyPI release is available, install the same package directly from
|
|
58
|
+
the book repository subdirectory:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install "platform_lander @ git+https://github.com/aburkov/theDRLbook.git#subdirectory=test_environments/platform_lander"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
For local development from this folder:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install -e .
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Google Colab
|
|
71
|
+
|
|
72
|
+
Use the same install command in the first notebook cell. Colab usually needs `swig` before Box2D builds:
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
!apt-get -qq install swig
|
|
76
|
+
!pip install -q platform_lander
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Then import normally:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from platform_lander import PlatformLander
|
|
83
|
+
|
|
84
|
+
env = PlatformLander(render_mode="rgb_array", enable_wind=True, wind_power=5.0)
|
|
85
|
+
obs, info = env.reset(seed=0)
|
|
86
|
+
obs, reward, terminated, truncated, info = env.step(2)
|
|
87
|
+
frame = env.render()
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Display a rendered frame in Colab:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
import matplotlib.pyplot as plt
|
|
94
|
+
|
|
95
|
+
plt.imshow(frame)
|
|
96
|
+
plt.axis("off")
|
|
97
|
+
plt.show()
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Local Script
|
|
101
|
+
|
|
102
|
+
To watch the booster in a local Pygame window, install the package in editable
|
|
103
|
+
mode and run the demo:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
pip install -e .
|
|
107
|
+
python examples/demo.py
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The test file is headless, so running `pytest` or `python tests/test_platform_lander.py`
|
|
111
|
+
will not open an animation window.
|
|
112
|
+
|
|
113
|
+
To train a discrete policy with the textbook single-trajectory REINFORCE
|
|
114
|
+
algorithm and then show three animated runs:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pip install -e ".[train]"
|
|
118
|
+
python vanilla_reinforce.py
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
The repository also includes incremental REINFORCE variants:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
python rtg_reinforce.py # vanilla + per-timestep reward-to-go
|
|
125
|
+
python average_reinforcement_baseline_reinforce.py # reward-to-go + running scalar RTG baseline
|
|
126
|
+
python value_function_baseline_reinforce.py # reward-to-go + learned value-function baseline
|
|
127
|
+
python batch_reinforce.py # vanilla + trajectory batches
|
|
128
|
+
python full_reinforce.py # batches + reward-to-go + selectable scalar baseline
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Each training script writes a log, per-episode CSV data, and a checkpoint under
|
|
132
|
+
`runs/` by default, for example `runs/full_reinforce.log`,
|
|
133
|
+
`runs/full_reinforce.csv`, and `runs/full_reinforce.pt`. Override those paths
|
|
134
|
+
with `--log-file`, `--csv-file`, and `--model-file`.
|
|
135
|
+
|
|
136
|
+
To load the hardcoded `runs/full_reinforce.pt` checkpoint and watch several
|
|
137
|
+
animated policy rollouts:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
python watch_trained_policy.py
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
To generate one side-by-side results graph per variant from the saved CSV
|
|
144
|
+
files:
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
python plot_reinforce_results.py
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
For a quick smoke test without opening the animation window:
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
python vanilla_reinforce.py --episodes 3 --max-steps 20 --no-animation
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from platform_lander import PlatformLander
|
|
158
|
+
|
|
159
|
+
env = PlatformLander(enable_wind=True, wind_direction=(1, 0.2), wind_power=5.0)
|
|
160
|
+
obs, info = env.reset(seed=0)
|
|
161
|
+
|
|
162
|
+
for _ in range(1000):
|
|
163
|
+
action = env.action_space.sample()
|
|
164
|
+
obs, reward, terminated, truncated, info = env.step(action)
|
|
165
|
+
if terminated or truncated:
|
|
166
|
+
print(info)
|
|
167
|
+
break
|
|
168
|
+
|
|
169
|
+
env.close()
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## API Notes
|
|
173
|
+
|
|
174
|
+
- `PlatformLander(continuous=False)` uses `Discrete(4)` actions.
|
|
175
|
+
- Actions: `0` no-op, `1` upper-left attitude jet, `2` bottom engine, `3` upper-right attitude jet.
|
|
176
|
+
- `continuous=True` uses a two-value `Box(-1, 1, shape=(2,))` action.
|
|
177
|
+
- Wind is controlled with `enable_wind`, `wind_power`, `wind_direction`, and `set_wind(...)`.
|
|
178
|
+
- The booster has 100 available jet fires by default. After they are exhausted,
|
|
179
|
+
engine commands have no effect and the booster continues ballistically.
|
|
180
|
+
- The observation includes the fraction of jet fires remaining.
|
|
181
|
+
- The package provides local `Box` and `Discrete` spaces and does not import Gymnasium.
|
|
182
|
+
|
|
183
|
+
## Publishing
|
|
184
|
+
|
|
185
|
+
Build the package from this directory:
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
python -m build
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Upload the generated `dist/platform_lander-*.tar.gz` and
|
|
192
|
+
`dist/platform_lander-*.whl` files to PyPI with a PyPI account that owns the
|
|
193
|
+
`platform_lander` project name:
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
python -m twine upload dist/*
|
|
197
|
+
```
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# Platform Lander
|
|
2
|
+
|
|
3
|
+
A standalone reusable-booster landing environment based on Gymnasium LunarLander v3 physics, but without importing Gymnasium. The task is to land a SpaceX-style booster upright on a moving floating platform. Missing the platform and falling into the ocean, or contacting the platform in a non-vertical position, terminates the episode as failure.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
After the package has been published to PyPI:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install platform_lander
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Before the PyPI release is available, install the same package directly from
|
|
14
|
+
the book repository subdirectory:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install "platform_lander @ git+https://github.com/aburkov/theDRLbook.git#subdirectory=test_environments/platform_lander"
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
For local development from this folder:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install -e .
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Google Colab
|
|
27
|
+
|
|
28
|
+
Use the same install command in the first notebook cell. Colab usually needs `swig` before Box2D builds:
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
!apt-get -qq install swig
|
|
32
|
+
!pip install -q platform_lander
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Then import normally:
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from platform_lander import PlatformLander
|
|
39
|
+
|
|
40
|
+
env = PlatformLander(render_mode="rgb_array", enable_wind=True, wind_power=5.0)
|
|
41
|
+
obs, info = env.reset(seed=0)
|
|
42
|
+
obs, reward, terminated, truncated, info = env.step(2)
|
|
43
|
+
frame = env.render()
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Display a rendered frame in Colab:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
import matplotlib.pyplot as plt
|
|
50
|
+
|
|
51
|
+
plt.imshow(frame)
|
|
52
|
+
plt.axis("off")
|
|
53
|
+
plt.show()
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Local Script
|
|
57
|
+
|
|
58
|
+
To watch the booster in a local Pygame window, install the package in editable
|
|
59
|
+
mode and run the demo:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install -e .
|
|
63
|
+
python examples/demo.py
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
The test file is headless, so running `pytest` or `python tests/test_platform_lander.py`
|
|
67
|
+
will not open an animation window.
|
|
68
|
+
|
|
69
|
+
To train a discrete policy with the textbook single-trajectory REINFORCE
|
|
70
|
+
algorithm and then show three animated runs:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install -e ".[train]"
|
|
74
|
+
python vanilla_reinforce.py
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
The repository also includes incremental REINFORCE variants:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
python rtg_reinforce.py # vanilla + per-timestep reward-to-go
|
|
81
|
+
python average_reinforcement_baseline_reinforce.py # reward-to-go + running scalar RTG baseline
|
|
82
|
+
python value_function_baseline_reinforce.py # reward-to-go + learned value-function baseline
|
|
83
|
+
python batch_reinforce.py # vanilla + trajectory batches
|
|
84
|
+
python full_reinforce.py # batches + reward-to-go + selectable scalar baseline
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Each training script writes a log, per-episode CSV data, and a checkpoint under
|
|
88
|
+
`runs/` by default, for example `runs/full_reinforce.log`,
|
|
89
|
+
`runs/full_reinforce.csv`, and `runs/full_reinforce.pt`. Override those paths
|
|
90
|
+
with `--log-file`, `--csv-file`, and `--model-file`.
|
|
91
|
+
|
|
92
|
+
To load the hardcoded `runs/full_reinforce.pt` checkpoint and watch several
|
|
93
|
+
animated policy rollouts:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
python watch_trained_policy.py
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
To generate one side-by-side results graph per variant from the saved CSV
|
|
100
|
+
files:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
python plot_reinforce_results.py
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
For a quick smoke test without opening the animation window:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
python vanilla_reinforce.py --episodes 3 --max-steps 20 --no-animation
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from platform_lander import PlatformLander
|
|
114
|
+
|
|
115
|
+
env = PlatformLander(enable_wind=True, wind_direction=(1, 0.2), wind_power=5.0)
|
|
116
|
+
obs, info = env.reset(seed=0)
|
|
117
|
+
|
|
118
|
+
for _ in range(1000):
|
|
119
|
+
action = env.action_space.sample()
|
|
120
|
+
obs, reward, terminated, truncated, info = env.step(action)
|
|
121
|
+
if terminated or truncated:
|
|
122
|
+
print(info)
|
|
123
|
+
break
|
|
124
|
+
|
|
125
|
+
env.close()
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## API Notes
|
|
129
|
+
|
|
130
|
+
- `PlatformLander(continuous=False)` uses `Discrete(4)` actions.
|
|
131
|
+
- Actions: `0` no-op, `1` upper-left attitude jet, `2` bottom engine, `3` upper-right attitude jet.
|
|
132
|
+
- `continuous=True` uses a two-value `Box(-1, 1, shape=(2,))` action.
|
|
133
|
+
- Wind is controlled with `enable_wind`, `wind_power`, `wind_direction`, and `set_wind(...)`.
|
|
134
|
+
- The booster has 100 available jet fires by default. After they are exhausted,
|
|
135
|
+
engine commands have no effect and the booster continues ballistically.
|
|
136
|
+
- The observation includes the fraction of jet fires remaining.
|
|
137
|
+
- The package provides local `Box` and `Discrete` spaces and does not import Gymnasium.
|
|
138
|
+
|
|
139
|
+
## Publishing
|
|
140
|
+
|
|
141
|
+
Build the package from this directory:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
python -m build
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Upload the generated `dist/platform_lander-*.tar.gz` and
|
|
148
|
+
`dist/platform_lander-*.whl` files to PyPI with a PyPI account that owns the
|
|
149
|
+
`platform_lander` project name:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
python -m twine upload dist/*
|
|
153
|
+
```
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Render PlatformLander in a local Pygame window.
|
|
2
|
+
|
|
3
|
+
Run from the project root with:
|
|
4
|
+
|
|
5
|
+
python examples/demo.py
|
|
6
|
+
|
|
7
|
+
The tests are intentionally headless; this script is for visual inspection.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import sys
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Put the checkout's ``src`` directory at the front of ``sys.path`` so the
# ``platform_lander`` import that follows resolves against the in-tree sources.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
SRC = PROJECT_ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))
|
|
21
|
+
|
|
22
|
+
from platform_lander import PlatformLander, heuristic # noqa: E402
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_args() -> argparse.Namespace:
|
|
26
|
+
parser = argparse.ArgumentParser(description="Watch the PlatformLander environment.")
|
|
27
|
+
parser.add_argument("--policy", choices=["heuristic", "random"], default="heuristic")
|
|
28
|
+
parser.add_argument("--seed", type=int, default=0)
|
|
29
|
+
parser.add_argument("--episodes", type=int, default=5)
|
|
30
|
+
parser.add_argument("--wind", action="store_true", help="Enable wind during the demo.")
|
|
31
|
+
parser.add_argument("--wind-power", type=float, default=5.0)
|
|
32
|
+
return parser.parse_args()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def main() -> None:
    """Run the demo episodes in a ``render_mode="human"`` environment.

    Each episode is reset with ``seed + episode`` and stepped for at most
    1000 steps using either random actions or the packaged ``heuristic``
    policy. A one-line summary is printed when an episode ends. The
    environment is always closed, even if an episode raises.
    """
    args = parse_args()
    env = PlatformLander(
        render_mode="human",
        enable_wind=args.wind,
        wind_power=args.wind_power,
        wind_direction=(1.0, 0.0),
    )

    try:
        for episode in range(args.episodes):
            obs, _ = env.reset(seed=args.seed + episode)
            total_reward = 0.0

            for step in range(1000):
                if args.policy == "random":
                    action = env.action_space.sample()
                else:
                    action = heuristic(env, obs)

                obs, reward, terminated, truncated, info = env.step(action)
                total_reward += reward

                if terminated or truncated:
                    print(
                        f"episode={episode} step={step} "
                        f"reward={total_reward:.1f} info={info}"
                    )
                    break
            else:
                # The step cap was hit without the environment ending the
                # episode; report it instead of moving on silently.
                print(
                    f"episode={episode} step={step} "
                    f"reward={total_reward:.1f} info={info} (step limit reached)"
                )
    finally:
        env.close()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Only run the demo when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
70
|
+
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "platform_lander"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Standalone reusable-booster landing environment for reinforcement learning."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { file = "LICENSE" }
|
|
12
|
+
authors = [{ name = "Andriy Burkov" }]
|
|
13
|
+
keywords = ["reinforcement-learning", "rl", "box2d", "lander", "environment"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.10",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"numpy>=1.21",
|
|
24
|
+
"box2d-py>=2.3.5",
|
|
25
|
+
"pygame>=2.1",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
test = ["pytest>=7"]
|
|
30
|
+
train = ["torch>=2"]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander"
|
|
34
|
+
Source = "https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander"
|
|
35
|
+
Repository = "https://github.com/aburkov/theDRLbook"
|
|
36
|
+
|
|
37
|
+
[tool.setuptools.packages.find]
|
|
38
|
+
where = ["src"]
|
|
39
|
+
|
|
40
|
+
[tool.setuptools]
|
|
41
|
+
license-files = ["LICENSE"]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Standalone SpaceX-style platform landing environment.
|
|
2
|
+
|
|
3
|
+
This package is intentionally independent from Gymnasium. It keeps the familiar
|
|
4
|
+
``reset``/``step``/``render`` API and lightweight ``Box``/``Discrete`` spaces so
|
|
5
|
+
it can be used by RL code without importing ``gymnasium``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from platform_lander.platform_lander import PlatformLander, heuristic
|
|
9
|
+
from platform_lander.spaces import Box, Discrete
|
|
10
|
+
|
|
11
|
+
__all__ = ["PlatformLander", "heuristic", "Box", "Discrete"]
|
|
12
|
+
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Small Gymnasium-compatible core helpers used by :mod:`platform_lander`.
|
|
2
|
+
|
|
3
|
+
The environment code is adapted from Gymnasium's LunarLander v3, but this file
|
|
4
|
+
contains the minimal runtime support needed to use it without importing
|
|
5
|
+
Gymnasium.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DependencyNotInstalled(ImportError):
    """Raised when an optional rendering or physics dependency is missing.

    Subclasses :class:`ImportError`, so ``except ImportError`` handlers
    also catch it.
    """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Error(Exception):
    """Base package exception."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def np_random(seed: int | None = None) -> tuple[np.random.Generator, int]:
|
|
24
|
+
"""Return a NumPy random generator and the seed used to create it."""
|
|
25
|
+
|
|
26
|
+
if seed is not None and not (isinstance(seed, int) and seed >= 0):
|
|
27
|
+
raise Error(f"Seed must be a non-negative python integer, got {seed!r}")
|
|
28
|
+
|
|
29
|
+
seed_seq = np.random.SeedSequence(seed)
|
|
30
|
+
rng = np.random.Generator(np.random.PCG64(seed_seq))
|
|
31
|
+
return rng, int(seed_seq.entropy)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Env:
|
|
35
|
+
"""Minimal environment base class with Gymnasium-style seeding."""
|
|
36
|
+
|
|
37
|
+
metadata: dict[str, Any] = {"render_modes": []}
|
|
38
|
+
render_mode: str | None = None
|
|
39
|
+
|
|
40
|
+
_np_random: np.random.Generator | None = None
|
|
41
|
+
_np_random_seed: int | None = None
|
|
42
|
+
|
|
43
|
+
def reset(self, *, seed: int | None = None, options: dict | None = None):
|
|
44
|
+
if seed is not None:
|
|
45
|
+
self._np_random, self._np_random_seed = np_random(seed)
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def np_random(self) -> np.random.Generator:
|
|
49
|
+
if self._np_random is None:
|
|
50
|
+
self._np_random, self._np_random_seed = np_random()
|
|
51
|
+
return self._np_random
|
|
52
|
+
|
|
53
|
+
@np_random.setter
|
|
54
|
+
def np_random(self, value: np.random.Generator) -> None:
|
|
55
|
+
self._np_random = value
|
|
56
|
+
self._np_random_seed = -1
|
|
57
|
+
|
|
58
|
+
@property
|
|
59
|
+
def np_random_seed(self) -> int:
|
|
60
|
+
if self._np_random_seed is None:
|
|
61
|
+
self._np_random, self._np_random_seed = np_random()
|
|
62
|
+
return self._np_random_seed
|
|
63
|
+
|
|
64
|
+
@property
|
|
65
|
+
def unwrapped(self):
|
|
66
|
+
return self
|
|
67
|
+
|
|
68
|
+
def close(self) -> None:
|
|
69
|
+
pass
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class EzPickle:
    """Pickle objects by replaying their constructor arguments.

    Only the positional and keyword arguments passed to ``__init__`` are
    stored in the pickled state. Restoring builds a new instance of the
    same type from those arguments and copies its attributes over.
    """

    def __init__(self, *args: object, **kwargs: object) -> None:
        """Record the constructor arguments to replay on unpickling."""
        self._ezpickle_args = args
        self._ezpickle_kwargs = kwargs

    def __getstate__(self) -> dict[str, Any]:
        """Return only the recorded constructor arguments."""
        return {
            "_ezpickle_args": self._ezpickle_args,
            "_ezpickle_kwargs": self._ezpickle_kwargs,
        }

    def __setstate__(self, state: dict[str, Any]) -> None:
        """Rebuild the object by calling the constructor again."""
        # Use type(self) so subclasses are reconstructed as themselves.
        obj = type(self)(*state["_ezpickle_args"], **state["_ezpickle_kwargs"])
        self.__dict__.update(obj.__dict__)
|
|
88
|
+
|