platform-lander 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2016 OpenAI
4
+ Copyright (c) 2022 Farama Foundation
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
@@ -0,0 +1,12 @@
1
+ include README.md
2
+ include LICENSE
3
+ recursive-include src/platform_lander *.py
4
+ recursive-include tests *.py
5
+ recursive-include examples *.py
6
+ prune runs
7
+ global-exclude __pycache__/*
8
+ global-exclude *.py[cod]
9
+ global-exclude .DS_Store
10
+ global-exclude *.pt
11
+ global-exclude *.csv
12
+ global-exclude *.log
@@ -0,0 +1,197 @@
1
+ Metadata-Version: 2.1
2
+ Name: platform_lander
3
+ Version: 0.1.0
4
+ Summary: Standalone reusable-booster landing environment for reinforcement learning.
5
+ Author: Andriy Burkov
6
+ License: The MIT License
7
+
8
+ Copyright (c) 2016 OpenAI
9
+ Copyright (c) 2022 Farama Foundation
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in
19
+ all copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27
+ THE SOFTWARE.
28
+
29
+ Project-URL: Homepage, https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander
30
+ Project-URL: Source, https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander
31
+ Project-URL: Repository, https://github.com/aburkov/theDRLbook
32
+ Keywords: reinforcement-learning,rl,box2d,lander,environment
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.10
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: License :: OSI Approved :: MIT License
38
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
39
+ Requires-Python: >=3.10
40
+ Description-Content-Type: text/markdown
41
+ Provides-Extra: test
42
+ Provides-Extra: train
43
+ License-File: LICENSE
44
+
45
+ # Platform Lander
46
+
47
+ A standalone reusable-booster landing environment based on Gymnasium LunarLander v3 physics, but without importing Gymnasium. The task is to land a SpaceX-style booster upright on a moving floating platform. Missing the platform and falling into the ocean, or contacting the platform in a non-vertical position, terminates the episode as a failure.
48
+
49
+ ## Install
50
+
51
+ After the package has been published to PyPI:
52
+
53
+ ```bash
54
+ pip install platform_lander
55
+ ```
56
+
57
+ Before the PyPI release is available, install the same package directly from
58
+ the book repository subdirectory:
59
+
60
+ ```bash
61
+ pip install "platform_lander @ git+https://github.com/aburkov/theDRLbook.git#subdirectory=test_environments/platform_lander"
62
+ ```
63
+
64
+ For local development from this folder:
65
+
66
+ ```bash
67
+ pip install -e .
68
+ ```
69
+
70
+ ## Google Colab
71
+
72
+ Use the same install command in the first notebook cell. Colab usually needs `swig` before Box2D builds:
73
+
74
+ ```python
75
+ !apt-get -qq install swig
76
+ !pip install -q platform_lander
77
+ ```
78
+
79
+ Then import normally:
80
+
81
+ ```python
82
+ from platform_lander import PlatformLander
83
+
84
+ env = PlatformLander(render_mode="rgb_array", enable_wind=True, wind_power=5.0)
85
+ obs, info = env.reset(seed=0)
86
+ obs, reward, terminated, truncated, info = env.step(2)
87
+ frame = env.render()
88
+ ```
89
+
90
+ Display a rendered frame in Colab:
91
+
92
+ ```python
93
+ import matplotlib.pyplot as plt
94
+
95
+ plt.imshow(frame)
96
+ plt.axis("off")
97
+ plt.show()
98
+ ```
99
+
100
+ ## Local Script
101
+
102
+ To watch the booster in a local Pygame window, install the package in editable
103
+ mode and run the demo:
104
+
105
+ ```bash
106
+ pip install -e .
107
+ python examples/demo.py
108
+ ```
109
+
110
+ The test file is headless, so running `pytest` or `python tests/test_platform_lander.py`
111
+ will not open an animation window.
112
+
113
+ To train a discrete policy with the textbook single-trajectory REINFORCE
114
+ algorithm and then show three animated runs:
115
+
116
+ ```bash
117
+ pip install -e ".[train]"
118
+ python vanilla_reinforce.py
119
+ ```
120
+
121
+ The repository also includes incremental REINFORCE variants:
122
+
123
+ ```bash
124
+ python rtg_reinforce.py # vanilla + per-timestep reward-to-go
125
+ python average_reinforcement_baseline_reinforce.py # reward-to-go + running scalar RTG baseline
126
+ python value_function_baseline_reinforce.py # reward-to-go + learned value-function baseline
127
+ python batch_reinforce.py # vanilla + trajectory batches
128
+ python full_reinforce.py # batches + reward-to-go + selectable scalar baseline
129
+ ```
130
+
131
+ Each training script writes a log, per-episode CSV data, and a checkpoint under
132
+ `runs/` by default, for example `runs/full_reinforce.log`,
133
+ `runs/full_reinforce.csv`, and `runs/full_reinforce.pt`. Override those paths
134
+ with `--log-file`, `--csv-file`, and `--model-file`.
135
+
136
+ To load the hardcoded `runs/full_reinforce.pt` checkpoint and watch several
137
+ animated policy rollouts:
138
+
139
+ ```bash
140
+ python watch_trained_policy.py
141
+ ```
142
+
143
+ To generate one side-by-side results graph per variant from the saved CSV
144
+ files:
145
+
146
+ ```bash
147
+ python plot_reinforce_results.py
148
+ ```
149
+
150
+ For a quick smoke test without opening the animation window:
151
+
152
+ ```bash
153
+ python vanilla_reinforce.py --episodes 3 --max-steps 20 --no-animation
154
+ ```
155
+
156
+ ```python
157
+ from platform_lander import PlatformLander
158
+
159
+ env = PlatformLander(enable_wind=True, wind_direction=(1, 0.2), wind_power=5.0)
160
+ obs, info = env.reset(seed=0)
161
+
162
+ for _ in range(1000):
163
+ action = env.action_space.sample()
164
+ obs, reward, terminated, truncated, info = env.step(action)
165
+ if terminated or truncated:
166
+ print(info)
167
+ break
168
+
169
+ env.close()
170
+ ```
171
+
172
+ ## API Notes
173
+
174
+ - `PlatformLander(continuous=False)` uses `Discrete(4)` actions.
175
+ - Actions: `0` no-op, `1` upper-left attitude jet, `2` bottom engine, `3` upper-right attitude jet.
176
+ - `continuous=True` uses a two-value `Box(-1, 1, shape=(2,))` action.
177
+ - Wind is controlled with `enable_wind`, `wind_power`, `wind_direction`, and `set_wind(...)`.
178
+ - The booster has 100 available jet fires by default. After they are exhausted,
179
+ engine commands have no effect and the booster continues ballistically.
180
+ - The observation includes the fraction of jet fires remaining.
181
+ - The package provides local `Box` and `Discrete` spaces and does not import Gymnasium.
182
+
183
+ ## Publishing
184
+
185
+ Build the package from this directory:
186
+
187
+ ```bash
188
+ python -m build
189
+ ```
190
+
191
+ Upload the generated `dist/platform_lander-*.tar.gz` and
192
+ `dist/platform_lander-*.whl` files to PyPI with a PyPI account that owns the
193
+ `platform_lander` project name:
194
+
195
+ ```bash
196
+ python -m twine upload dist/*
197
+ ```
@@ -0,0 +1,153 @@
1
+ # Platform Lander
2
+
3
+ A standalone reusable-booster landing environment based on Gymnasium LunarLander v3 physics, but without importing Gymnasium. The task is to land a SpaceX-style booster upright on a moving floating platform. Missing the platform and falling into the ocean, or contacting the platform in a non-vertical position, terminates the episode as a failure.
4
+
5
+ ## Install
6
+
7
+ After the package has been published to PyPI:
8
+
9
+ ```bash
10
+ pip install platform_lander
11
+ ```
12
+
13
+ Before the PyPI release is available, install the same package directly from
14
+ the book repository subdirectory:
15
+
16
+ ```bash
17
+ pip install "platform_lander @ git+https://github.com/aburkov/theDRLbook.git#subdirectory=test_environments/platform_lander"
18
+ ```
19
+
20
+ For local development from this folder:
21
+
22
+ ```bash
23
+ pip install -e .
24
+ ```
25
+
26
+ ## Google Colab
27
+
28
+ Use the same install command in the first notebook cell. Colab usually needs `swig` before Box2D builds:
29
+
30
+ ```python
31
+ !apt-get -qq install swig
32
+ !pip install -q platform_lander
33
+ ```
34
+
35
+ Then import normally:
36
+
37
+ ```python
38
+ from platform_lander import PlatformLander
39
+
40
+ env = PlatformLander(render_mode="rgb_array", enable_wind=True, wind_power=5.0)
41
+ obs, info = env.reset(seed=0)
42
+ obs, reward, terminated, truncated, info = env.step(2)
43
+ frame = env.render()
44
+ ```
45
+
46
+ Display a rendered frame in Colab:
47
+
48
+ ```python
49
+ import matplotlib.pyplot as plt
50
+
51
+ plt.imshow(frame)
52
+ plt.axis("off")
53
+ plt.show()
54
+ ```
55
+
56
+ ## Local Script
57
+
58
+ To watch the booster in a local Pygame window, install the package in editable
59
+ mode and run the demo:
60
+
61
+ ```bash
62
+ pip install -e .
63
+ python examples/demo.py
64
+ ```
65
+
66
+ The test file is headless, so running `pytest` or `python tests/test_platform_lander.py`
67
+ will not open an animation window.
68
+
69
+ To train a discrete policy with the textbook single-trajectory REINFORCE
70
+ algorithm and then show three animated runs:
71
+
72
+ ```bash
73
+ pip install -e ".[train]"
74
+ python vanilla_reinforce.py
75
+ ```
76
+
77
+ The repository also includes incremental REINFORCE variants:
78
+
79
+ ```bash
80
+ python rtg_reinforce.py # vanilla + per-timestep reward-to-go
81
+ python average_reinforcement_baseline_reinforce.py # reward-to-go + running scalar RTG baseline
82
+ python value_function_baseline_reinforce.py # reward-to-go + learned value-function baseline
83
+ python batch_reinforce.py # vanilla + trajectory batches
84
+ python full_reinforce.py # batches + reward-to-go + selectable scalar baseline
85
+ ```
86
+
87
+ Each training script writes a log, per-episode CSV data, and a checkpoint under
88
+ `runs/` by default, for example `runs/full_reinforce.log`,
89
+ `runs/full_reinforce.csv`, and `runs/full_reinforce.pt`. Override those paths
90
+ with `--log-file`, `--csv-file`, and `--model-file`.
91
+
92
+ To load the hardcoded `runs/full_reinforce.pt` checkpoint and watch several
93
+ animated policy rollouts:
94
+
95
+ ```bash
96
+ python watch_trained_policy.py
97
+ ```
98
+
99
+ To generate one side-by-side results graph per variant from the saved CSV
100
+ files:
101
+
102
+ ```bash
103
+ python plot_reinforce_results.py
104
+ ```
105
+
106
+ For a quick smoke test without opening the animation window:
107
+
108
+ ```bash
109
+ python vanilla_reinforce.py --episodes 3 --max-steps 20 --no-animation
110
+ ```
111
+
112
+ ```python
113
+ from platform_lander import PlatformLander
114
+
115
+ env = PlatformLander(enable_wind=True, wind_direction=(1, 0.2), wind_power=5.0)
116
+ obs, info = env.reset(seed=0)
117
+
118
+ for _ in range(1000):
119
+ action = env.action_space.sample()
120
+ obs, reward, terminated, truncated, info = env.step(action)
121
+ if terminated or truncated:
122
+ print(info)
123
+ break
124
+
125
+ env.close()
126
+ ```
127
+
128
+ ## API Notes
129
+
130
+ - `PlatformLander(continuous=False)` uses `Discrete(4)` actions.
131
+ - Actions: `0` no-op, `1` upper-left attitude jet, `2` bottom engine, `3` upper-right attitude jet.
132
+ - `continuous=True` uses a two-value `Box(-1, 1, shape=(2,))` action.
133
+ - Wind is controlled with `enable_wind`, `wind_power`, `wind_direction`, and `set_wind(...)`.
134
+ - The booster has 100 available jet fires by default. After they are exhausted,
135
+ engine commands have no effect and the booster continues ballistically.
136
+ - The observation includes the fraction of jet fires remaining.
137
+ - The package provides local `Box` and `Discrete` spaces and does not import Gymnasium.
138
+
139
+ ## Publishing
140
+
141
+ Build the package from this directory:
142
+
143
+ ```bash
144
+ python -m build
145
+ ```
146
+
147
+ Upload the generated `dist/platform_lander-*.tar.gz` and
148
+ `dist/platform_lander-*.whl` files to PyPI with a PyPI account that owns the
149
+ `platform_lander` project name:
150
+
151
+ ```bash
152
+ python -m twine upload dist/*
153
+ ```
@@ -0,0 +1,70 @@
1
+ """Render PlatformLander in a local Pygame window.
2
+
3
+ Run from the project root with:
4
+
5
+ python examples/demo.py
6
+
7
+ The tests are intentionally headless; this script is for visual inspection.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import sys
14
+ from pathlib import Path
15
+
16
+
17
+ PROJECT_ROOT = Path(__file__).resolve().parents[1]
18
+ SRC = PROJECT_ROOT / "src"
19
+ if str(SRC) not in sys.path:
20
+ sys.path.insert(0, str(SRC))
21
+
22
+ from platform_lander import PlatformLander, heuristic # noqa: E402
23
+
24
+
25
def parse_args() -> argparse.Namespace:
    """Read the demo's command-line options from ``sys.argv``.

    Returns:
        A namespace carrying ``policy``, ``seed``, ``episodes``, ``wind`` and
        ``wind_power``.
    """
    cli = argparse.ArgumentParser(description="Watch the PlatformLander environment.")

    # Flags are declared as data and registered in order, so the ``--help``
    # listing keeps the same sequence.
    option_table = (
        ("--policy", {"choices": ["heuristic", "random"], "default": "heuristic"}),
        ("--seed", {"type": int, "default": 0}),
        ("--episodes", {"type": int, "default": 5}),
        ("--wind", {"action": "store_true", "help": "Enable wind during the demo."}),
        ("--wind-power", {"type": float, "default": 5.0}),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
33
+
34
+
35
def main() -> None:
    """Play the requested episodes in a ``human``-rendered environment.

    Each episode is reset with ``seed + episode`` and runs for at most 1000
    steps. When an episode terminates or is truncated, a one-line summary is
    printed. The environment is closed even if an episode raises.
    """
    args = parse_args()
    # The policy choice cannot change mid-run, so decide it once.
    use_random_policy = args.policy == "random"

    env = PlatformLander(
        render_mode="human",
        enable_wind=args.wind,
        wind_power=args.wind_power,
        wind_direction=(1.0, 0.0),
    )

    try:
        for episode in range(args.episodes):
            obs, _ = env.reset(seed=args.seed + episode)
            total_reward = 0.0

            step = 0
            while step < 1000:
                action = (
                    env.action_space.sample()
                    if use_random_policy
                    else heuristic(env, obs)
                )

                obs, reward, terminated, truncated, info = env.step(action)
                total_reward += reward

                if terminated or truncated:
                    print(
                        f"episode={episode} step={step} "
                        f"reward={total_reward:.1f} info={info}"
                    )
                    break

                step += 1
    finally:
        env.close()
66
+
67
+
68
+ if __name__ == "__main__":
69
+ main()
70
+
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "platform_lander"
7
+ version = "0.1.0"
8
+ description = "Standalone reusable-booster landing environment for reinforcement learning."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { file = "LICENSE" }
12
+ authors = [{ name = "Andriy Burkov" }]
13
+ keywords = ["reinforcement-learning", "rl", "box2d", "lander", "environment"]
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.10",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ ]
22
+ dependencies = [
23
+ "numpy>=1.21",
24
+ "box2d-py>=2.3.5",
25
+ "pygame>=2.1",
26
+ ]
27
+
28
+ [project.optional-dependencies]
29
+ test = ["pytest>=7"]
30
+ train = ["torch>=2"]
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander"
34
+ Source = "https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander"
35
+ Repository = "https://github.com/aburkov/theDRLbook"
36
+
37
+ [tool.setuptools.packages.find]
38
+ where = ["src"]
39
+
40
+ [tool.setuptools]
41
+ license-files = ["LICENSE"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,12 @@
1
+ """Standalone SpaceX-style platform landing environment.
2
+
3
+ This package is intentionally independent from Gymnasium. It keeps the familiar
4
+ ``reset``/``step``/``render`` API and lightweight ``Box``/``Discrete`` spaces so
5
+ it can be used by RL code without importing ``gymnasium``.
6
+ """
7
+
8
+ from platform_lander.platform_lander import PlatformLander, heuristic
9
+ from platform_lander.spaces import Box, Discrete
10
+
11
+ __all__ = ["PlatformLander", "heuristic", "Box", "Discrete"]
12
+
@@ -0,0 +1,88 @@
1
+ """Small Gymnasium-compatible core helpers used by :mod:`platform_lander`.
2
+
3
+ The environment code is adapted from Gymnasium's LunarLander v3, but this file
4
+ contains the minimal runtime support needed to use it without importing
5
+ Gymnasium.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ import numpy as np
13
+
14
+
15
class DependencyNotInstalled(ImportError):
    """Signal that an optional rendering or physics dependency is unavailable.

    Subclasses :class:`ImportError`, so callers that already catch import
    failures also catch this.
    """
17
+
18
+
19
class Error(Exception):
    """Root exception type for errors raised by this package."""
21
+
22
+
23
def np_random(seed: int | None = None) -> tuple[np.random.Generator, int]:
    """Create a PCG64-backed NumPy generator together with its seed.

    Args:
        seed: ``None`` or a non-negative Python ``int``. It is passed to
            :class:`numpy.random.SeedSequence` unchanged.

    Returns:
        ``(generator, seed_used)``, where ``seed_used`` is the entropy reported
        by the seed sequence, converted to ``int``.

    Raises:
        Error: If ``seed`` is neither ``None`` nor a non-negative Python ``int``.
    """
    if seed is not None:
        seed_is_valid = isinstance(seed, int) and seed >= 0
        if not seed_is_valid:
            raise Error(f"Seed must be a non-negative python integer, got {seed!r}")

    sequence = np.random.SeedSequence(seed)
    bit_generator = np.random.PCG64(sequence)
    return np.random.Generator(bit_generator), int(sequence.entropy)
32
+
33
+
34
class Env:
    """Bare-bones environment base class that owns a lazily created RNG.

    Subclasses get Gymnasium-style seeding through :meth:`reset` and the
    ``np_random`` / ``np_random_seed`` properties.
    """

    # Class-level defaults; subclasses are expected to override these.
    metadata: dict[str, Any] = {"render_modes": []}
    render_mode: str | None = None

    # Generator and seed stay unset until a seed is given or one is requested.
    _np_random: np.random.Generator | None = None
    _np_random_seed: int | None = None

    def reset(self, *, seed: int | None = None, options: dict | None = None):
        """Re-seed the generator when ``seed`` is given; otherwise do nothing.

        ``options`` is accepted but not used here.
        """
        if seed is None:
            return
        self._np_random, self._np_random_seed = np_random(seed)

    @property
    def np_random(self) -> np.random.Generator:
        """The environment's generator, created unseeded on first access."""
        if self._np_random is not None:
            return self._np_random
        self._np_random, self._np_random_seed = np_random()
        return self._np_random

    @np_random.setter
    def np_random(self, value: np.random.Generator) -> None:
        # The supplied generator's seed is not inspected; -1 is stored instead
        # (presumably as an "unknown seed" marker).
        self._np_random, self._np_random_seed = value, -1

    @property
    def np_random_seed(self) -> int:
        """The seed of the current generator, creating the generator if needed."""
        if self._np_random_seed is not None:
            return self._np_random_seed
        self._np_random, self._np_random_seed = np_random()
        return self._np_random_seed

    @property
    def unwrapped(self):
        """Return this environment itself."""
        return self

    def close(self) -> None:
        """Do nothing; provided as a cleanup hook."""
        return None
70
+
71
+
72
class EzPickle:
    """Mixin that pickles an object as the arguments used to construct it.

    Only the recorded constructor arguments are saved. On restore, a new
    instance is built from them and its attributes are copied onto the target.
    """

    def __init__(self, *args: object, **kwargs: object) -> None:
        """Record the positional and keyword arguments for later replay."""
        self._ezpickle_args, self._ezpickle_kwargs = args, kwargs

    def __getstate__(self) -> dict[str, Any]:
        """Return only the recorded constructor arguments as pickle state."""
        state: dict[str, Any] = {}
        state["_ezpickle_args"] = self._ezpickle_args
        state["_ezpickle_kwargs"] = self._ezpickle_kwargs
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        """Rebuild via the constructor and adopt the new object's attributes."""
        ctor_args = state["_ezpickle_args"]
        ctor_kwargs = state["_ezpickle_kwargs"]
        rebuilt = type(self)(*ctor_args, **ctor_kwargs)
        self.__dict__.update(vars(rebuilt))
88
+