platform-lander 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- platform_lander/__init__.py +12 -0
- platform_lander/core.py +88 -0
- platform_lander/platform_lander.py +834 -0
- platform_lander/spaces.py +136 -0
- platform_lander-0.1.0.dist-info/LICENSE +22 -0
- platform_lander-0.1.0.dist-info/METADATA +202 -0
- platform_lander-0.1.0.dist-info/RECORD +9 -0
- platform_lander-0.1.0.dist-info/WHEEL +5 -0
- platform_lander-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Standalone SpaceX-style platform landing environment.
|
|
2
|
+
|
|
3
|
+
This package is intentionally independent from Gymnasium. It keeps the familiar
|
|
4
|
+
``reset``/``step``/``render`` API and lightweight ``Box``/``Discrete`` spaces so
|
|
5
|
+
it can be used by RL code without importing ``gymnasium``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from platform_lander.platform_lander import PlatformLander, heuristic
|
|
9
|
+
from platform_lander.spaces import Box, Discrete
|
|
10
|
+
|
|
11
|
+
__all__ = ["PlatformLander", "heuristic", "Box", "Discrete"]
|
|
12
|
+
|
platform_lander/core.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Small Gymnasium-compatible core helpers used by :mod:`platform_lander`.
|
|
2
|
+
|
|
3
|
+
The environment code is adapted from Gymnasium's LunarLander v3, but this file
|
|
4
|
+
contains the minimal runtime support needed to use it without importing
|
|
5
|
+
Gymnasium.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DependencyNotInstalled(ImportError):
    """Signal that an optional physics or rendering dependency cannot be imported."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Error(Exception):
    """Root exception type for errors raised by this package."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def np_random(seed: int | None = None) -> tuple[np.random.Generator, int]:
    """Create a PCG64-backed generator and report the seed behind it.

    Args:
        seed: A non-negative Python ``int``, or ``None`` to let NumPy gather
            fresh entropy.

    Returns:
        ``(generator, seed)`` where ``seed`` is the entropy of the
        ``SeedSequence`` the generator was built from.

    Raises:
        Error: If ``seed`` is given but is not a non-negative ``int``.
    """
    if seed is not None:
        is_valid = isinstance(seed, int) and seed >= 0
        if not is_valid:
            raise Error(f"Seed must be a non-negative python integer, got {seed!r}")

    sequence = np.random.SeedSequence(seed)
    generator = np.random.Generator(np.random.PCG64(sequence))
    return generator, int(sequence.entropy)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Env:
    """Bare-bones environment base that owns a lazily created, seedable RNG."""

    # Subclasses replace this with the render modes they support.
    metadata: dict[str, Any] = {"render_modes": []}
    render_mode: str | None = None

    # Both stay ``None`` until a seed is supplied or the RNG is first requested.
    _np_random: np.random.Generator | None = None
    _np_random_seed: int | None = None

    def reset(self, *, seed: int | None = None, options: dict | None = None):
        """Re-seed the generator when ``seed`` is given; otherwise do nothing."""
        if seed is None:
            return
        generator, used_seed = np_random(seed)
        self._np_random = generator
        self._np_random_seed = used_seed

    @property
    def np_random(self) -> np.random.Generator:
        """The environment's generator, created from fresh entropy on first use."""
        if self._np_random is None:
            generator, used_seed = np_random()
            self._np_random = generator
            self._np_random_seed = used_seed
        return self._np_random

    @np_random.setter
    def np_random(self, value: np.random.Generator) -> None:
        # An externally supplied generator has no known seed; -1 marks that.
        self._np_random, self._np_random_seed = value, -1

    @property
    def np_random_seed(self) -> int:
        """Seed of the current generator (``-1`` if the generator was assigned)."""
        if self._np_random_seed is None:
            generator, used_seed = np_random()
            self._np_random = generator
            self._np_random_seed = used_seed
        return self._np_random_seed

    @property
    def unwrapped(self):
        """Return this environment itself."""
        return self

    def close(self) -> None:
        """Release resources; the base implementation has nothing to release."""
        return None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class EzPickle:
    """Mixin that pickles an object as the arguments needed to rebuild it."""

    def __init__(self, *args: object, **kwargs: object) -> None:
        """Remember the constructor arguments so they can be replayed later."""
        self._ezpickle_args, self._ezpickle_kwargs = args, kwargs

    def __getstate__(self) -> dict[str, Any]:
        """Return only the recorded constructor arguments."""
        return dict(
            _ezpickle_args=self._ezpickle_args,
            _ezpickle_kwargs=self._ezpickle_kwargs,
        )

    def __setstate__(self, state: dict[str, Any]) -> None:
        """Rebuild through the constructor and adopt the new instance's attributes."""
        positional = state["_ezpickle_args"]
        keyword = state["_ezpickle_kwargs"]
        rebuilt = type(self)(*positional, **keyword)
        self.__dict__.update(rebuilt.__dict__)
|
|
88
|
+
|
|
@@ -0,0 +1,834 @@
|
|
|
1
|
+
"""Standalone moving-platform booster landing environment.
|
|
2
|
+
|
|
3
|
+
The physics and API are adapted from Gymnasium's LunarLander v3 under the MIT
|
|
4
|
+
license, then modified to model a vertical booster landing on a floating
|
|
5
|
+
left-right moving platform.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import math
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
from platform_lander.core import DependencyNotInstalled, Env, EzPickle
|
|
16
|
+
from platform_lander.spaces import Box, Discrete
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
import Box2D
|
|
20
|
+
from Box2D.b2 import (
|
|
21
|
+
circleShape,
|
|
22
|
+
contactListener,
|
|
23
|
+
edgeShape,
|
|
24
|
+
fixtureDef,
|
|
25
|
+
polygonShape,
|
|
26
|
+
)
|
|
27
|
+
except ImportError as exc: # pragma: no cover - import-time dependency check
|
|
28
|
+
raise DependencyNotInstalled(
|
|
29
|
+
"Box2D is not installed. Install it with `pip install swig box2d-py`."
|
|
30
|
+
) from exc
|
|
31
|
+
|
|
32
|
+
if TYPE_CHECKING:
|
|
33
|
+
import pygame
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Simulation rate: the physics world advances 1 / FPS seconds per step, and
# human-mode rendering is throttled to the same rate via ``render_fps``.
FPS = 50
# Pixels per physics-world unit. Pixel-valued constants below are divided by
# SCALE before being handed to Box2D, and world coordinates are multiplied by
# SCALE when drawn.
SCALE = 30.0

# Size of the render surface in pixels.
VIEWPORT_W = 600
VIEWPORT_H = 400

# Impulse magnitudes for the bottom engine and the upper attitude jets.
BOTTOM_ENGINE_POWER = 18.0
TOP_ENGINE_POWER = 1.25
# Bound of the uniform random force applied to the booster at reset.
INITIAL_RANDOM = 700.0

# Booster geometry in pixels, relative to the body origin.
BOOSTER_HALF_WIDTH = 8
BOOSTER_TOP = 52
# Distance from the body origin down to the underside of the feet.
BOOSTER_BOTTOM = 56
# Extra spawn height; added to a world-unit value as-is (not divided by SCALE).
BOOSTER_START_CLEARANCE = 0.06
# Where engine impulses are applied, in pixels along/across the booster axis.
TOP_ENGINE_Y = 43
TOP_ENGINE_AWAY = 9
BOTTOM_ENGINE_Y = 54

# Platform size in pixels and the default for the ``platform_speed`` argument.
PLATFORM_WIDTH = 118
PLATFORM_HEIGHT = 14
PLATFORM_SPEED = 1.15
# Default firing budget; bottom-engine and top-jet firings share the counter.
MAX_JET_FIRES = 200
# Limits a touchdown must stay under to count as a standing landing. The two
# velocity limits are compared against the scaled observation entries, the
# angular limit against the booster's raw angular velocity.
LANDING_ANGLE = math.radians(8.0)
LANDING_VX = 0.45
LANDING_VY = 0.65
LANDING_ANGULAR_V = 0.55
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ContactDetector(contactListener):
    """Box2D contact listener that mirrors booster collisions onto env flags."""

    # Fixture label -> environment flag that records platform contact for it.
    _PLATFORM_FLAGS = (
        ("booster_body", "body_platform_contact"),
        ("left_foot", "left_foot_contact"),
        ("right_foot", "right_foot_contact"),
    )

    def __init__(self, env: "PlatformLander") -> None:
        contactListener.__init__(self)
        self.env = env

    @staticmethod
    def _data(contact) -> tuple[object, object]:
        """Return the ``userData`` labels of the two fixtures in ``contact``."""
        fixture_a, fixture_b = contact.fixtureA, contact.fixtureB
        return fixture_a.userData, fixture_b.userData

    def BeginContact(self, contact) -> None:
        """Raise the ocean flag or the matching platform-contact flags."""
        labels = set(self._data(contact))
        env = self.env

        # Any booster part touching the ocean sensor counts as a splashdown.
        if "ocean" in labels and labels & {"booster_body", "left_foot", "right_foot"}:
            env.ocean_contact = True
            return

        if "platform" not in labels:
            return

        for label, flag in self._PLATFORM_FLAGS:
            if label in labels:
                setattr(env, flag, True)
                env.platform_contact = True

    def EndContact(self, contact) -> None:
        """Clear the flags for the parted fixture and recompute the summary flag."""
        labels = set(self._data(contact))
        if "platform" not in labels:
            return

        env = self.env
        for label, flag in self._PLATFORM_FLAGS:
            if label in labels:
                setattr(env, flag, False)

        # The summary stays set while any booster part still rests on the platform.
        env.platform_contact = (
            env.left_foot_contact
            or env.right_foot_contact
            or env.body_platform_contact
        )
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class PlatformLander(Env, EzPickle):
|
|
116
|
+
"""Land a reusable booster vertically on a moving ocean platform.
|
|
117
|
+
|
|
118
|
+
Actions are ``Discrete(4)`` by default:
|
|
119
|
+
|
|
120
|
+
- 0: do nothing
|
|
121
|
+
- 1: fire the upper-left attitude jet
|
|
122
|
+
- 2: fire the bottom engine
|
|
123
|
+
- 3: fire the upper-right attitude jet
|
|
124
|
+
|
|
125
|
+
With ``continuous=True``, actions are ``Box(-1, 1, shape=(2,))`` where the
|
|
126
|
+
first value controls the bottom engine and the second controls the top jets.
|
|
127
|
+
|
|
128
|
+
Observations contain eleven float values:
|
|
129
|
+
relative x/y to the platform landing point, x/y velocity, booster angle,
|
|
130
|
+
angular velocity, left/right foot contact flags, platform x, and platform
|
|
131
|
+
velocity, and the fraction of jet fires remaining. Wind is applied from
|
|
132
|
+
``wind_direction`` with force ``wind_power``. If ``variable_wind=True`` the
|
|
133
|
+
force varies over time using LunarLander v3's deterministic wind pattern.
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": FPS}
|
|
137
|
+
|
|
138
|
+
def __init__(
    self,
    render_mode: str | None = None,
    continuous: bool = False,
    gravity: float = -10.0,
    enable_wind: bool = False,
    wind_power: float = 15.0,
    wind_direction: float | tuple[float, float] = 0.0,
    turbulence_power: float = 1.5,
    variable_wind: bool = True,
    platform_speed: float = PLATFORM_SPEED,
    max_jet_fires: int = MAX_JET_FIRES,
) -> None:
    """Store the configuration and build the observation/action spaces.

    No bodies are created here; ``booster``, ``platform`` and ``ocean`` stay
    ``None`` until ``reset`` populates the world.

    Raises:
        ValueError: If ``gravity`` is not strictly between -12 and 0.
    """
    # EzPickle replays these positionally, so the order mirrors the signature.
    constructor_args = (
        render_mode,
        continuous,
        gravity,
        enable_wind,
        wind_power,
        wind_direction,
        turbulence_power,
        variable_wind,
        platform_speed,
        max_jet_fires,
    )
    EzPickle.__init__(self, *constructor_args)

    gravity_in_range = -12.0 < gravity < 0.0
    if not gravity_in_range:
        raise ValueError(f"gravity must be between -12 and 0, got {gravity}")

    # Physics and control configuration.
    self.gravity = gravity
    self.continuous = continuous
    self.enable_wind = enable_wind
    self.wind_power = float(wind_power)
    self.wind_direction = wind_direction
    self.turbulence_power = float(turbulence_power)
    self.variable_wind = variable_wind
    self.platform_speed = float(platform_speed)
    self.max_jet_fires = int(max_jet_fires)
    self.render_mode = render_mode

    # Rendering handles, created lazily by ``render``.
    self.screen: pygame.Surface | None = None
    self.clock = None
    self.isopen = True

    # Physics world plus the bodies that ``reset`` fills in.
    self.world = Box2D.b2World(gravity=(0, gravity))
    self.booster: Box2D.b2Body | None = None
    self.platform: Box2D.b2Body | None = None
    self.ocean: Box2D.b2Body | None = None

    # Per-episode bookkeeping.
    self.bottom_flame_power = 0.0
    self.top_flame_power = 0.0
    self.top_flame_direction = 0
    self.jet_fires_used = 0
    self.prev_shaping = None

    # (low, high) per observation entry, in the order ``_get_state`` emits them.
    bounds = (
        (-2.5, 2.5),  # x offset from the platform
        (-2.5, 2.5),  # y offset from the landing height
        (-10.0, 10.0),  # x velocity relative to the platform
        (-10.0, 10.0),  # y velocity
        (-2 * math.pi, 2 * math.pi),  # booster angle
        (-10.0, 10.0),  # angular velocity
        (0.0, 1.0),  # left foot contact
        (0.0, 1.0),  # right foot contact
        (-1.0, 1.0),  # platform x
        (-2.0, 2.0),  # platform velocity
        (0.0, 1.0),  # fraction of jet fires remaining
    )
    low = np.array([lower for lower, _ in bounds], dtype=np.float32)
    high = np.array([upper for _, upper in bounds], dtype=np.float32)
    self.observation_space = Box(low, high, dtype=np.float32)

    if continuous:
        self.action_space = Box(-1.0, 1.0, shape=(2,), dtype=np.float32)
    else:
        self.action_space = Discrete(4)
|
|
206
|
+
|
|
207
|
+
def _destroy(self) -> None:
|
|
208
|
+
self.world.contactListener = None
|
|
209
|
+
for body_name in ("booster", "platform", "ocean"):
|
|
210
|
+
body = getattr(self, body_name)
|
|
211
|
+
if body is not None:
|
|
212
|
+
self.world.DestroyBody(body)
|
|
213
|
+
setattr(self, body_name, None)
|
|
214
|
+
|
|
215
|
+
def reset(self, *, seed: int | None = None, options: dict | None = None):
    """Start a new episode and return ``(observation, info)``.

    Re-seeds the RNG when ``seed`` is given, rebuilds the Box2D world
    (platform, ocean sensor, booster), applies a random initial force to the
    booster and takes one no-op step to produce the first observation.
    ``options`` is accepted but not used here. The returned info dict is empty.
    """
    super().reset(seed=seed)
    self._destroy()

    self.world = Box2D.b2World(gravity=(0, self.gravity))
    # Stored on the world as well so the listener object stays referenced.
    self.world.contactListener_keepref = ContactDetector(self)
    self.world.contactListener = self.world.contactListener_keepref

    # Per-episode flags and counters.
    self.ocean_contact = False
    self.platform_contact = False
    self.body_platform_contact = False
    self.left_foot_contact = False
    self.right_foot_contact = False
    self.failure_reason: str | None = None
    self.prev_shaping = None
    self.bottom_flame_power = 0.0
    self.top_flame_power = 0.0
    self.top_flame_direction = 0
    self.jet_fires_used = 0

    # Viewport size in world units.
    w = VIEWPORT_W / SCALE
    h = VIEWPORT_H / SCALE
    self.platform_y = h / 4
    self.platform_half_width = PLATFORM_WIDTH / SCALE / 2
    self.platform_half_height = PLATFORM_HEIGHT / SCALE / 2
    # Keep the whole platform 0.25 world units inside the viewport edges.
    self.platform_min_x = self.platform_half_width + 0.25
    self.platform_max_x = w - self.platform_half_width - 0.25
    self.platform_direction = int(self.np_random.choice([-1, 1]))

    # NOTE: the order of RNG draws below determines seeded reproducibility.
    platform_x = float(self.np_random.uniform(self.platform_min_x, self.platform_max_x))
    self.platform = self.world.CreateKinematicBody(position=(platform_x, self.platform_y))
    platform_fixture = self.platform.CreateFixture(
        fixtureDef(
            shape=polygonShape(box=(self.platform_half_width, self.platform_half_height)),
            density=0.0,
            friction=0.9,
            restitution=0.0,
            categoryBits=0x0001,
        )
    )
    platform_fixture.userData = "platform"
    self.platform.color1 = (38, 42, 48)
    self.platform.color2 = (235, 235, 235)

    # The ocean is a sensor edge across the viewport, slightly below the
    # platform's centre line; touching it is detected by ContactDetector.
    self.ocean_y = self.platform_y - self.platform_half_height * 0.75
    self.ocean = self.world.CreateStaticBody()
    ocean_fixture = self.ocean.CreateFixture(
        fixtureDef(
            shape=edgeShape(vertices=[(0, self.ocean_y), (w, self.ocean_y)]),
            isSensor=True,
            categoryBits=0x0001,
        )
    )
    ocean_fixture.userData = "ocean"

    # Spawn the booster horizontally centred, with its origin above the top
    # edge of the viewport.
    initial_x = w / 2
    initial_y = h + BOOSTER_BOTTOM / SCALE + BOOSTER_START_CLEARANCE
    self.booster = self.world.CreateDynamicBody(position=(initial_x, initial_y), angle=0.0)
    # Main hull: a tall box with a tapered nose.
    body_fixture = self.booster.CreateFixture(
        fixtureDef(
            shape=polygonShape(
                vertices=[
                    (-BOOSTER_HALF_WIDTH / SCALE, -48 / SCALE),
                    (-BOOSTER_HALF_WIDTH / SCALE, 43 / SCALE),
                    (-5 / SCALE, BOOSTER_TOP / SCALE),
                    (5 / SCALE, BOOSTER_TOP / SCALE),
                    (BOOSTER_HALF_WIDTH / SCALE, 43 / SCALE),
                    (BOOSTER_HALF_WIDTH / SCALE, -48 / SCALE),
                ]
            ),
            density=4.5,
            friction=0.25,
            categoryBits=0x0010,
            maskBits=0x0001,
            restitution=0.0,
        )
    )
    body_fixture.userData = "booster_body"

    # Feet are extra fixtures on the same body, offset left/right below the hull.
    left_foot_fixture = self.booster.CreateFixture(
        fixtureDef(
            shape=polygonShape(box=(8 / SCALE, 2 / SCALE, (-7 / SCALE, -54 / SCALE), 0.0)),
            density=0.7,
            friction=1.0,
            categoryBits=0x0010,
            maskBits=0x0001,
            restitution=0.0,
        )
    )
    left_foot_fixture.userData = "left_foot"

    right_foot_fixture = self.booster.CreateFixture(
        fixtureDef(
            shape=polygonShape(box=(8 / SCALE, 2 / SCALE, (7 / SCALE, -54 / SCALE), 0.0)),
            density=0.7,
            friction=1.0,
            categoryBits=0x0010,
            maskBits=0x0001,
            restitution=0.0,
        )
    )
    right_foot_fixture.userData = "right_foot"

    self.booster.color1 = (230, 232, 235)
    self.booster.color2 = (40, 44, 52)

    # Random initial push so episodes do not all start identically.
    self.booster.ApplyForceToCenter(
        (
            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
        ),
        True,
    )

    # Wind/turbulence phase counters are only created when wind is enabled
    # at reset time.
    if self.enable_wind:
        self.wind_idx = int(self.np_random.integers(-9999, 9999))
        self.torque_idx = int(self.np_random.integers(-9999, 9999))

    self.drawlist = [self.platform, self.booster]

    if self.render_mode == "human":
        self.render()
    # A no-op step yields the initial observation.
    return self.step(np.array([0.0, 0.0], dtype=np.float32) if self.continuous else 0)[0], {}
|
|
338
|
+
|
|
339
|
+
def _wind_unit(self) -> tuple[float, float]:
|
|
340
|
+
if isinstance(self.wind_direction, tuple):
|
|
341
|
+
x, y = self.wind_direction
|
|
342
|
+
norm = math.hypot(x, y)
|
|
343
|
+
return (0.0, 0.0) if norm == 0 else (x / norm, y / norm)
|
|
344
|
+
return math.cos(float(self.wind_direction)), math.sin(float(self.wind_direction))
|
|
345
|
+
|
|
346
|
+
def _wind_scale(self) -> float:
|
|
347
|
+
if not self.variable_wind:
|
|
348
|
+
return 1.0
|
|
349
|
+
scale = math.tanh(
|
|
350
|
+
math.sin(0.02 * self.wind_idx)
|
|
351
|
+
+ math.sin(math.pi * 0.01 * self.wind_idx)
|
|
352
|
+
)
|
|
353
|
+
self.wind_idx += 1
|
|
354
|
+
return scale
|
|
355
|
+
|
|
356
|
+
def set_wind(
|
|
357
|
+
self,
|
|
358
|
+
*,
|
|
359
|
+
power: float | None = None,
|
|
360
|
+
direction: float | tuple[float, float] | None = None,
|
|
361
|
+
enabled: bool | None = None,
|
|
362
|
+
) -> None:
|
|
363
|
+
"""Adjust wind during an episode."""
|
|
364
|
+
|
|
365
|
+
if power is not None:
|
|
366
|
+
self.wind_power = float(power)
|
|
367
|
+
if direction is not None:
|
|
368
|
+
self.wind_direction = direction
|
|
369
|
+
if enabled is not None:
|
|
370
|
+
self.enable_wind = bool(enabled)
|
|
371
|
+
|
|
372
|
+
def _update_platform(self) -> None:
|
|
373
|
+
assert self.platform is not None
|
|
374
|
+
x = self.platform.position.x
|
|
375
|
+
if x <= self.platform_min_x + 1e-6 and self.platform_direction < 0:
|
|
376
|
+
self.platform_direction = 1
|
|
377
|
+
elif x >= self.platform_max_x - 1e-6 and self.platform_direction > 0:
|
|
378
|
+
self.platform_direction = -1
|
|
379
|
+
self.platform.linearVelocity = (self.platform_speed * self.platform_direction, 0.0)
|
|
380
|
+
|
|
381
|
+
def _clamp_platform(self) -> None:
|
|
382
|
+
assert self.platform is not None
|
|
383
|
+
x = min(max(self.platform.position.x, self.platform_min_x), self.platform_max_x)
|
|
384
|
+
if x != self.platform.position.x:
|
|
385
|
+
if x <= self.platform_min_x + 1e-6:
|
|
386
|
+
self.platform_direction = 1
|
|
387
|
+
elif x >= self.platform_max_x - 1e-6:
|
|
388
|
+
self.platform_direction = -1
|
|
389
|
+
self.platform.position = (x, self.platform.position.y)
|
|
390
|
+
self.platform.linearVelocity = (self.platform_speed * self.platform_direction, 0.0)
|
|
391
|
+
|
|
392
|
+
def _apply_wind(self) -> None:
|
|
393
|
+
assert self.booster is not None
|
|
394
|
+
if not self.enable_wind or self.platform_contact:
|
|
395
|
+
return
|
|
396
|
+
wx, wy = self._wind_unit()
|
|
397
|
+
wind_mag = self.wind_power * self._wind_scale()
|
|
398
|
+
self.booster.ApplyForceToCenter((wx * wind_mag, wy * wind_mag), True)
|
|
399
|
+
|
|
400
|
+
torque_mag = math.tanh(
|
|
401
|
+
math.sin(0.02 * self.torque_idx)
|
|
402
|
+
+ math.sin(math.pi * 0.01 * self.torque_idx)
|
|
403
|
+
) * self.turbulence_power
|
|
404
|
+
self.torque_idx += 1
|
|
405
|
+
self.booster.ApplyTorque(torque_mag, True)
|
|
406
|
+
|
|
407
|
+
def _apply_engines(self, action) -> tuple[float, float]:
    """Fire the engines requested by ``action`` and return the powers used.

    Returns:
        ``(bottom_power, top_power)`` as floats; each is 0.0 when the
        corresponding engine did not fire, including when the jet budget
        ``max_jet_fires`` is exhausted.

    Raises:
        AssertionError: In discrete mode, if ``action`` is not contained in
            ``action_space``.
    """
    assert self.booster is not None

    if self.continuous:
        action = np.clip(action, -1, +1).astype(np.float64)
    else:
        if not self.action_space.contains(action):
            raise AssertionError(f"{action!r} ({type(action)}) is not a valid action")

    # ``tip`` is built from the sine/cosine of the booster angle; ``side``
    # is ``tip`` rotated by a quarter turn.
    tip = (math.sin(self.booster.angle), math.cos(self.booster.angle))
    side = (-tip[1], tip[0])
    # Two random offsets are drawn every step, whether or not an engine fires,
    # so RNG consumption does not depend on the action.
    dispersion = [self.np_random.uniform(-1.0, 1.0) / SCALE for _ in range(2)]

    bottom_power = 0.0
    if (self.continuous and action[0] > 0.0) or (not self.continuous and action == 2):
        # The bottom engine only fires while the shared jet budget lasts.
        if self.jet_fires_used < self.max_jet_fires:
            # Continuous throttle maps action[0] in (0, 1] to power in (0.5, 1.0].
            bottom_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 if self.continuous else 1.0
            self.jet_fires_used += 1
            # Impulse is applied near the bottom of the booster, jittered by
            # the dispersion offsets, and pushes along ``tip``.
            ox = -tip[0] * (BOTTOM_ENGINE_Y / SCALE + dispersion[0]) + side[0] * dispersion[1]
            oy = -tip[1] * (BOTTOM_ENGINE_Y / SCALE + dispersion[0]) + side[1] * dispersion[1]
            impulse_pos = (self.booster.position[0] + ox, self.booster.position[1] + oy)
            impulse = (tip[0] * BOTTOM_ENGINE_POWER * bottom_power, tip[1] * BOTTOM_ENGINE_POWER * bottom_power)
            self.booster.ApplyLinearImpulse(impulse, impulse_pos, True)

    top_power = 0.0
    top_direction = 0
    # Continuous mode ignores top-jet commands with magnitude of 0.5 or less.
    if self.continuous and abs(action[1]) > 0.5:
        top_direction = -1 if action[1] < 0 else 1
        top_power = float(np.clip(abs(action[1]), 0.5, 1.0))
    elif not self.continuous and action in (1, 3):
        top_direction = -1 if action == 1 else 1
        top_power = 1.0

    if top_direction and self.jet_fires_used < self.max_jet_fires:
        # top_direction < 0 means upper-left jet, > 0 means upper-right jet.
        self.jet_fires_used += 1
        local_side = TOP_ENGINE_AWAY / SCALE * (-top_direction)
        impulse_sign = top_direction
        ox = tip[0] * (TOP_ENGINE_Y / SCALE + dispersion[0]) + side[0] * local_side
        oy = tip[1] * (TOP_ENGINE_Y / SCALE + dispersion[0]) + side[1] * local_side
        impulse_pos = (self.booster.position[0] + ox, self.booster.position[1] + oy)
        impulse = (
            side[0] * impulse_sign * TOP_ENGINE_POWER * top_power,
            side[1] * impulse_sign * TOP_ENGINE_POWER * top_power,
        )
        self.booster.ApplyLinearImpulse(impulse, impulse_pos, True)
    elif top_direction:
        # A jet was requested but the budget is spent: report zero power.
        top_power = 0.0

    # Flame intensities are kept for rendering and fade by a factor of 0.68
    # per step when the engine is not firing at a higher power.
    self.bottom_flame_power = max(bottom_power, self.bottom_flame_power * 0.68)
    if top_direction:
        self.top_flame_direction = top_direction
    self.top_flame_power = max(top_power, self.top_flame_power * 0.68)

    return float(bottom_power), float(top_power)
|
|
462
|
+
|
|
463
|
+
def _get_state(self) -> np.ndarray:
    """Build the eleven-entry ``float32`` observation vector.

    Entries, in order: x offset from the platform, y offset from the landing
    height, x velocity relative to the platform, y velocity, booster angle
    wrapped by ``_normalized_angle``, scaled angular velocity, left and right
    foot contact flags, platform x, platform x velocity, and the fraction of
    jet fires remaining.

    The remaining-fires fraction is reported as 0.0 when ``max_jet_fires`` is
    zero or negative: no jet can fire with such a budget, and dividing by it
    would raise ``ZeroDivisionError`` or give a misleading positive value.
    """
    assert self.booster is not None
    assert self.platform is not None

    pos = self.booster.position
    vel = self.booster.linearVelocity
    platform_pos = self.platform.position
    platform_vel = self.platform.linearVelocity
    # Height of the booster origin when its feet rest on the platform top.
    target_y = self.platform_y + self.platform_half_height + BOOSTER_BOTTOM / SCALE
    half_w = VIEWPORT_W / SCALE / 2
    half_h = VIEWPORT_H / SCALE / 2

    if self.max_jet_fires > 0:
        remaining = self.max_jet_fires - self.jet_fires_used
        jet_fraction = max(0.0, remaining / self.max_jet_fires)
    else:
        jet_fraction = 0.0

    state = np.array(
        [
            (pos.x - platform_pos.x) / half_w,
            (pos.y - target_y) / half_h,
            (vel.x - platform_vel.x) * half_w / FPS,
            vel.y * half_h / FPS,
            self._normalized_angle(self.booster.angle),
            20.0 * self.booster.angularVelocity / FPS,
            1.0 if self.left_foot_contact else 0.0,
            1.0 if self.right_foot_contact else 0.0,
            (platform_pos.x - half_w) / half_w,
            platform_vel.x,
            jet_fraction,
        ],
        dtype=np.float32,
    )
    return state
|
|
492
|
+
|
|
493
|
+
@staticmethod
|
|
494
|
+
def _normalized_angle(angle: float) -> float:
|
|
495
|
+
return (angle + math.pi) % (2 * math.pi) - math.pi
|
|
496
|
+
|
|
497
|
+
def _is_standing_landing(self, state: np.ndarray) -> bool:
    """Report whether the booster is resting upright and slowly on the platform.

    Requires both feet in contact, the x offset within the platform's half
    width less a 0.15 margin, and velocity, angle and angular velocity each
    below their ``LANDING_*`` limits.
    """
    assert self.booster is not None
    if not (self.left_foot_contact and self.right_foot_contact):
        return False

    # The x offset in ``state`` is expressed in half-viewport units.
    max_offset = (self.platform_half_width - 0.15) / (VIEWPORT_W / SCALE / 2)
    if not abs(state[0]) < max_offset:
        return False

    slow_enough = abs(state[2]) < LANDING_VX and abs(state[3]) < LANDING_VY
    upright = abs(state[4]) < LANDING_ANGLE
    return bool(
        slow_enough
        and upright
        and abs(self.booster.angularVelocity) < LANDING_ANGULAR_V
    )
|
|
508
|
+
|
|
509
|
+
def _terminal_status(self, state: np.ndarray) -> tuple[bool, bool, str | None]:
|
|
510
|
+
assert self.booster is not None
|
|
511
|
+
if self.ocean_contact or self.booster.position.y < self.ocean_y - 0.5:
|
|
512
|
+
return True, False, "ocean"
|
|
513
|
+
if abs(state[0]) > 2.0:
|
|
514
|
+
return True, False, "out_of_bounds"
|
|
515
|
+
if self.body_platform_contact:
|
|
516
|
+
return True, False, "booster_body_hit_platform"
|
|
517
|
+
if self.platform_contact and abs(state[4]) >= LANDING_ANGLE:
|
|
518
|
+
return True, False, "non_vertical_platform_contact"
|
|
519
|
+
if self._is_standing_landing(state) and (not self.booster.awake or abs(state[3]) < 0.08):
|
|
520
|
+
return True, True, None
|
|
521
|
+
if not self.booster.awake:
|
|
522
|
+
return True, False, "settled_not_vertical"
|
|
523
|
+
return False, False, None
|
|
524
|
+
|
|
525
|
+
def _settle_successful_landing(self) -> None:
    """Snap a successfully landed booster into a clean resting pose.

    The booster is placed upright at landing height, kept within the
    platform's usable width, given the platform's velocity, put to sleep,
    and the contact flags are set to the "both feet down" state.
    """
    assert self.booster is not None
    assert self.platform is not None
    booster = self.booster
    platform = self.platform

    landing_y = self.platform_y + self.platform_half_height + BOOSTER_BOTTOM / SCALE
    centre_x = platform.position.x
    margin = self.platform_half_width - 0.15
    landing_x = float(np.clip(booster.position.x, centre_x - margin, centre_x + margin))

    # Body updates keep their original order.
    booster.position = (landing_x, landing_y)
    booster.angle = 0.0
    booster.linearVelocity = platform.linearVelocity
    booster.angularVelocity = 0.0
    booster.awake = False

    self.left_foot_contact = self.right_foot_contact = True
    self.body_platform_contact = False
    self.platform_contact = True
|
|
542
|
+
|
|
543
|
+
def step(self, action):
    """Advance the simulation by one frame.

    Returns:
        ``(observation, reward, terminated, truncated, info)``; ``truncated``
        is always ``False``.

    Raises:
        AssertionError: If called before ``reset`` has created the booster.
    """
    if self.booster is None:
        raise AssertionError("You forgot to call reset()")

    # Apply all inputs before the physics solver runs.
    self._update_platform()
    self._apply_wind()
    bottom_power, top_power = self._apply_engines(action)

    self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
    self._clamp_platform()

    state = self._get_state()
    offset = math.sqrt(state[0] * state[0] + state[1] * state[1])
    speed = math.sqrt(state[2] * state[2] + state[3] * state[3])
    shaping = (
        -100 * offset
        - 100 * speed
        - 120 * abs(state[4])
        + 12 * state[6]
        + 12 * state[7]
    )

    # The reward is the change in shaping; the first step has no baseline.
    previous_shaping = self.prev_shaping
    self.prev_shaping = shaping
    reward = 0.0 if previous_shaping is None else float(shaping - previous_shaping)
    reward -= bottom_power * 0.30
    reward -= top_power * 0.03

    terminated, success, failure_reason = self._terminal_status(state)
    self.failure_reason = failure_reason
    if terminated:
        # Terminal outcomes replace the shaped reward entirely.
        if success:
            reward = 100.0
            self._settle_successful_landing()
            state = self._get_state()
        else:
            reward = -100.0

    if self.render_mode == "human":
        self.render()

    fires_left = max(0, self.max_jet_fires - self.jet_fires_used)
    info = {
        "success": success,
        "failure_reason": failure_reason,
        "platform_x": float(self.platform.position.x) if self.platform else None,
        "wind_power": self.wind_power if self.enable_wind else 0.0,
        "wind_direction": self.wind_direction,
        "jet_fires_used": self.jet_fires_used,
        "jet_fires_remaining": fires_left,
    }
    return state, reward, terminated, False, info
|
|
588
|
+
|
|
589
|
+
def render(self):
    """Draw the current scene.

    Returns:
        ``None`` when ``render_mode`` is ``None`` or ``"human"`` (the frame
        is shown in a window); an array with the first two axes swapped
        relative to the pygame surface array when it is ``"rgb_array"``.

    Raises:
        DependencyNotInstalled: If pygame cannot be imported.
    """
    if self.render_mode is None:
        return None

    try:
        import pygame
        from pygame import gfxdraw
    except ImportError as exc:  # pragma: no cover - optional rendering dependency
        raise DependencyNotInstalled("pygame is not installed. Install it with `pip install pygame`.") from exc

    if self.screen is None and self.render_mode == "human":
        pygame.init()
        pygame.display.init()
        self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
    if self.clock is None:
        self.clock = pygame.time.Clock()

    surf = pygame.Surface((VIEWPORT_W, VIEWPORT_H))
    surf.fill((186, 220, 235))
    h = VIEWPORT_H / SCALE

    # ``ocean_y`` only exists after ``reset``; fall back to a default level.
    ocean_top = (self.ocean_y if hasattr(self, "ocean_y") else h / 4) * SCALE
    ocean_rect = pygame.Rect(0, 0, VIEWPORT_W, ocean_top)
    pygame.draw.rect(surf, (31, 91, 133), ocean_rect)
    for i in range(0, VIEWPORT_W, 48):
        pygame.draw.arc(surf, (62, 132, 176), (i, ocean_top - 8, 56, 18), 0, math.pi, 2)

    self._draw_flames(surf, pygame, gfxdraw)

    # ``drawlist`` is also created by ``reset``. Treat it as empty before the
    # first reset, matching the ``ocean_y`` fallback above, instead of
    # raising AttributeError.
    for obj in getattr(self, "drawlist", ()):
        for fixture in obj.fixtures:
            if fixture.userData == "ocean":
                continue
            trans = fixture.body.transform
            if type(fixture.shape) is circleShape:
                center = trans * fixture.shape.pos * SCALE
                radius = max(2, int(fixture.shape.radius * SCALE))
                pygame.draw.circle(surf, obj.color1, center, radius)
                pygame.draw.circle(surf, obj.color2, center, max(1, radius - 1))
                continue

            path = [trans * vertex * SCALE for vertex in fixture.shape.vertices]
            color = obj.color1
            outline = obj.color2
            if fixture.userData == "platform":
                color = (45, 48, 54)
                outline = (235, 235, 235)
            elif fixture.userData in {"left_foot", "right_foot"}:
                color = (42, 46, 54)
                outline = (245, 245, 245)
            pygame.draw.polygon(surf, color, path)
            gfxdraw.aapolygon(surf, path, color)
            pygame.draw.aalines(surf, outline, True, path)

    # NOTE(review): this helper is defined outside the visible part of the
    # file; confirm it also tolerates being called before ``reset``.
    self._draw_booster_details(surf, pygame)
    # The scene is drawn with y pointing up; flip it for screen coordinates.
    surf = pygame.transform.flip(surf, False, True)

    if self.render_mode == "human":
        assert self.screen is not None
        self.screen.blit(surf, (0, 0))
        pygame.event.pump()
        self.clock.tick(self.metadata["render_fps"])
        pygame.display.flip()
        return None
    if self.render_mode == "rgb_array":
        return np.transpose(np.array(pygame.surfarray.pixels3d(surf)), axes=(1, 0, 2))
    return None
|
|
657
|
+
|
|
658
|
+
def _draw_flames(self, surf, pygame, gfxdraw) -> None:
|
|
659
|
+
if self.booster is None:
|
|
660
|
+
return
|
|
661
|
+
|
|
662
|
+
layer = pygame.Surface((VIEWPORT_W, VIEWPORT_H), pygame.SRCALPHA)
|
|
663
|
+
|
|
664
|
+
if self.bottom_flame_power > 0.03:
|
|
665
|
+
nozzle, direction = self._local_flame_anchor(
|
|
666
|
+
(0.0, -BOTTOM_ENGINE_Y / SCALE),
|
|
667
|
+
(0.0, -(BOTTOM_ENGINE_Y + 1.0) / SCALE),
|
|
668
|
+
)
|
|
669
|
+
self._draw_flame_cone(
|
|
670
|
+
layer,
|
|
671
|
+
pygame,
|
|
672
|
+
gfxdraw,
|
|
673
|
+
nozzle=nozzle,
|
|
674
|
+
direction=direction,
|
|
675
|
+
power=self.bottom_flame_power,
|
|
676
|
+
length_px=34,
|
|
677
|
+
base_width_px=7,
|
|
678
|
+
end_width_px=20,
|
|
679
|
+
)
|
|
680
|
+
|
|
681
|
+
if self.top_flame_power > 0.03 and self.top_flame_direction:
|
|
682
|
+
local_x = TOP_ENGINE_AWAY / SCALE * (-self.top_flame_direction)
|
|
683
|
+
nozzle, direction = self._local_flame_anchor(
|
|
684
|
+
(local_x, TOP_ENGINE_Y / SCALE),
|
|
685
|
+
(
|
|
686
|
+
local_x - self.top_flame_direction / SCALE,
|
|
687
|
+
TOP_ENGINE_Y / SCALE,
|
|
688
|
+
),
|
|
689
|
+
)
|
|
690
|
+
self._draw_flame_cone(
|
|
691
|
+
layer,
|
|
692
|
+
pygame,
|
|
693
|
+
gfxdraw,
|
|
694
|
+
nozzle=nozzle,
|
|
695
|
+
direction=direction,
|
|
696
|
+
power=self.top_flame_power,
|
|
697
|
+
length_px=18,
|
|
698
|
+
base_width_px=4,
|
|
699
|
+
end_width_px=10,
|
|
700
|
+
)
|
|
701
|
+
|
|
702
|
+
surf.blit(layer, (0, 0))
|
|
703
|
+
|
|
704
|
+
def _local_flame_anchor(
    self,
    nozzle_local: tuple[float, float],
    direction_local: tuple[float, float],
) -> tuple[tuple[float, float], tuple[float, float]]:
    """Convert two booster-local points into a scaled anchor and offset.

    Both points are pushed through the booster's transform and multiplied
    by ``SCALE``.

    Returns:
        ``(nozzle, direction)`` where ``nozzle`` is the transformed, scaled
        ``nozzle_local`` and ``direction`` is the vector from that nozzle to
        the transformed, scaled ``direction_local``.
    """
    assert self.booster is not None
    body_transform = self.booster.transform
    world_nozzle = body_transform * nozzle_local
    world_aim = body_transform * direction_local

    nozzle_x = world_nozzle[0] * SCALE
    nozzle_y = world_nozzle[1] * SCALE
    offset_x = world_aim[0] * SCALE - nozzle_x
    offset_y = world_aim[1] * SCALE - nozzle_y
    return (nozzle_x, nozzle_y), (offset_x, offset_y)
|
|
718
|
+
|
|
719
|
+
@staticmethod
|
|
720
|
+
def _draw_flame_cone(
|
|
721
|
+
layer,
|
|
722
|
+
pygame,
|
|
723
|
+
gfxdraw,
|
|
724
|
+
*,
|
|
725
|
+
nozzle: tuple[float, float],
|
|
726
|
+
direction: tuple[float, float],
|
|
727
|
+
power: float,
|
|
728
|
+
length_px: float,
|
|
729
|
+
base_width_px: float,
|
|
730
|
+
end_width_px: float,
|
|
731
|
+
) -> None:
|
|
732
|
+
direction_vec = np.array(direction, dtype=np.float64)
|
|
733
|
+
norm = np.linalg.norm(direction_vec)
|
|
734
|
+
if norm == 0:
|
|
735
|
+
return
|
|
736
|
+
|
|
737
|
+
direction_vec /= norm
|
|
738
|
+
perp = np.array([-direction_vec[1], direction_vec[0]])
|
|
739
|
+
power = float(np.clip(power, 0.0, 1.0))
|
|
740
|
+
start = np.array(nozzle, dtype=np.float64)
|
|
741
|
+
end = start + direction_vec * length_px * (0.45 + 0.55 * power)
|
|
742
|
+
|
|
743
|
+
def poly(base_width: float, end_width: float, length_scale: float) -> list[tuple[int, int]]:
|
|
744
|
+
scaled_end = start + (end - start) * length_scale
|
|
745
|
+
return [
|
|
746
|
+
tuple(np.round(start + perp * base_width / 2).astype(int)),
|
|
747
|
+
tuple(np.round(start - perp * base_width / 2).astype(int)),
|
|
748
|
+
tuple(np.round(scaled_end - perp * end_width / 2).astype(int)),
|
|
749
|
+
tuple(np.round(scaled_end + perp * end_width / 2).astype(int)),
|
|
750
|
+
]
|
|
751
|
+
|
|
752
|
+
alpha = int(210 * power)
|
|
753
|
+
outer = poly(base_width_px, end_width_px, 1.0)
|
|
754
|
+
middle = poly(base_width_px * 0.65, end_width_px * 0.58, 0.78)
|
|
755
|
+
inner = poly(base_width_px * 0.32, end_width_px * 0.24, 0.48)
|
|
756
|
+
|
|
757
|
+
pygame.draw.polygon(layer, (69, 39, 24, int(120 * power)), outer)
|
|
758
|
+
gfxdraw.aapolygon(layer, outer, (69, 39, 24, int(120 * power)))
|
|
759
|
+
pygame.draw.polygon(layer, (255, 112, 28, alpha), middle)
|
|
760
|
+
gfxdraw.aapolygon(layer, middle, (255, 112, 28, alpha))
|
|
761
|
+
pygame.draw.polygon(layer, (255, 235, 132, int(245 * power)), inner)
|
|
762
|
+
gfxdraw.aapolygon(layer, inner, (255, 235, 132, int(245 * power)))
|
|
763
|
+
|
|
764
|
+
def _draw_booster_details(self, surf, pygame) -> None:
|
|
765
|
+
if self.booster is None:
|
|
766
|
+
return
|
|
767
|
+
trans = self.booster.transform
|
|
768
|
+
stripe_points = [
|
|
769
|
+
(-6 / SCALE, 20 / SCALE),
|
|
770
|
+
(6 / SCALE, 20 / SCALE),
|
|
771
|
+
(6 / SCALE, 28 / SCALE),
|
|
772
|
+
(-6 / SCALE, 28 / SCALE),
|
|
773
|
+
]
|
|
774
|
+
stripe = [trans * point * SCALE for point in stripe_points]
|
|
775
|
+
pygame.draw.polygon(surf, (35, 40, 48), stripe)
|
|
776
|
+
|
|
777
|
+
logo_center = trans * (0, 35 / SCALE) * SCALE
|
|
778
|
+
font_rect = pygame.Rect(0, 0, 18, 7)
|
|
779
|
+
font_rect.center = logo_center
|
|
780
|
+
pygame.draw.rect(surf, (35, 40, 48), font_rect, border_radius=1)
|
|
781
|
+
|
|
782
|
+
def close(self) -> None:
    """Shut down the pygame display if a window was ever created.

    When ``self.screen`` is ``None`` nothing is done; otherwise the pygame
    display and pygame itself are quit and ``self.isopen`` is set to
    ``False``.
    """
    if self.screen is None:
        return

    # Imported lazily so the module can be used without touching pygame
    # until rendering is actually involved.
    import pygame

    pygame.display.quit()
    pygame.quit()
    self.isopen = False
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
def heuristic(env: PlatformLander, state: np.ndarray):
    """Hand-tuned proportional controller for smoke tests and demos.

    Only ``state[0]`` .. ``state[7]`` are read. ``state[6]`` / ``state[7]``
    switch the controller into a damping-only mode when either is truthy
    (presumably the leg-contact flags of the LunarLander observation layout;
    confirm against the environment's observation definition).

    Args:
        env: Environment whose ``unwrapped.continuous`` flag selects the
            action format.
        state: Current observation.

    Returns:
        For continuous control, a ``float32`` array of two values clipped to
        ``[-1, 1]``. Otherwise an integer action: ``0`` no-op, ``1``
        upper-left attitude jet, ``2`` bottom engine, ``3`` upper-right
        attitude jet.
    """
    if state[6] or state[7]:
        # Contact mode: only damp the angle and the vertical speed.
        angle_todo = -state[4] * 0.9 - state[5] * 0.5
        hover_todo = -state[3] * 0.5
    else:
        tilt_goal = float(np.clip(state[0] * 0.55 + state[2] * 0.9, -0.35, 0.35))
        height_goal = 0.45 * abs(state[0])
        angle_todo = (tilt_goal - state[4]) * 0.55 - state[5] * 0.85
        hover_todo = (height_goal - state[1]) * 0.55 - state[3] * 0.55

    if env.unwrapped.continuous:
        command = np.array([hover_todo * 18 - 1, angle_todo * 18], dtype=np.float32)
        return np.clip(command, -1, +1)

    # Discrete control: the main engine wins when the hover error dominates.
    if hover_todo > abs(angle_todo) and hover_todo > 0.04:
        return 2
    if angle_todo < -0.04:
        return 1
    if angle_todo > 0.04:
        return 3
    return 0
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def demo_heuristic_lander(seed: int | None = None, render: bool = False) -> float:
    """Run one episode driven by :func:`heuristic` and return its total reward.

    Args:
        seed: Seed forwarded to ``env.reset``.
        render: When true the environment is created with
            ``render_mode="human"``; otherwise no render mode is set.

    Returns:
        The sum of rewards collected until the episode terminates, is
        truncated, or 1000 steps have been taken.
    """
    env = PlatformLander(render_mode="human" if render else None)
    total_reward = 0.0
    try:
        state, _ = env.reset(seed=seed)
        for _ in range(1000):
            action = heuristic(env, state)
            state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            if terminated or truncated:
                break
    finally:
        # Always release the environment (and any open window), even when
        # reset/step/heuristic raises part-way through the episode.
        env.close()
    return total_reward


if __name__ == "__main__":
    demo_heuristic_lander(render=True)
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Minimal action and observation spaces for the standalone environment."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Iterable, Sequence
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from platform_lander.core import np_random
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Space:
    """Base class exposing a small slice of Gymnasium's ``Space`` interface.

    Subclasses provide :meth:`sample` and :meth:`contains`; this class keeps
    the shape, the dtype and a lazily created random generator.
    """

    def __init__(
        self,
        shape: Sequence[int] | None = None,
        dtype: type | np.dtype | None = None,
        seed: int | np.random.Generator | None = None,
    ) -> None:
        """Store shape and dtype and optionally set up the random generator.

        ``seed`` may be an existing ``np.random.Generator`` (used as is), an
        integer (passed to :meth:`seed`), or ``None`` (generator is created
        on first access of :attr:`np_random`).
        """
        if shape is None:
            self._shape = None
        else:
            self._shape = tuple(int(dim) for dim in shape)

        self.dtype = np.dtype(dtype) if dtype is not None else None

        self._np_random: np.random.Generator | None = None
        if isinstance(seed, np.random.Generator):
            self._np_random = seed
        elif seed is not None:
            self.seed(seed)

    @property
    def shape(self) -> tuple[int, ...] | None:
        """Shape of the space's elements, or ``None`` if none was given."""
        return self._shape

    @property
    def np_random(self) -> np.random.Generator:
        """Random generator of this space, created on first use if needed."""
        if self._np_random is None:
            self.seed()
        generator = self._np_random
        assert generator is not None
        return generator

    def seed(self, seed: int | None = None) -> int:
        """Replace the random generator and return the seed reported for it."""
        generator, used_seed = np_random(seed)
        self._np_random = generator
        return used_seed

    def sample(self):
        """Draw a random element; must be implemented by subclasses."""
        raise NotImplementedError

    def contains(self, x: Any) -> bool:
        """Tell whether ``x`` belongs to the space; must be implemented by subclasses."""
        raise NotImplementedError

    def __contains__(self, x: Any) -> bool:
        """Support ``x in space`` by delegating to :meth:`contains`."""
        return self.contains(x)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class Box(Space):
    """A closed box in Euclidean space.

    Every element ``x`` of the space has the box's shape and satisfies
    ``low <= x <= high`` coordinate-wise.
    """

    def __init__(
        self,
        low: int | float | np.ndarray,
        high: int | float | np.ndarray,
        shape: Sequence[int] | None = None,
        dtype: type | np.dtype = np.float32,
        seed: int | np.random.Generator | None = None,
    ) -> None:
        """Create the box.

        Args:
            low: Lower bound, either a scalar or an array.
            high: Upper bound, either a scalar or an array.
            shape: Shape of the box. When omitted it is taken from ``low`` or
                ``high`` if one of them is an ``ndarray``, else ``(1,)``.
            dtype: Element dtype; bounds are converted to it.
            seed: Forwarded to :class:`Space`.

        Raises:
            TypeError: If ``shape`` is given but is not iterable.
            ValueError: If the bound shapes do not equal the box shape, or if
                any lower bound exceeds its upper bound.
        """
        self.dtype = np.dtype(dtype)
        if shape is not None:
            if not isinstance(shape, Iterable):
                raise TypeError("Box shape must be iterable")
            shape = tuple(int(dim) for dim in shape)
        elif isinstance(low, np.ndarray):
            shape = low.shape
        elif isinstance(high, np.ndarray):
            shape = high.shape
        else:
            shape = (1,)

        # Scalars are broadcast to the full shape; arrays are only converted.
        self.low = np.full(shape, low, dtype=self.dtype) if np.isscalar(low) else np.asarray(low, dtype=self.dtype)
        self.high = np.full(shape, high, dtype=self.dtype) if np.isscalar(high) else np.asarray(high, dtype=self.dtype)
        if self.low.shape != tuple(shape) or self.high.shape != tuple(shape):
            raise ValueError("Box low/high shapes must match the provided shape")
        if np.any(self.low > self.high):
            raise ValueError("Box low values must be less than or equal to high values")
        super().__init__(shape=shape, dtype=self.dtype, seed=seed)

    def sample(self) -> np.ndarray:
        """Draw a random point from the box.

        * Integer dtypes are sampled uniformly from the closed integer range
          ``[low, high]`` (truncating uniform floats would almost never yield
          ``high`` and would over-represent zero for ranges spanning it).
        * Finite float bounds are sampled with ``uniform(low, high)``.
        * Coordinates with a non-finite bound cannot be sampled uniformly;
          they follow the Gymnasium convention: a shifted exponential when
          only one side is bounded and a standard normal when neither is.
        """
        rng = self.np_random

        if np.issubdtype(self.dtype, np.integer):
            drawn = rng.integers(self.low, self.high, endpoint=True, dtype=self.dtype)
            return np.asarray(drawn, dtype=self.dtype)

        has_low = np.isfinite(self.low)
        has_high = np.isfinite(self.high)
        if has_low.all() and has_high.all():
            # np.asarray also covers 0-d boxes, where uniform returns a plain float.
            return np.asarray(rng.uniform(self.low, self.high), dtype=self.dtype)

        both = has_low & has_high
        only_low = has_low & ~has_high
        only_high = ~has_low & has_high
        neither = ~has_low & ~has_high

        drawn = np.empty(self.low.shape, dtype=np.float64)
        drawn[both] = rng.uniform(self.low[both], self.high[both])
        drawn[only_low] = self.low[only_low] + rng.exponential(size=int(only_low.sum()))
        drawn[only_high] = self.high[only_high] - rng.exponential(size=int(only_high.sum()))
        drawn[neither] = rng.normal(size=int(neither.sum()))
        return drawn.astype(self.dtype)

    def contains(self, x: Any) -> bool:
        """Return whether ``x``, converted to the box dtype, lies inside the box.

        Values that cannot be converted to an array of the box dtype are
        reported as not contained instead of raising.
        """
        try:
            arr = np.asarray(x, dtype=self.dtype)
        except (TypeError, ValueError):
            return False
        return arr.shape == self.shape and bool(np.all(arr >= self.low) and np.all(arr <= self.high))

    def __repr__(self) -> str:
        """Return a string showing the bounds, shape and dtype."""
        return f"Box({self.low}, {self.high}, {self.shape}, {self.dtype})"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class Discrete(Space):
    """A finite set of integers ``{start, ..., start + n - 1}``."""

    def __init__(
        self,
        n: int,
        seed: int | np.random.Generator | None = None,
        start: int = 0,
        dtype: type | np.dtype = np.int64,
    ) -> None:
        """Create the space.

        Args:
            n: Number of elements; must be positive.
            seed: Forwarded to :class:`Space`.
            start: Smallest element of the space.
            dtype: Integer dtype used for ``n``, ``start`` and samples.

        Raises:
            ValueError: If ``n`` is not positive.
            TypeError: If ``dtype`` is not an integer dtype.
        """
        if int(n) <= 0:
            raise ValueError("Discrete n must be positive")
        self.dtype = np.dtype(dtype)
        if not np.issubdtype(self.dtype, np.integer):
            raise TypeError("Discrete dtype must be an integer dtype")
        self.n = self.dtype.type(n)
        self.start = self.dtype.type(start)
        super().__init__(shape=(), dtype=self.dtype, seed=seed)

    def sample(self):
        """Draw a uniformly random element of the space."""
        return self.start + self.np_random.integers(self.n, dtype=self.dtype.type)

    def contains(self, x: Any) -> bool:
        """Return whether ``x`` is an integer scalar inside the space.

        Accepted inputs are Python ``int`` values, NumPy integer scalars and
        0-d integer arrays; everything else is reported as not contained.
        """
        if isinstance(x, int):
            value = x
        elif isinstance(x, np.generic) and np.issubdtype(x.dtype, np.integer):
            value = int(x)
        elif isinstance(x, np.ndarray) and x.shape == () and np.issubdtype(x.dtype, np.integer):
            value = int(x)
        else:
            return False
        # Compare with Python ints so ``start + n`` cannot overflow a narrow dtype.
        lower = int(self.start)
        return lower <= value < lower + int(self.n)

    def __repr__(self) -> str:
        """Return ``Discrete(n)``, adding ``start=...`` when it is non-zero."""
        if int(self.start) != 0:
            return f"Discrete({int(self.n)}, start={int(self.start)})"
        return f"Discrete({int(self.n)})"
|
|
136
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
The MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2016 OpenAI
|
|
4
|
+
Copyright (c) 2022 Farama Foundation
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: platform-lander
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Standalone reusable-booster landing environment for reinforcement learning.
|
|
5
|
+
Author: Andriy Burkov
|
|
6
|
+
License: The MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2016 OpenAI
|
|
9
|
+
Copyright (c) 2022 Farama Foundation
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in
|
|
19
|
+
all copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
27
|
+
THE SOFTWARE.
|
|
28
|
+
|
|
29
|
+
Project-URL: Homepage, https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander
|
|
30
|
+
Project-URL: Source, https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander
|
|
31
|
+
Project-URL: Repository, https://github.com/aburkov/theDRLbook
|
|
32
|
+
Keywords: reinforcement-learning,rl,box2d,lander,environment
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
38
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
39
|
+
Requires-Python: >=3.10
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
License-File: LICENSE
|
|
42
|
+
Requires-Dist: numpy>=1.21
|
|
43
|
+
Requires-Dist: box2d-py>=2.3.5
|
|
44
|
+
Requires-Dist: pygame>=2.1
|
|
45
|
+
Provides-Extra: test
|
|
46
|
+
Requires-Dist: pytest>=7; extra == "test"
|
|
47
|
+
Provides-Extra: train
|
|
48
|
+
Requires-Dist: torch>=2; extra == "train"
|
|
49
|
+
|
|
50
|
+
# Platform Lander
|
|
51
|
+
|
|
52
|
+
A standalone reusable-booster landing environment based on Gymnasium LunarLander v3 physics, but without importing Gymnasium. The task is to land a SpaceX-style booster upright on a moving floating platform. Missing the platform and falling into the ocean, or contacting the platform in a non-vertical position, terminates the episode as a failure.
|
|
53
|
+
|
|
54
|
+
## Install
|
|
55
|
+
|
|
56
|
+
After the package has been published to PyPI:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install platform_lander
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Before the PyPI release is available, install the same package directly from
|
|
63
|
+
the book repository subdirectory:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install "platform_lander @ git+https://github.com/aburkov/theDRLbook.git#subdirectory=test_environments/platform_lander"
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
For local development from this folder:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install -e .
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Google Colab
|
|
76
|
+
|
|
77
|
+
Use the same install command in the first notebook cell. Colab usually needs `swig` before Box2D builds:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
!apt-get -qq install swig
|
|
81
|
+
!pip install -q platform_lander
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Then import normally:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from platform_lander import PlatformLander
|
|
88
|
+
|
|
89
|
+
env = PlatformLander(render_mode="rgb_array", enable_wind=True, wind_power=5.0)
|
|
90
|
+
obs, info = env.reset(seed=0)
|
|
91
|
+
obs, reward, terminated, truncated, info = env.step(2)
|
|
92
|
+
frame = env.render()
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Display a rendered frame in Colab:
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
import matplotlib.pyplot as plt
|
|
99
|
+
|
|
100
|
+
plt.imshow(frame)
|
|
101
|
+
plt.axis("off")
|
|
102
|
+
plt.show()
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Local Script
|
|
106
|
+
|
|
107
|
+
To watch the booster in a local Pygame window, install the package in editable
|
|
108
|
+
mode and run the demo:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
pip install -e .
|
|
112
|
+
python examples/demo.py
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
The test file is headless, so running `pytest` or `python tests/test_platform_lander.py`
|
|
116
|
+
will not open an animation window.
|
|
117
|
+
|
|
118
|
+
To train a discrete policy with the textbook single-trajectory REINFORCE
|
|
119
|
+
algorithm and then show three animated runs:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
pip install -e ".[train]"
|
|
123
|
+
python vanilla_reinforce.py
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
The repository also includes incremental REINFORCE variants:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
python rtg_reinforce.py # vanilla + per-timestep reward-to-go
|
|
130
|
+
python average_reinforcement_baseline_reinforce.py # reward-to-go + running scalar RTG baseline
|
|
131
|
+
python value_function_baseline_reinforce.py # reward-to-go + learned value-function baseline
|
|
132
|
+
python batch_reinforce.py # vanilla + trajectory batches
|
|
133
|
+
python full_reinforce.py # batches + reward-to-go + selectable scalar baseline
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Each training script writes a log, per-episode CSV data, and a checkpoint under
|
|
137
|
+
`runs/` by default, for example `runs/full_reinforce.log`,
|
|
138
|
+
`runs/full_reinforce.csv`, and `runs/full_reinforce.pt`. Override those paths
|
|
139
|
+
with `--log-file`, `--csv-file`, and `--model-file`.
|
|
140
|
+
|
|
141
|
+
To load the hardcoded `runs/full_reinforce.pt` checkpoint and watch several
|
|
142
|
+
animated policy rollouts:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
python watch_trained_policy.py
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
To generate one side-by-side results graph per variant from the saved CSV
|
|
149
|
+
files:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
python plot_reinforce_results.py
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
For a quick smoke test without opening the animation window:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
python vanilla_reinforce.py --episodes 3 --max-steps 20 --no-animation
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
from platform_lander import PlatformLander
|
|
163
|
+
|
|
164
|
+
env = PlatformLander(enable_wind=True, wind_direction=(1, 0.2), wind_power=5.0)
|
|
165
|
+
obs, info = env.reset(seed=0)
|
|
166
|
+
|
|
167
|
+
for _ in range(1000):
|
|
168
|
+
action = env.action_space.sample()
|
|
169
|
+
obs, reward, terminated, truncated, info = env.step(action)
|
|
170
|
+
if terminated or truncated:
|
|
171
|
+
print(info)
|
|
172
|
+
break
|
|
173
|
+
|
|
174
|
+
env.close()
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## API Notes
|
|
178
|
+
|
|
179
|
+
- `PlatformLander(continuous=False)` uses `Discrete(4)` actions.
|
|
180
|
+
- Actions: `0` no-op, `1` upper-left attitude jet, `2` bottom engine, `3` upper-right attitude jet.
|
|
181
|
+
- `continuous=True` uses a two-value `Box(-1, 1, shape=(2,))` action.
|
|
182
|
+
- Wind is controlled with `enable_wind`, `wind_power`, `wind_direction`, and `set_wind(...)`.
|
|
183
|
+
- The booster has 100 available jet fires by default. After they are exhausted,
|
|
184
|
+
engine commands have no effect and the booster continues ballistically.
|
|
185
|
+
- The observation includes the fraction of jet fires remaining.
|
|
186
|
+
- The package provides local `Box` and `Discrete` spaces and does not import Gymnasium.
|
|
187
|
+
|
|
188
|
+
## Publishing
|
|
189
|
+
|
|
190
|
+
Build the package from this directory:
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
python -m build
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
Upload the generated `dist/platform_lander-*.tar.gz` and
|
|
197
|
+
`dist/platform_lander-*.whl` files to PyPI with a PyPI account that owns the
|
|
198
|
+
`platform_lander` project name:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
python -m twine upload dist/*
|
|
202
|
+
```
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
platform_lander/__init__.py,sha256=lUiruv3t2_46dbWc4d38XftrKBkg_MVMrt3D8FU56UA,465
|
|
2
|
+
platform_lander/core.py,sha256=bqmJyoeA_iClTlWSn88c368v0UE94-743FotLFkHim4,2616
|
|
3
|
+
platform_lander/platform_lander.py,sha256=DM53Cn1Zg0DliTtc8NPCt41wcXRgclck5IgmekAcDnw,31301
|
|
4
|
+
platform_lander/spaces.py,sha256=rLw10Bzuvu-zuzQaZ5OazROjOKJNmP7YOPCh9UDU1LY,4613
|
|
5
|
+
platform_lander-0.1.0.dist-info/LICENSE,sha256=fayql3LoVq7mlDsy72Y9NjTZHXLse7x00TaUNnP5Hhg,1104
|
|
6
|
+
platform_lander-0.1.0.dist-info/METADATA,sha256=A0NtnvuuWsAQzFeA35x-YtNQvTJupmfmZEpE8kgptcM,6862
|
|
7
|
+
platform_lander-0.1.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
8
|
+
platform_lander-0.1.0.dist-info/top_level.txt,sha256=uLxoi4U_a9YT4dVtkyH-7OHQ1soVjmCqtAZ9jxXK6_c,16
|
|
9
|
+
platform_lander-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
platform_lander
|