platform-lander 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ """Standalone SpaceX-style platform landing environment.
2
+
3
+ This package is intentionally independent from Gymnasium. It keeps the familiar
4
+ ``reset``/``step``/``render`` API and lightweight ``Box``/``Discrete`` spaces so
5
+ it can be used by RL code without importing ``gymnasium``.
6
+ """
7
+
8
+ from platform_lander.platform_lander import PlatformLander, heuristic
9
+ from platform_lander.spaces import Box, Discrete
10
+
11
+ __all__ = ["PlatformLander", "heuristic", "Box", "Discrete"]
12
+
@@ -0,0 +1,88 @@
1
+ """Small Gymnasium-compatible core helpers used by :mod:`platform_lander`.
2
+
3
+ The environment code is adapted from Gymnasium's LunarLander v3, but this file
4
+ contains the minimal runtime support needed to use it without importing
5
+ Gymnasium.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ import numpy as np
13
+
14
+
15
class DependencyNotInstalled(ImportError):
    """Signal that an optional dependency (physics or rendering) could not be imported.

    Subclasses :class:`ImportError`, so ``except ImportError`` handlers still
    catch it.
    """
17
+
18
+
19
class Error(Exception):
    """Root exception type for errors raised by this package."""
21
+
22
+
23
def np_random(seed: int | None = None) -> tuple[np.random.Generator, int]:
    """Create a PCG64-backed NumPy generator and report the seed behind it.

    Args:
        seed: ``None`` to let :class:`numpy.random.SeedSequence` pick its own
            entropy, or a non-negative Python ``int``.

    Returns:
        ``(generator, seed_used)`` where ``seed_used`` is the seed sequence's
        entropy converted to ``int``.

    Raises:
        Error: If ``seed`` is neither ``None`` nor a non-negative ``int``.
    """

    seed_is_valid = seed is None or (isinstance(seed, int) and seed >= 0)
    if not seed_is_valid:
        raise Error(f"Seed must be a non-negative python integer, got {seed!r}")

    sequence = np.random.SeedSequence(seed)
    generator = np.random.Generator(np.random.PCG64(sequence))
    return generator, int(sequence.entropy)
32
+
33
+
34
class Env:
    """Bare-bones environment base class that owns the random generator.

    ``reset`` re-seeds the generator only when a seed is passed. The
    ``np_random`` and ``np_random_seed`` properties create an unseeded
    generator on first access if none exists yet.
    """

    metadata: dict[str, Any] = {"render_modes": []}
    render_mode: str | None = None

    # Both stay ``None`` until a seed is supplied or a property is first read.
    _np_random: np.random.Generator | None = None
    _np_random_seed: int | None = None

    def reset(self, *, seed: int | None = None, options: dict | None = None):
        """Re-seed the generator when ``seed`` is given; ``options`` is unused here."""
        if seed is None:
            return
        self._np_random, self._np_random_seed = np_random(seed)

    @property
    def np_random(self) -> np.random.Generator:
        """The environment's generator, created on demand."""
        generator = self._np_random
        if generator is None:
            generator, self._np_random_seed = np_random()
            self._np_random = generator
        return generator

    @np_random.setter
    def np_random(self, value: np.random.Generator) -> None:
        # An externally supplied generator has no known seed; -1 marks that.
        self._np_random = value
        self._np_random_seed = -1

    @property
    def np_random_seed(self) -> int:
        """Seed of the current generator (``-1`` if the generator was assigned)."""
        if self._np_random_seed is None:
            self._np_random, self._np_random_seed = np_random()
        return self._np_random_seed

    @property
    def unwrapped(self):
        """Return this environment itself."""
        return self

    def close(self) -> None:
        """No-op hook for subclasses that hold resources."""
        pass
70
+
71
+
72
class EzPickle:
    """Mixin that pickles an object as its constructor arguments.

    Only the recorded ``args``/``kwargs`` are serialized. Unpickling builds a
    new instance from them and copies its attributes onto the target.
    """

    def __init__(self, *args: object, **kwargs: object) -> None:
        self._ezpickle_args, self._ezpickle_kwargs = args, kwargs

    def __getstate__(self) -> dict[str, Any]:
        """Return the recorded constructor arguments as the pickle state."""
        return dict(
            _ezpickle_args=self._ezpickle_args,
            _ezpickle_kwargs=self._ezpickle_kwargs,
        )

    def __setstate__(self, state: dict[str, Any]) -> None:
        """Re-run the constructor with the saved arguments and adopt the result."""
        positional = state["_ezpickle_args"]
        keyword = state["_ezpickle_kwargs"]
        rebuilt = type(self)(*positional, **keyword)
        self.__dict__.update(rebuilt.__dict__)
88
+
@@ -0,0 +1,834 @@
1
+ """Standalone moving-platform booster landing environment.
2
+
3
+ The physics and API are adapted from Gymnasium's LunarLander v3 under the MIT
4
+ license, then modified to model a vertical booster landing on a floating
5
+ left-right moving platform.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import math
11
+ from typing import TYPE_CHECKING
12
+
13
+ import numpy as np
14
+
15
+ from platform_lander.core import DependencyNotInstalled, Env, EzPickle
16
+ from platform_lander.spaces import Box, Discrete
17
+
18
+ try:
19
+ import Box2D
20
+ from Box2D.b2 import (
21
+ circleShape,
22
+ contactListener,
23
+ edgeShape,
24
+ fixtureDef,
25
+ polygonShape,
26
+ )
27
+ except ImportError as exc: # pragma: no cover - import-time dependency check
28
+ raise DependencyNotInstalled(
29
+ "Box2D is not installed. Install it with `pip install swig box2d-py`."
30
+ ) from exc
31
+
32
+ if TYPE_CHECKING:
33
+ import pygame
34
+
35
+
36
# --- Simulation timing and scale -------------------------------------------
FPS = 50  # the world is stepped by 1 / FPS per call to step()
SCALE = 30.0  # pixel-style sizes below are divided by SCALE to get world units

# --- Viewport size in pixels -----------------------------------------------
VIEWPORT_W = 600
VIEWPORT_H = 400

# --- Engine impulses and the random force applied at reset ------------------
BOTTOM_ENGINE_POWER = 18.0
TOP_ENGINE_POWER = 1.25
INITIAL_RANDOM = 700.0

# --- Booster geometry (pixel-style units) ------------------------------------
BOOSTER_HALF_WIDTH = 8
BOOSTER_TOP = 52
BOOSTER_BOTTOM = 56
BOOSTER_START_CLEARANCE = 0.06
TOP_ENGINE_Y = 43
TOP_ENGINE_AWAY = 9
BOTTOM_ENGINE_Y = 54

# --- Platform geometry and defaults ------------------------------------------
PLATFORM_WIDTH = 118
PLATFORM_HEIGHT = 14
PLATFORM_SPEED = 1.15
MAX_JET_FIRES = 200

# --- Thresholds used to decide whether a touchdown counts as a landing -------
LANDING_ANGLE = math.radians(8.0)
LANDING_VX = 0.45
LANDING_VY = 0.65
LANDING_ANGULAR_V = 0.55
62
+
63
+
64
class ContactDetector(contactListener):
    """Contact listener that mirrors booster contacts onto the environment.

    Fixtures are identified by their ``userData`` labels: ``"ocean"``,
    ``"platform"``, ``"booster_body"``, ``"left_foot"`` and ``"right_foot"``.
    """

    def __init__(self, env: "PlatformLander") -> None:
        contactListener.__init__(self)
        self.env = env

    @staticmethod
    def _data(contact) -> tuple[object, object]:
        """Return the ``userData`` labels of the two fixtures in ``contact``."""
        fixture_a, fixture_b = contact.fixtureA, contact.fixtureB
        return fixture_a.userData, fixture_b.userData

    def BeginContact(self, contact) -> None:
        """Set the ocean flag or the per-part platform flags for a new contact."""
        tags = set(self._data(contact))

        # Any booster part touching the ocean marks an ocean contact; nothing
        # else is recorded for that contact.
        if "ocean" in tags and tags & {"booster_body", "left_foot", "right_foot"}:
            self.env.ocean_contact = True
            return

        if "platform" not in tags:
            return

        part_flags = (
            ("booster_body", "body_platform_contact"),
            ("left_foot", "left_foot_contact"),
            ("right_foot", "right_foot_contact"),
        )
        for tag, flag in part_flags:
            if tag in tags:
                setattr(self.env, flag, True)
                self.env.platform_contact = True

    def EndContact(self, contact) -> None:
        """Clear per-part platform flags and recompute the combined flag."""
        tags = set(self._data(contact))
        if "platform" not in tags:
            return

        env = self.env
        if "left_foot" in tags:
            env.left_foot_contact = False
        if "right_foot" in tags:
            env.right_foot_contact = False
        if "booster_body" in tags:
            env.body_platform_contact = False
        # The combined flag stays set while any part still touches the platform.
        env.platform_contact = (
            env.left_foot_contact
            or env.right_foot_contact
            or env.body_platform_contact
        )
113
+
114
+
115
+ class PlatformLander(Env, EzPickle):
116
+ """Land a reusable booster vertically on a moving ocean platform.
117
+
118
+ Actions are ``Discrete(4)`` by default:
119
+
120
+ - 0: do nothing
121
+ - 1: fire the upper-left attitude jet
122
+ - 2: fire the bottom engine
123
+ - 3: fire the upper-right attitude jet
124
+
125
+ With ``continuous=True``, actions are ``Box(-1, 1, shape=(2,))`` where the
126
+ first value controls the bottom engine and the second controls the top jets.
127
+
128
+ Observations contain eleven float values:
129
+ relative x/y to the platform landing point, x/y velocity, booster angle,
130
+ angular velocity, left/right foot contact flags, platform x, and platform
131
+ velocity, and the fraction of jet fires remaining. Wind is applied from
132
+ ``wind_direction`` with force ``wind_power``. If ``variable_wind=True`` the
133
+ force varies over time using LunarLander v3's deterministic wind pattern.
134
+ """
135
+
136
+ metadata = {"render_modes": ["human", "rgb_array"], "render_fps": FPS}
137
+
138
def __init__(
    self,
    render_mode: str | None = None,
    continuous: bool = False,
    gravity: float = -10.0,
    enable_wind: bool = False,
    wind_power: float = 15.0,
    wind_direction: float | tuple[float, float] = 0.0,
    turbulence_power: float = 1.5,
    variable_wind: bool = True,
    platform_speed: float = PLATFORM_SPEED,
    max_jet_fires: int = MAX_JET_FIRES,
) -> None:
    """Store the configuration and build the observation and action spaces.

    Bodies are not created here; they are built by ``reset``.

    Raises:
        ValueError: If ``gravity`` is not strictly between -12 and 0.
    """
    # Record the constructor arguments so EzPickle can replay them.
    constructor_args = (
        render_mode,
        continuous,
        gravity,
        enable_wind,
        wind_power,
        wind_direction,
        turbulence_power,
        variable_wind,
        platform_speed,
        max_jet_fires,
    )
    EzPickle.__init__(self, *constructor_args)

    gravity_in_range = -12.0 < gravity < 0.0
    if not gravity_in_range:
        raise ValueError(f"gravity must be between -12 and 0, got {gravity}")

    # Configuration.
    self.gravity = gravity
    self.continuous = continuous
    self.enable_wind = enable_wind
    self.wind_power = float(wind_power)
    self.wind_direction = wind_direction
    self.turbulence_power = float(turbulence_power)
    self.variable_wind = variable_wind
    self.platform_speed = float(platform_speed)
    self.max_jet_fires = int(max_jet_fires)
    self.render_mode = render_mode

    # Rendering handles, created on the first render() call.
    self.screen: pygame.Surface | None = None
    self.clock = None
    self.isopen = True

    # Physics world and per-episode state; bodies stay None until reset().
    self.world = Box2D.b2World(gravity=(0, gravity))
    self.booster: Box2D.b2Body | None = None
    self.platform: Box2D.b2Body | None = None
    self.ocean: Box2D.b2Body | None = None
    self.bottom_flame_power = 0.0
    self.top_flame_power = 0.0
    self.top_flame_direction = 0
    self.jet_fires_used = 0
    self.prev_shaping = None

    # (low, high) per observation component, in observation order.
    observation_bounds = [
        (-2.5, 2.5),
        (-2.5, 2.5),
        (-10.0, 10.0),
        (-10.0, 10.0),
        (-2 * math.pi, 2 * math.pi),
        (-10.0, 10.0),
        (0.0, 1.0),
        (0.0, 1.0),
        (-1.0, 1.0),
        (-2.0, 2.0),
        (0.0, 1.0),
    ]
    low = np.array([lower for lower, _ in observation_bounds], dtype=np.float32)
    high = np.array([upper for _, upper in observation_bounds], dtype=np.float32)
    self.observation_space = Box(low, high, dtype=np.float32)

    if continuous:
        self.action_space = Box(-1.0, 1.0, shape=(2,), dtype=np.float32)
    else:
        self.action_space = Discrete(4)
206
+
207
+ def _destroy(self) -> None:
208
+ self.world.contactListener = None
209
+ for body_name in ("booster", "platform", "ocean"):
210
+ body = getattr(self, body_name)
211
+ if body is not None:
212
+ self.world.DestroyBody(body)
213
+ setattr(self, body_name, None)
214
+
215
def reset(self, *, seed: int | None = None, options: dict | None = None):
    """Rebuild the world for a new episode and return ``(observation, info)``.

    The platform gets a random start position and direction, the booster
    starts above the viewport with a random initial force, and one no-op
    step is run to produce the first observation. ``options`` is accepted
    but not used. The returned info dict is empty.
    """
    super().reset(seed=seed)
    self._destroy()

    # Fresh world and contact listener. The listener is also stored under
    # ``contactListener_keepref`` (presumably to keep a Python reference alive).
    self.world = Box2D.b2World(gravity=(0, self.gravity))
    detector = ContactDetector(self)
    self.world.contactListener_keepref = detector
    self.world.contactListener = detector

    # Per-episode flags and counters.
    self.ocean_contact = False
    self.platform_contact = False
    self.body_platform_contact = False
    self.left_foot_contact = False
    self.right_foot_contact = False
    self.failure_reason: str | None = None
    self.prev_shaping = None
    self.bottom_flame_power = 0.0
    self.top_flame_power = 0.0
    self.top_flame_direction = 0
    self.jet_fires_used = 0

    # World-unit viewport size and platform travel limits.
    world_w = VIEWPORT_W / SCALE
    world_h = VIEWPORT_H / SCALE
    self.platform_y = world_h / 4
    self.platform_half_width = PLATFORM_WIDTH / SCALE / 2
    self.platform_half_height = PLATFORM_HEIGHT / SCALE / 2
    self.platform_min_x = self.platform_half_width + 0.25
    self.platform_max_x = world_w - self.platform_half_width - 0.25
    # RNG draw order matters for reproducibility: direction first, then x.
    self.platform_direction = int(self.np_random.choice([-1, 1]))
    start_x = float(self.np_random.uniform(self.platform_min_x, self.platform_max_x))

    # Platform: a kinematic box moved by setting its velocity.
    self.platform = self.world.CreateKinematicBody(position=(start_x, self.platform_y))
    deck_shape = polygonShape(box=(self.platform_half_width, self.platform_half_height))
    deck = self.platform.CreateFixture(
        fixtureDef(
            shape=deck_shape,
            density=0.0,
            friction=0.9,
            restitution=0.0,
            categoryBits=0x0001,
        )
    )
    deck.userData = "platform"
    self.platform.color1 = (38, 42, 48)
    self.platform.color2 = (235, 235, 235)

    # Ocean: a sensor edge across the viewport, slightly below the platform centre.
    self.ocean_y = self.platform_y - self.platform_half_height * 0.75
    self.ocean = self.world.CreateStaticBody()
    waterline = self.ocean.CreateFixture(
        fixtureDef(
            shape=edgeShape(vertices=[(0, self.ocean_y), (world_w, self.ocean_y)]),
            isSensor=True,
            categoryBits=0x0001,
        )
    )
    waterline.userData = "ocean"

    # Booster: hull polygon plus two foot boxes, all on one dynamic body.
    spawn = (world_w / 2, world_h + BOOSTER_BOTTOM / SCALE + BOOSTER_START_CLEARANCE)
    self.booster = self.world.CreateDynamicBody(position=spawn, angle=0.0)
    half_width = BOOSTER_HALF_WIDTH / SCALE
    hull_outline = [
        (-half_width, -48 / SCALE),
        (-half_width, 43 / SCALE),
        (-5 / SCALE, BOOSTER_TOP / SCALE),
        (5 / SCALE, BOOSTER_TOP / SCALE),
        (half_width, 43 / SCALE),
        (half_width, -48 / SCALE),
    ]
    hull = self.booster.CreateFixture(
        fixtureDef(
            shape=polygonShape(vertices=hull_outline),
            density=4.5,
            friction=0.25,
            categoryBits=0x0010,
            maskBits=0x0001,
            restitution=0.0,
        )
    )
    hull.userData = "booster_body"

    for label, center_x in (("left_foot", -7), ("right_foot", 7)):
        foot = self.booster.CreateFixture(
            fixtureDef(
                shape=polygonShape(
                    box=(8 / SCALE, 2 / SCALE, (center_x / SCALE, -54 / SCALE), 0.0)
                ),
                density=0.7,
                friction=1.0,
                categoryBits=0x0010,
                maskBits=0x0001,
                restitution=0.0,
            )
        )
        foot.userData = label

    self.booster.color1 = (230, 232, 235)
    self.booster.color2 = (40, 44, 52)

    # Random initial force; x is drawn before y.
    kick_x = self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
    kick_y = self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
    self.booster.ApplyForceToCenter((kick_x, kick_y), True)

    # Wind phase indices are only drawn when wind is enabled at reset time.
    if self.enable_wind:
        self.wind_idx = int(self.np_random.integers(-9999, 9999))
        self.torque_idx = int(self.np_random.integers(-9999, 9999))

    self.drawlist = [self.platform, self.booster]

    if self.render_mode == "human":
        self.render()

    # One no-op step produces the initial observation.
    noop = np.array([0.0, 0.0], dtype=np.float32) if self.continuous else 0
    observation = self.step(noop)[0]
    return observation, {}
338
+
339
+ def _wind_unit(self) -> tuple[float, float]:
340
+ if isinstance(self.wind_direction, tuple):
341
+ x, y = self.wind_direction
342
+ norm = math.hypot(x, y)
343
+ return (0.0, 0.0) if norm == 0 else (x / norm, y / norm)
344
+ return math.cos(float(self.wind_direction)), math.sin(float(self.wind_direction))
345
+
346
+ def _wind_scale(self) -> float:
347
+ if not self.variable_wind:
348
+ return 1.0
349
+ scale = math.tanh(
350
+ math.sin(0.02 * self.wind_idx)
351
+ + math.sin(math.pi * 0.01 * self.wind_idx)
352
+ )
353
+ self.wind_idx += 1
354
+ return scale
355
+
356
def set_wind(
    self,
    *,
    power: float | None = None,
    direction: float | tuple[float, float] | None = None,
    enabled: bool | None = None,
) -> None:
    """Change wind settings mid-episode; arguments left as ``None`` are ignored.

    Args:
        power: New wind force, stored as ``float``.
        direction: New wind direction, stored as given.
        enabled: Turns wind on or off, stored as ``bool``.
    """

    if power is not None:
        self.wind_power = float(power)

    if direction is not None:
        self.wind_direction = direction

    if enabled is not None:
        self.enable_wind = bool(enabled)
371
+
372
+ def _update_platform(self) -> None:
373
+ assert self.platform is not None
374
+ x = self.platform.position.x
375
+ if x <= self.platform_min_x + 1e-6 and self.platform_direction < 0:
376
+ self.platform_direction = 1
377
+ elif x >= self.platform_max_x - 1e-6 and self.platform_direction > 0:
378
+ self.platform_direction = -1
379
+ self.platform.linearVelocity = (self.platform_speed * self.platform_direction, 0.0)
380
+
381
+ def _clamp_platform(self) -> None:
382
+ assert self.platform is not None
383
+ x = min(max(self.platform.position.x, self.platform_min_x), self.platform_max_x)
384
+ if x != self.platform.position.x:
385
+ if x <= self.platform_min_x + 1e-6:
386
+ self.platform_direction = 1
387
+ elif x >= self.platform_max_x - 1e-6:
388
+ self.platform_direction = -1
389
+ self.platform.position = (x, self.platform.position.y)
390
+ self.platform.linearVelocity = (self.platform_speed * self.platform_direction, 0.0)
391
+
392
+ def _apply_wind(self) -> None:
393
+ assert self.booster is not None
394
+ if not self.enable_wind or self.platform_contact:
395
+ return
396
+ wx, wy = self._wind_unit()
397
+ wind_mag = self.wind_power * self._wind_scale()
398
+ self.booster.ApplyForceToCenter((wx * wind_mag, wy * wind_mag), True)
399
+
400
+ torque_mag = math.tanh(
401
+ math.sin(0.02 * self.torque_idx)
402
+ + math.sin(math.pi * 0.01 * self.torque_idx)
403
+ ) * self.turbulence_power
404
+ self.torque_idx += 1
405
+ self.booster.ApplyTorque(torque_mag, True)
406
+
407
+ def _apply_engines(self, action) -> tuple[float, float]:
408
+ assert self.booster is not None
409
+
410
+ if self.continuous:
411
+ action = np.clip(action, -1, +1).astype(np.float64)
412
+ else:
413
+ if not self.action_space.contains(action):
414
+ raise AssertionError(f"{action!r} ({type(action)}) is not a valid action")
415
+
416
+ tip = (math.sin(self.booster.angle), math.cos(self.booster.angle))
417
+ side = (-tip[1], tip[0])
418
+ dispersion = [self.np_random.uniform(-1.0, 1.0) / SCALE for _ in range(2)]
419
+
420
+ bottom_power = 0.0
421
+ if (self.continuous and action[0] > 0.0) or (not self.continuous and action == 2):
422
+ if self.jet_fires_used < self.max_jet_fires:
423
+ bottom_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 if self.continuous else 1.0
424
+ self.jet_fires_used += 1
425
+ ox = -tip[0] * (BOTTOM_ENGINE_Y / SCALE + dispersion[0]) + side[0] * dispersion[1]
426
+ oy = -tip[1] * (BOTTOM_ENGINE_Y / SCALE + dispersion[0]) + side[1] * dispersion[1]
427
+ impulse_pos = (self.booster.position[0] + ox, self.booster.position[1] + oy)
428
+ impulse = (tip[0] * BOTTOM_ENGINE_POWER * bottom_power, tip[1] * BOTTOM_ENGINE_POWER * bottom_power)
429
+ self.booster.ApplyLinearImpulse(impulse, impulse_pos, True)
430
+
431
+ top_power = 0.0
432
+ top_direction = 0
433
+ if self.continuous and abs(action[1]) > 0.5:
434
+ top_direction = -1 if action[1] < 0 else 1
435
+ top_power = float(np.clip(abs(action[1]), 0.5, 1.0))
436
+ elif not self.continuous and action in (1, 3):
437
+ top_direction = -1 if action == 1 else 1
438
+ top_power = 1.0
439
+
440
+ if top_direction and self.jet_fires_used < self.max_jet_fires:
441
+ # top_direction < 0 means upper-left jet, > 0 means upper-right jet.
442
+ self.jet_fires_used += 1
443
+ local_side = TOP_ENGINE_AWAY / SCALE * (-top_direction)
444
+ impulse_sign = top_direction
445
+ ox = tip[0] * (TOP_ENGINE_Y / SCALE + dispersion[0]) + side[0] * local_side
446
+ oy = tip[1] * (TOP_ENGINE_Y / SCALE + dispersion[0]) + side[1] * local_side
447
+ impulse_pos = (self.booster.position[0] + ox, self.booster.position[1] + oy)
448
+ impulse = (
449
+ side[0] * impulse_sign * TOP_ENGINE_POWER * top_power,
450
+ side[1] * impulse_sign * TOP_ENGINE_POWER * top_power,
451
+ )
452
+ self.booster.ApplyLinearImpulse(impulse, impulse_pos, True)
453
+ elif top_direction:
454
+ top_power = 0.0
455
+
456
+ self.bottom_flame_power = max(bottom_power, self.bottom_flame_power * 0.68)
457
+ if top_direction:
458
+ self.top_flame_direction = top_direction
459
+ self.top_flame_power = max(top_power, self.top_flame_power * 0.68)
460
+
461
+ return float(bottom_power), float(top_power)
462
+
463
+ def _get_state(self) -> np.ndarray:
464
+ assert self.booster is not None
465
+ assert self.platform is not None
466
+
467
+ pos = self.booster.position
468
+ vel = self.booster.linearVelocity
469
+ platform_pos = self.platform.position
470
+ platform_vel = self.platform.linearVelocity
471
+ target_y = self.platform_y + self.platform_half_height + BOOSTER_BOTTOM / SCALE
472
+ half_w = VIEWPORT_W / SCALE / 2
473
+ half_h = VIEWPORT_H / SCALE / 2
474
+
475
+ state = np.array(
476
+ [
477
+ (pos.x - platform_pos.x) / half_w,
478
+ (pos.y - target_y) / half_h,
479
+ (vel.x - platform_vel.x) * half_w / FPS,
480
+ vel.y * half_h / FPS,
481
+ self._normalized_angle(self.booster.angle),
482
+ 20.0 * self.booster.angularVelocity / FPS,
483
+ 1.0 if self.left_foot_contact else 0.0,
484
+ 1.0 if self.right_foot_contact else 0.0,
485
+ (platform_pos.x - half_w) / half_w,
486
+ platform_vel.x,
487
+ max(0.0, (self.max_jet_fires - self.jet_fires_used) / self.max_jet_fires),
488
+ ],
489
+ dtype=np.float32,
490
+ )
491
+ return state
492
+
493
+ @staticmethod
494
+ def _normalized_angle(angle: float) -> float:
495
+ return (angle + math.pi) % (2 * math.pi) - math.pi
496
+
497
+ def _is_standing_landing(self, state: np.ndarray) -> bool:
498
+ assert self.booster is not None
499
+ return bool(
500
+ self.left_foot_contact
501
+ and self.right_foot_contact
502
+ and abs(state[0]) < (self.platform_half_width - 0.15) / (VIEWPORT_W / SCALE / 2)
503
+ and abs(state[2]) < LANDING_VX
504
+ and abs(state[3]) < LANDING_VY
505
+ and abs(state[4]) < LANDING_ANGLE
506
+ and abs(self.booster.angularVelocity) < LANDING_ANGULAR_V
507
+ )
508
+
509
+ def _terminal_status(self, state: np.ndarray) -> tuple[bool, bool, str | None]:
510
+ assert self.booster is not None
511
+ if self.ocean_contact or self.booster.position.y < self.ocean_y - 0.5:
512
+ return True, False, "ocean"
513
+ if abs(state[0]) > 2.0:
514
+ return True, False, "out_of_bounds"
515
+ if self.body_platform_contact:
516
+ return True, False, "booster_body_hit_platform"
517
+ if self.platform_contact and abs(state[4]) >= LANDING_ANGLE:
518
+ return True, False, "non_vertical_platform_contact"
519
+ if self._is_standing_landing(state) and (not self.booster.awake or abs(state[3]) < 0.08):
520
+ return True, True, None
521
+ if not self.booster.awake:
522
+ return True, False, "settled_not_vertical"
523
+ return False, False, None
524
+
525
+ def _settle_successful_landing(self) -> None:
526
+ assert self.booster is not None
527
+ assert self.platform is not None
528
+ target_y = self.platform_y + self.platform_half_height + BOOSTER_BOTTOM / SCALE
529
+ platform_x = self.platform.position.x
530
+ half_width = self.platform_half_width - 0.15
531
+ x = float(np.clip(self.booster.position.x, platform_x - half_width, platform_x + half_width))
532
+
533
+ self.booster.position = (x, target_y)
534
+ self.booster.angle = 0.0
535
+ self.booster.linearVelocity = self.platform.linearVelocity
536
+ self.booster.angularVelocity = 0.0
537
+ self.booster.awake = False
538
+ self.left_foot_contact = True
539
+ self.right_foot_contact = True
540
+ self.body_platform_contact = False
541
+ self.platform_contact = True
542
+
543
def step(self, action):
    """Advance one step and return ``(state, reward, terminated, truncated, info)``.

    The reward is the change in a shaping potential minus small engine
    costs. On termination it is replaced by +100 for a successful landing
    or -100 otherwise. ``truncated`` is always ``False``.

    Raises:
        AssertionError: If called before ``reset()``.
    """
    if self.booster is None:
        raise AssertionError("You forgot to call reset()")

    # Controls and disturbances are applied before the world is stepped.
    self._update_platform()
    self._apply_wind()
    bottom_power, top_power = self._apply_engines(action)

    self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
    self._clamp_platform()

    state = self._get_state()
    rel_x, rel_y = state[0], state[1]
    vel_x, vel_y = state[2], state[3]
    # Shaping potential: penalize distance, speed and tilt, reward foot contact.
    shaping = (
        -100 * math.sqrt(rel_x * rel_x + rel_y * rel_y)
        - 100 * math.sqrt(vel_x * vel_x + vel_y * vel_y)
        - 120 * abs(state[4])
        + 12 * state[6]
        + 12 * state[7]
    )
    previous_shaping = self.prev_shaping
    self.prev_shaping = shaping
    if previous_shaping is None:
        reward = 0.0
    else:
        reward = float(shaping - previous_shaping)
    reward -= bottom_power * 0.30
    reward -= top_power * 0.03

    terminated, success, failure_reason = self._terminal_status(state)
    self.failure_reason = failure_reason
    if terminated:
        reward = 100.0 if success else -100.0
        if success:
            # Snap to the resting pose and report the state after snapping.
            self._settle_successful_landing()
            state = self._get_state()

    if self.render_mode == "human":
        self.render()

    fires_left = max(0, self.max_jet_fires - self.jet_fires_used)
    info = {
        "success": success,
        "failure_reason": failure_reason,
        "platform_x": float(self.platform.position.x) if self.platform else None,
        "wind_power": self.wind_power if self.enable_wind else 0.0,
        "wind_direction": self.wind_direction,
        "jet_fires_used": self.jet_fires_used,
        "jet_fires_remaining": fires_left,
    }
    return state, reward, terminated, False, info
588
+
589
def render(self):
    """Draw the current scene.

    Returns:
        ``None`` when ``render_mode`` is ``None`` or ``"human"`` (the frame
        is shown in a window), or an RGB array transposed to rows-first
        order when ``render_mode`` is ``"rgb_array"``.

    Raises:
        DependencyNotInstalled: If pygame cannot be imported.
    """
    if self.render_mode is None:
        return None

    try:
        import pygame
        from pygame import gfxdraw
    except ImportError as exc:  # pragma: no cover - optional rendering dependency
        raise DependencyNotInstalled("pygame is not installed. Install it with `pip install pygame`.") from exc

    if self.screen is None and self.render_mode == "human":
        pygame.init()
        pygame.display.init()
        self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
    if self.clock is None:
        self.clock = pygame.time.Clock()

    surf = pygame.Surface((VIEWPORT_W, VIEWPORT_H))
    surf.fill((186, 220, 235))
    h = VIEWPORT_H / SCALE

    # Before the first reset() there is no ocean_y yet; fall back to h / 4.
    ocean_top = getattr(self, "ocean_y", h / 4) * SCALE
    ocean_rect = pygame.Rect(0, 0, VIEWPORT_W, ocean_top)
    pygame.draw.rect(surf, (31, 91, 133), ocean_rect)
    for i in range(0, VIEWPORT_W, 48):
        pygame.draw.arc(surf, (62, 132, 176), (i, ocean_top - 8, 56, 18), 0, math.pi, 2)

    self._draw_flames(surf, pygame, gfxdraw)

    # ``drawlist`` is only created by reset(). It used to be read unguarded,
    # so rendering before the first reset raised AttributeError even though
    # ocean_y is guarded for that case. Draw no bodies instead.
    for obj in getattr(self, "drawlist", ()):
        for fixture in obj.fixtures:
            if fixture.userData == "ocean":
                continue
            trans = fixture.body.transform
            if type(fixture.shape) is circleShape:
                center = trans * fixture.shape.pos * SCALE
                radius = max(2, int(fixture.shape.radius * SCALE))
                pygame.draw.circle(surf, obj.color1, center, radius)
                pygame.draw.circle(surf, obj.color2, center, max(1, radius - 1))
                continue

            path = [trans * vertex * SCALE for vertex in fixture.shape.vertices]
            color = obj.color1
            outline = obj.color2
            if fixture.userData == "platform":
                color = (45, 48, 54)
                outline = (235, 235, 235)
            elif fixture.userData in {"left_foot", "right_foot"}:
                color = (42, 46, 54)
                outline = (245, 245, 245)
            pygame.draw.polygon(surf, color, path)
            gfxdraw.aapolygon(surf, path, color)
            pygame.draw.aalines(surf, outline, True, path)

    self._draw_booster_details(surf, pygame)
    # The scene is drawn y-up, so flip vertically for display.
    surf = pygame.transform.flip(surf, False, True)

    if self.render_mode == "human":
        assert self.screen is not None
        self.screen.blit(surf, (0, 0))
        pygame.event.pump()
        self.clock.tick(self.metadata["render_fps"])
        pygame.display.flip()
        return None
    if self.render_mode == "rgb_array":
        return np.transpose(np.array(pygame.surfarray.pixels3d(surf)), axes=(1, 0, 2))
    return None
657
+
658
def _draw_flames(self, surf, pygame, gfxdraw) -> None:
    """Draw the bottom-engine and top-jet flames onto ``surf``.

    Flames are painted on a separate alpha layer and blitted over ``surf``.
    A flame is drawn only while its power is above 0.03.
    """
    if self.booster is None:
        return

    overlay = pygame.Surface((VIEWPORT_W, VIEWPORT_H), pygame.SRCALPHA)

    bottom_power = self.bottom_flame_power
    if bottom_power > 0.03:
        # The second point is one unit further down the booster's local axis.
        nozzle, direction = self._local_flame_anchor(
            (0.0, -BOTTOM_ENGINE_Y / SCALE),
            (0.0, -(BOTTOM_ENGINE_Y + 1.0) / SCALE),
        )
        self._draw_flame_cone(
            overlay,
            pygame,
            gfxdraw,
            nozzle=nozzle,
            direction=direction,
            power=bottom_power,
            length_px=34,
            base_width_px=7,
            end_width_px=20,
        )

    jet_side = self.top_flame_direction
    if self.top_flame_power > 0.03 and jet_side:
        local_x = TOP_ENGINE_AWAY / SCALE * (-jet_side)
        jet_nozzle = (local_x, TOP_ENGINE_Y / SCALE)
        jet_target = (local_x - jet_side / SCALE, TOP_ENGINE_Y / SCALE)
        nozzle, direction = self._local_flame_anchor(jet_nozzle, jet_target)
        self._draw_flame_cone(
            overlay,
            pygame,
            gfxdraw,
            nozzle=nozzle,
            direction=direction,
            power=self.top_flame_power,
            length_px=18,
            base_width_px=4,
            end_width_px=10,
        )

    surf.blit(overlay, (0, 0))
703
+
704
+ def _local_flame_anchor(
705
+ self,
706
+ nozzle_local: tuple[float, float],
707
+ direction_local: tuple[float, float],
708
+ ) -> tuple[tuple[float, float], tuple[float, float]]:
709
+ assert self.booster is not None
710
+ nozzle_world = self.booster.transform * nozzle_local
711
+ direction_world = self.booster.transform * direction_local
712
+ nozzle = (nozzle_world[0] * SCALE, nozzle_world[1] * SCALE)
713
+ direction = (
714
+ direction_world[0] * SCALE - nozzle[0],
715
+ direction_world[1] * SCALE - nozzle[1],
716
+ )
717
+ return nozzle, direction
718
+
719
+ @staticmethod
720
+ def _draw_flame_cone(
721
+ layer,
722
+ pygame,
723
+ gfxdraw,
724
+ *,
725
+ nozzle: tuple[float, float],
726
+ direction: tuple[float, float],
727
+ power: float,
728
+ length_px: float,
729
+ base_width_px: float,
730
+ end_width_px: float,
731
+ ) -> None:
732
+ direction_vec = np.array(direction, dtype=np.float64)
733
+ norm = np.linalg.norm(direction_vec)
734
+ if norm == 0:
735
+ return
736
+
737
+ direction_vec /= norm
738
+ perp = np.array([-direction_vec[1], direction_vec[0]])
739
+ power = float(np.clip(power, 0.0, 1.0))
740
+ start = np.array(nozzle, dtype=np.float64)
741
+ end = start + direction_vec * length_px * (0.45 + 0.55 * power)
742
+
743
+ def poly(base_width: float, end_width: float, length_scale: float) -> list[tuple[int, int]]:
744
+ scaled_end = start + (end - start) * length_scale
745
+ return [
746
+ tuple(np.round(start + perp * base_width / 2).astype(int)),
747
+ tuple(np.round(start - perp * base_width / 2).astype(int)),
748
+ tuple(np.round(scaled_end - perp * end_width / 2).astype(int)),
749
+ tuple(np.round(scaled_end + perp * end_width / 2).astype(int)),
750
+ ]
751
+
752
+ alpha = int(210 * power)
753
+ outer = poly(base_width_px, end_width_px, 1.0)
754
+ middle = poly(base_width_px * 0.65, end_width_px * 0.58, 0.78)
755
+ inner = poly(base_width_px * 0.32, end_width_px * 0.24, 0.48)
756
+
757
+ pygame.draw.polygon(layer, (69, 39, 24, int(120 * power)), outer)
758
+ gfxdraw.aapolygon(layer, outer, (69, 39, 24, int(120 * power)))
759
+ pygame.draw.polygon(layer, (255, 112, 28, alpha), middle)
760
+ gfxdraw.aapolygon(layer, middle, (255, 112, 28, alpha))
761
+ pygame.draw.polygon(layer, (255, 235, 132, int(245 * power)), inner)
762
+ gfxdraw.aapolygon(layer, inner, (255, 235, 132, int(245 * power)))
763
+
764
def _draw_booster_details(self, surf, pygame) -> None:
    """Draw the dark stripe and the small logo rectangle on the booster hull."""
    if self.booster is None:
        return

    trans = self.booster.transform
    dark = (35, 40, 48)

    # Stripe corners in booster-local pixel-style units, mapped to the surface.
    stripe_local = (
        (-6 / SCALE, 20 / SCALE),
        (6 / SCALE, 20 / SCALE),
        (6 / SCALE, 28 / SCALE),
        (-6 / SCALE, 28 / SCALE),
    )
    stripe = [trans * corner * SCALE for corner in stripe_local]
    pygame.draw.polygon(surf, dark, stripe)

    # The logo is an axis-aligned 18x7 rectangle centred above the stripe.
    logo_center = trans * (0, 35 / SCALE) * SCALE
    logo_rect = pygame.Rect(0, 0, 18, 7)
    logo_rect.center = logo_center
    pygame.draw.rect(surf, dark, logo_rect, border_radius=1)
781
+
782
def close(self) -> None:
    """Shut down the pygame window if one was opened; otherwise do nothing."""
    if self.screen is None:
        return

    import pygame

    pygame.display.quit()
    pygame.quit()
    self.isopen = False
789
+
790
+
791
def heuristic(env: PlatformLander, state: np.ndarray):
    """Hand-tuned controller for smoke tests and demos.

    Args:
        env: Environment whose ``unwrapped.continuous`` flag selects the
            action format.
        state: Observation as returned by ``reset``/``step``.

    Returns:
        A clipped ``float32`` array of two values in continuous mode,
        otherwise a discrete action in ``{0, 1, 2, 3}``.
    """

    # Target tilt grows with horizontal offset and speed, capped at +/-0.35.
    angle_target = state[0] * 0.55 + state[2] * 0.9
    angle_target = float(np.clip(angle_target, -0.35, 0.35))
    # Target height above the landing point grows with horizontal offset.
    hover_target = 0.45 * abs(state[0])

    if state[6] or state[7]:
        # With a foot down, only damp tilt and vertical speed.
        angle_todo = -state[4] * 0.9 - state[5] * 0.5
        hover_todo = -state[3] * 0.5
    else:
        angle_todo = (angle_target - state[4]) * 0.55 - state[5] * 0.85
        hover_todo = (hover_target - state[1]) * 0.55 - state[3] * 0.55

    if env.unwrapped.continuous:
        command = np.array([hover_todo * 18 - 1, angle_todo * 18], dtype=np.float32)
        return np.clip(command, -1, +1)

    # Discrete mode: the bottom engine wins when height error dominates.
    if hover_todo > abs(angle_todo) and hover_todo > 0.04:
        return 2
    if angle_todo < -0.04:
        return 1
    if angle_todo > 0.04:
        return 3
    return 0
817
+
818
+
819
def demo_heuristic_lander(seed: int | None = None, render: bool = False) -> float:
    """Run one episode with the heuristic controller and return the total reward.

    Args:
        seed: Seed passed to ``env.reset``.
        render: Open a ``"human"`` render window when true.

    Returns:
        The sum of rewards over at most 1000 steps.
    """
    env = PlatformLander(render_mode="human" if render else None)
    total_reward = 0.0
    # Close the environment even if reset/step raises, so a render window
    # is not left open.
    try:
        state, _ = env.reset(seed=seed)
        for _ in range(1000):
            action = heuristic(env, state)
            state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            if terminated or truncated:
                break
    finally:
        env.close()
    return total_reward
831
+
832
+
833
+ if __name__ == "__main__":
834
+ demo_heuristic_lander(render=True)
@@ -0,0 +1,136 @@
1
+ """Minimal action and observation spaces for the standalone environment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterable, Sequence
6
+ from typing import Any
7
+
8
+ import numpy as np
9
+
10
+ from platform_lander.core import np_random
11
+
12
+
13
+ class Space:
14
+ """Small subset of Gymnasium's Space API."""
15
+
16
+ def __init__(
17
+ self,
18
+ shape: Sequence[int] | None = None,
19
+ dtype: type | np.dtype | None = None,
20
+ seed: int | np.random.Generator | None = None,
21
+ ) -> None:
22
+ self._shape = None if shape is None else tuple(int(x) for x in shape)
23
+ self.dtype = None if dtype is None else np.dtype(dtype)
24
+ self._np_random: np.random.Generator | None = None
25
+ if isinstance(seed, np.random.Generator):
26
+ self._np_random = seed
27
+ elif seed is not None:
28
+ self.seed(seed)
29
+
30
+ @property
31
+ def shape(self) -> tuple[int, ...] | None:
32
+ return self._shape
33
+
34
+ @property
35
+ def np_random(self) -> np.random.Generator:
36
+ if self._np_random is None:
37
+ self.seed()
38
+ assert self._np_random is not None
39
+ return self._np_random
40
+
41
+ def seed(self, seed: int | None = None) -> int:
42
+ self._np_random, rng_seed = np_random(seed)
43
+ return rng_seed
44
+
45
+ def sample(self):
46
+ raise NotImplementedError
47
+
48
+ def contains(self, x: Any) -> bool:
49
+ raise NotImplementedError
50
+
51
+ def __contains__(self, x: Any) -> bool:
52
+ return self.contains(x)
53
+
54
+
55
class Box(Space):
    """A closed box in Euclidean space.

    Each element is an array of ``shape`` whose entries lie between the
    matching entries of ``low`` and ``high`` (both inclusive).
    """

    def __init__(
        self,
        low: int | float | np.ndarray,
        high: int | float | np.ndarray,
        shape: Sequence[int] | None = None,
        dtype: type | np.dtype = np.float32,
        seed: int | np.random.Generator | None = None,
    ) -> None:
        """Build the box from scalar or array bounds.

        Args:
            low: Lower bound; a scalar is broadcast to ``shape``.
            high: Upper bound; a scalar is broadcast to ``shape``.
            shape: Element shape. When omitted it is taken from ``low`` or
                ``high`` if either is an array, else it defaults to ``(1,)``.
            dtype: Element dtype.
            seed: Generator, integer seed, or ``None``.

        Raises:
            TypeError: If ``shape`` is given but is not iterable.
            ValueError: If the bounds do not match ``shape`` or any lower
                bound exceeds its upper bound.
        """
        self.dtype = np.dtype(dtype)
        if shape is not None:
            if not isinstance(shape, Iterable):
                raise TypeError("Box shape must be iterable")
            shape = tuple(int(dim) for dim in shape)
        elif isinstance(low, np.ndarray):
            shape = low.shape
        elif isinstance(high, np.ndarray):
            shape = high.shape
        else:
            shape = (1,)

        if np.isscalar(low):
            self.low = np.full(shape, low, dtype=self.dtype)
        else:
            self.low = np.asarray(low, dtype=self.dtype)
        if np.isscalar(high):
            self.high = np.full(shape, high, dtype=self.dtype)
        else:
            self.high = np.asarray(high, dtype=self.dtype)

        if self.low.shape != tuple(shape) or self.high.shape != tuple(shape):
            raise ValueError("Box low/high shapes must match the provided shape")
        if np.any(self.low > self.high):
            raise ValueError("Box low values must be less than or equal to high values")
        super().__init__(shape=shape, dtype=self.dtype, seed=seed)

    def sample(self) -> np.ndarray:
        """Draw a random element of the box.

        Integer dtypes are sampled with ``Generator.integers`` over the
        closed range ``[low, high]``. Truncating a float draw would never
        return ``high`` and would pile negative values onto zero.
        Other dtypes are drawn with ``Generator.uniform`` and cast.
        """
        if np.issubdtype(self.dtype, np.integer):
            drawn = self.np_random.integers(
                self.low, self.high, dtype=self.dtype, endpoint=True
            )
            return np.asarray(drawn, dtype=self.dtype)
        sample = self.np_random.uniform(self.low, self.high)
        return sample.astype(self.dtype)

    def contains(self, x: Any) -> bool:
        """Return whether ``x`` has the box's shape and lies within bounds.

        ``x`` is first converted to the box dtype; values that cannot be
        converted are reported as not contained.
        """
        try:
            arr = np.asarray(x, dtype=self.dtype)
        except (TypeError, ValueError):
            return False
        return arr.shape == self.shape and bool(np.all(arr >= self.low) and np.all(arr <= self.high))

    def __repr__(self) -> str:
        """Return a string showing the bounds, shape and dtype."""
        return f"Box({self.low}, {self.high}, {self.shape}, {self.dtype})"
99
+
100
+
101
class Discrete(Space):
    """A finite set of integers ``{start, ..., start + n - 1}``."""

    def __init__(
        self,
        n: int,
        seed: int | np.random.Generator | None = None,
        start: int = 0,
        dtype: type | np.dtype = np.int64,
    ) -> None:
        """Create the space.

        Args:
            n: Number of elements; must be positive.
            seed: Generator, integer seed, or ``None``.
            start: Smallest element of the set.
            dtype: Integer dtype used for ``n``, ``start`` and samples.

        Raises:
            ValueError: If ``n`` is not positive.
            TypeError: If ``dtype`` is not an integer dtype.
        """
        if int(n) <= 0:
            raise ValueError("Discrete n must be positive")
        self.dtype = np.dtype(dtype)
        if not np.issubdtype(self.dtype, np.integer):
            raise TypeError("Discrete dtype must be an integer dtype")
        self.n = self.dtype.type(n)
        self.start = self.dtype.type(start)
        super().__init__(shape=(), dtype=self.dtype, seed=seed)

    def sample(self):
        """Draw one element of the set using the space's generator."""
        return self.start + self.np_random.integers(self.n, dtype=self.dtype.type)

    def contains(self, x: Any) -> bool:
        """Return whether ``x`` is an integer inside the set.

        Accepts Python ints, NumPy integer scalars and zero-dimensional
        integer arrays; anything else is reported as not contained.
        """
        if isinstance(x, int):
            value = x
        elif isinstance(x, np.generic) and np.issubdtype(x.dtype, np.integer):
            value = int(x)
        elif isinstance(x, np.ndarray) and x.shape == () and np.issubdtype(x.dtype, np.integer):
            value = int(x)
        else:
            return False
        # Compare with Python ints so the upper bound cannot wrap around
        # when the space uses a narrow integer dtype.
        lower = int(self.start)
        return lower <= value < lower + int(self.n)

    def __repr__(self) -> str:
        """Return ``Discrete(n)``, adding ``start`` when it is non-zero."""
        if int(self.start) != 0:
            return f"Discrete({int(self.n)}, start={int(self.start)})"
        return f"Discrete({int(self.n)})"
136
+
@@ -0,0 +1,22 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2016 OpenAI
4
+ Copyright (c) 2022 Farama Foundation
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.1
2
+ Name: platform-lander
3
+ Version: 0.1.0
4
+ Summary: Standalone reusable-booster landing environment for reinforcement learning.
5
+ Author: Andriy Burkov
6
+ License: The MIT License
7
+
8
+ Copyright (c) 2016 OpenAI
9
+ Copyright (c) 2022 Farama Foundation
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in
19
+ all copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27
+ THE SOFTWARE.
28
+
29
+ Project-URL: Homepage, https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander
30
+ Project-URL: Source, https://github.com/aburkov/theDRLbook/tree/main/test_environments/platform_lander
31
+ Project-URL: Repository, https://github.com/aburkov/theDRLbook
32
+ Keywords: reinforcement-learning,rl,box2d,lander,environment
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.10
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: License :: OSI Approved :: MIT License
38
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
39
+ Requires-Python: >=3.10
40
+ Description-Content-Type: text/markdown
41
+ License-File: LICENSE
42
+ Requires-Dist: numpy>=1.21
43
+ Requires-Dist: box2d-py>=2.3.5
44
+ Requires-Dist: pygame>=2.1
45
+ Provides-Extra: test
46
+ Requires-Dist: pytest>=7; extra == "test"
47
+ Provides-Extra: train
48
+ Requires-Dist: torch>=2; extra == "train"
49
+
50
+ # Platform Lander
51
+
52
+ A standalone reusable-booster landing environment based on Gymnasium LunarLander v3 physics, but without importing Gymnasium. The task is to land a SpaceX-style booster upright on a moving floating platform. Missing the platform and falling into the ocean, or contacting the platform in a non-vertical position, terminates the episode as a failure.
53
+
54
+ ## Install
55
+
56
+ After the package has been published to PyPI:
57
+
58
+ ```bash
59
+ pip install platform_lander
60
+ ```
61
+
62
+ Before the PyPI release is available, install the same package directly from
63
+ the book repository subdirectory:
64
+
65
+ ```bash
66
+ pip install "platform_lander @ git+https://github.com/aburkov/theDRLbook.git#subdirectory=test_environments/platform_lander"
67
+ ```
68
+
69
+ For local development from this folder:
70
+
71
+ ```bash
72
+ pip install -e .
73
+ ```
74
+
75
+ ## Google Colab
76
+
77
+ Use the same install command in the first notebook cell. Colab usually needs `swig` before Box2D builds:
78
+
79
+ ```python
80
+ !apt-get -qq install swig
81
+ !pip install -q platform_lander
82
+ ```
83
+
84
+ Then import normally:
85
+
86
+ ```python
87
+ from platform_lander import PlatformLander
88
+
89
+ env = PlatformLander(render_mode="rgb_array", enable_wind=True, wind_power=5.0)
90
+ obs, info = env.reset(seed=0)
91
+ obs, reward, terminated, truncated, info = env.step(2)
92
+ frame = env.render()
93
+ ```
94
+
95
+ Display a rendered frame in Colab:
96
+
97
+ ```python
98
+ import matplotlib.pyplot as plt
99
+
100
+ plt.imshow(frame)
101
+ plt.axis("off")
102
+ plt.show()
103
+ ```
104
+
105
+ ## Local Script
106
+
107
+ To watch the booster in a local Pygame window, install the package in editable
108
+ mode and run the demo:
109
+
110
+ ```bash
111
+ pip install -e .
112
+ python examples/demo.py
113
+ ```
114
+
115
+ The test file is headless, so running `pytest` or `python tests/test_platform_lander.py`
116
+ will not open an animation window.
117
+
118
+ To train a discrete policy with the textbook single-trajectory REINFORCE
119
+ algorithm and then show three animated runs:
120
+
121
+ ```bash
122
+ pip install -e ".[train]"
123
+ python vanilla_reinforce.py
124
+ ```
125
+
126
+ The repository also includes incremental REINFORCE variants:
127
+
128
+ ```bash
129
+ python rtg_reinforce.py # vanilla + per-timestep reward-to-go
130
+ python average_reinforcement_baseline_reinforce.py # reward-to-go + running scalar RTG baseline
131
+ python value_function_baseline_reinforce.py # reward-to-go + learned value-function baseline
132
+ python batch_reinforce.py # vanilla + trajectory batches
133
+ python full_reinforce.py # batches + reward-to-go + selectable scalar baseline
134
+ ```
135
+
136
+ Each training script writes a log, per-episode CSV data, and a checkpoint under
137
+ `runs/` by default, for example `runs/full_reinforce.log`,
138
+ `runs/full_reinforce.csv`, and `runs/full_reinforce.pt`. Override those paths
139
+ with `--log-file`, `--csv-file`, and `--model-file`.
140
+
141
+ To load the hardcoded `runs/full_reinforce.pt` checkpoint and watch several
142
+ animated policy rollouts:
143
+
144
+ ```bash
145
+ python watch_trained_policy.py
146
+ ```
147
+
148
+ To generate one side-by-side results graph per variant from the saved CSV
149
+ files:
150
+
151
+ ```bash
152
+ python plot_reinforce_results.py
153
+ ```
154
+
155
+ For a quick smoke test without opening the animation window:
156
+
157
+ ```bash
158
+ python vanilla_reinforce.py --episodes 3 --max-steps 20 --no-animation
159
+ ```
160
+
161
+ ```python
162
+ from platform_lander import PlatformLander
163
+
164
+ env = PlatformLander(enable_wind=True, wind_direction=(1, 0.2), wind_power=5.0)
165
+ obs, info = env.reset(seed=0)
166
+
167
+ for _ in range(1000):
168
+ action = env.action_space.sample()
169
+ obs, reward, terminated, truncated, info = env.step(action)
170
+ if terminated or truncated:
171
+ print(info)
172
+ break
173
+
174
+ env.close()
175
+ ```
176
+
177
+ ## API Notes
178
+
179
+ - `PlatformLander(continuous=False)` uses `Discrete(4)` actions.
180
+ - Actions: `0` no-op, `1` upper-left attitude jet, `2` bottom engine, `3` upper-right attitude jet.
181
+ - `continuous=True` uses a two-value `Box(-1, 1, shape=(2,))` action.
182
+ - Wind is controlled with `enable_wind`, `wind_power`, `wind_direction`, and `set_wind(...)`.
183
+ - The booster has 100 available jet fires by default. After they are exhausted,
184
+ engine commands have no effect and the booster continues ballistically.
185
+ - The observation includes the fraction of jet fires remaining.
186
+ - The package provides local `Box` and `Discrete` spaces and does not import Gymnasium.
187
+
188
+ ## Publishing
189
+
190
+ Build the package from this directory:
191
+
192
+ ```bash
193
+ python -m build
194
+ ```
195
+
196
+ Upload the generated `dist/platform_lander-*.tar.gz` and
197
+ `dist/platform_lander-*.whl` files to PyPI with a PyPI account that owns the
198
+ `platform_lander` project name:
199
+
200
+ ```bash
201
+ python -m twine upload dist/*
202
+ ```
@@ -0,0 +1,9 @@
1
+ platform_lander/__init__.py,sha256=lUiruv3t2_46dbWc4d38XftrKBkg_MVMrt3D8FU56UA,465
2
+ platform_lander/core.py,sha256=bqmJyoeA_iClTlWSn88c368v0UE94-743FotLFkHim4,2616
3
+ platform_lander/platform_lander.py,sha256=DM53Cn1Zg0DliTtc8NPCt41wcXRgclck5IgmekAcDnw,31301
4
+ platform_lander/spaces.py,sha256=rLw10Bzuvu-zuzQaZ5OazROjOKJNmP7YOPCh9UDU1LY,4613
5
+ platform_lander-0.1.0.dist-info/LICENSE,sha256=fayql3LoVq7mlDsy72Y9NjTZHXLse7x00TaUNnP5Hhg,1104
6
+ platform_lander-0.1.0.dist-info/METADATA,sha256=A0NtnvuuWsAQzFeA35x-YtNQvTJupmfmZEpE8kgptcM,6862
7
+ platform_lander-0.1.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
8
+ platform_lander-0.1.0.dist-info/top_level.txt,sha256=uLxoi4U_a9YT4dVtkyH-7OHQ1soVjmCqtAZ9jxXK6_c,16
9
+ platform_lander-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.45.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ platform_lander