phantomrt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. atlas/__init__.py +3 -0
  2. atlas/agents/__init__.py +8 -0
  3. atlas/agents/command_space.py +227 -0
  4. atlas/analysis/__init__.py +3 -0
  5. atlas/analysis/binary_agent.py +488 -0
  6. atlas/analysis/binary_fuzz.py +389 -0
  7. atlas/analysis/frida_live.py +261 -0
  8. atlas/analysis/graph_annotator.py +147 -0
  9. atlas/analysis/spectrida_bridge.py +84 -0
  10. atlas/analysis/unicorn_harness.py +337 -0
  11. atlas/core/__init__.py +14 -0
  12. atlas/core/decoder.py +65 -0
  13. atlas/core/dynamics.py +217 -0
  14. atlas/core/encoder.py +120 -0
  15. atlas/core/surprise.py +145 -0
  16. atlas/core/world_model.py +334 -0
  17. atlas/environments/__init__.py +5 -0
  18. atlas/environments/base.py +51 -0
  19. atlas/environments/grid_world.py +219 -0
  20. atlas/environments/physics_2d.py +283 -0
  21. atlas/environments/vm_world.py +168 -0
  22. atlas/knowledge/__init__.py +3 -0
  23. atlas/knowledge/instruction_vocab.py +534 -0
  24. atlas/monitor/__init__.py +5 -0
  25. atlas/monitor/execution_monitor.py +518 -0
  26. atlas/optimization/__init__.py +6 -0
  27. atlas/optimization/speed.py +457 -0
  28. atlas/planning/__init__.py +4 -0
  29. atlas/planning/goal.py +100 -0
  30. atlas/planning/mcts.py +228 -0
  31. atlas/training/__init__.py +4 -0
  32. atlas/training/continual.py +392 -0
  33. atlas/training/growth.py +213 -0
  34. atlas/training/loop.py +306 -0
  35. atlas/training/losses.py +101 -0
  36. atlas/training/self_train.py +307 -0
  37. atlas/utils/__init__.py +4 -0
  38. atlas/utils/logging.py +33 -0
  39. atlas/utils/math_helpers.py +30 -0
  40. atlas/utils/viz.py +136 -0
  41. atlas/vm/__init__.py +4 -0
  42. atlas/vm/wsl_vm.py +249 -0
  43. phantomrt-0.1.0.dist-info/METADATA +75 -0
  44. phantomrt-0.1.0.dist-info/RECORD +48 -0
  45. phantomrt-0.1.0.dist-info/WHEEL +5 -0
  46. phantomrt-0.1.0.dist-info/entry_points.txt +3 -0
  47. phantomrt-0.1.0.dist-info/licenses/LICENSE +21 -0
  48. phantomrt-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,334 @@
1
+ """
2
+ World Model: The Complete Brain
3
+
4
+ Combines encoder, dynamics, and decoder into one coherent system
5
+ that can:
6
+ 1. Encode observations into understanding
7
+ 2. Simulate the future in imagination
8
+ 3. Predict what it would see
9
+ 4. Detect surprises and learn from them
10
+ 5. Plan actions by imagining outcomes
11
+
12
+ This is the central class that everything else interacts with.
13
+ """
14
+
15
+ import torch
16
+ import torch.nn as nn
17
+ import torch.nn.functional as F
18
+ from dataclasses import dataclass
19
+ from typing import Optional
20
+
21
+ from .encoder import Encoder
22
+ from .decoder import Decoder
23
+ from .dynamics import DynamicsFunction, NeuralODE
24
+ from .surprise import SurpriseDetector
25
+
26
+
27
@dataclass
class WorldModelOutput:
    """Everything a single ``WorldModel.forward`` call hands back.

    The shape notes beside each field are documentation only; nothing in
    this container validates them.
    """

    # --- Encoding of the input observation ---
    latent_state: torch.Tensor      # sampled latent, [batch, latent_dim]
    encoder_mean: torch.Tensor      # posterior mean, [batch, latent_dim]
    encoder_log_var: torch.Tensor   # posterior log-variance, [batch, latent_dim]

    # --- Decoder output for ``latent_state`` ---
    reconstructed_obs: torch.Tensor  # [batch, obs_dim]

    # --- Loss terms (each a scalar tensor) ---
    reconstruction_loss: torch.Tensor
    kl_loss: torch.Tensor
    surprise_loss: torch.Tensor
    total_loss: torch.Tensor

    # --- Surprise detector verdict ---
    is_surprising: bool    # flag returned by the surprise detector
    surprise_score: float  # ``surprise_loss`` converted to a Python float
47
+
48
+
49
@dataclass
class RolloutOutput:
    """Result of an imagined rollout produced by ``WorldModel.imagine``.

    Index 0 along the time axis is the encoded starting state; the remaining
    entries are the states reached after each supplied action.
    """

    # Latent states visited by the rollout, [batch, steps+1, latent_dim].
    trajectory: torch.Tensor
    # Decoder output for every latent state, [batch, steps+1, obs_dim].
    predicted_observations: torch.Tensor
    # Reward-head output for every latent state. ``WorldModel.imagine``
    # reshapes it to [batch, steps+1, 1]; None is allowed by the type for
    # producers that have no reward predictor.
    rewards: Optional[torch.Tensor]
55
+
56
+
57
class WorldModel(nn.Module):
    """
    Top-level module that wires the encoder, latent dynamics, decoder,
    surprise detector and reward head into one object.

    Data flow:
        observation              --Encoder-->           latent state
        latent state + actions   --NeuralODE-->         future latent states
        latent state             --Decoder-->           predicted observation
        (observation, recon)     --SurpriseDetector-->  surprise signal

    Methods defined on this class:
        encode()                      observation -> (mean, log_var, sample)
        predict()                     latent state -> observation
        imagine()                     observation + action sequence -> rollout
        step_dynamics()               one latent transition
        forward()                     reconstruction pass plus loss terms
        get_latent_representation()   encoder mean only
        compute_reward()              reward head applied to a latent state
        get_surprise_stats()          statistics kept by the surprise detector
    """

    def __init__(
        self,
        obs_dim: int,
        action_dim: int,
        latent_dim: int = 256,
        hidden_dim: int = 512,
        encoder_hidden_dims: list = None,
        decoder_hidden_dims: list = None,
        dynamics_layers: int = 3,
        dynamics_solver: str = "dopri5",
        dynamics_dt: float = 0.05,
        dropout: float = 0.1,
        surprise_threshold: float = 0.1,
    ):
        """
        Build every sub-module.

        Args:
            obs_dim: size of one observation vector.
            action_dim: size of one action vector.
            latent_dim: size of the latent state.
            hidden_dim: width used by the dynamics network, the reward head,
                and (by default) the encoder/decoder hidden layers.
            encoder_hidden_dims: encoder layer widths; None means
                ``[hidden_dim, hidden_dim]``.
            decoder_hidden_dims: decoder layer widths; None means
                ``[hidden_dim, hidden_dim]``.
            dynamics_layers: forwarded to ``DynamicsFunction`` as ``num_layers``.
            dynamics_solver: forwarded to ``NeuralODE`` as ``solver``.
            dynamics_dt: forwarded to ``NeuralODE`` as ``dt``.
            dropout: forwarded to ``Encoder``.
            surprise_threshold: forwarded to ``SurpriseDetector`` as
                ``initial_threshold``.
        """
        super().__init__()

        self.obs_dim = obs_dim
        self.action_dim = action_dim
        self.latent_dim = latent_dim

        # Fall back to two hidden layers of width ``hidden_dim``.
        enc_widths = [hidden_dim, hidden_dim] if encoder_hidden_dims is None else encoder_hidden_dims
        dec_widths = [hidden_dim, hidden_dim] if decoder_hidden_dims is None else decoder_hidden_dims

        # NOTE: sub-modules are created in a fixed order (encoder, dynamics,
        # ODE wrapper, decoder, surprise detector, reward head) so that
        # parameter registration and random initialisation stay reproducible.
        self.encoder = Encoder(
            obs_dim=obs_dim,
            latent_dim=latent_dim,
            hidden_dims=enc_widths,
            dropout=dropout,
        )

        self.dynamics_fn = DynamicsFunction(
            state_dim=latent_dim,
            action_dim=action_dim,
            hidden_dim=hidden_dim,
            num_layers=dynamics_layers,
        )

        # The ODE wrapper integrates ``dynamics_fn``; it shares that module
        # rather than owning a separate copy.
        self.neural_ode = NeuralODE(
            dynamics_fn=self.dynamics_fn,
            solver=dynamics_solver,
            dt=dynamics_dt,
        )

        self.decoder = Decoder(
            latent_dim=latent_dim,
            obs_dim=obs_dim,
            hidden_dims=dec_widths,
        )

        self.surprise_detector = SurpriseDetector(
            initial_threshold=surprise_threshold,
        )

        # Small MLP mapping a latent state to a single reward value.
        self.reward_predictor = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, 1),
        )

    def encode(self, observation: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Map an observation to the latent space via ``self.encoder.encode``.

        Args:
            observation: [batch, obs_dim]

        Returns:
            (mean, log_var, sampled_state), each [batch, latent_dim]
        """
        encoded = self.encoder.encode(observation)
        return encoded

    def predict(self, latent_state: torch.Tensor) -> torch.Tensor:
        """
        Run the decoder on a latent state.

        Args:
            latent_state: [batch, latent_dim]

        Returns:
            predicted observation, [batch, obs_dim]
        """
        decoded = self.decoder(latent_state)
        return decoded

    def imagine(
        self,
        initial_observation: torch.Tensor,
        actions: torch.Tensor,
    ) -> RolloutOutput:
        """
        Roll the latent dynamics forward from an observation without
        interacting with any environment.

        Args:
            initial_observation: [batch, obs_dim]
            actions: [batch, num_steps, action_dim]

        Returns:
            RolloutOutput holding the latent trajectory, the decoded
            observation for every visited state, and the reward-head output
            for every visited state (shape [batch, num_steps+1, 1]).
        """
        # Start from the sampled latent (third element of ``encode``).
        _, _, start_state = self.encode(initial_observation)

        # Expected shape: [batch, num_steps+1, latent_dim]; the unpack below
        # fails loudly if the ODE wrapper returns anything that is not 3-D.
        rollout = self.neural_ode(start_state, actions)
        n_batch, n_time, _ = rollout.shape

        # Collapse batch and time so decoder / reward head see a 2-D input.
        states_2d = rollout.reshape(-1, self.latent_dim)

        decoded = self.decoder(states_2d).reshape(n_batch, n_time, self.obs_dim)
        reward_seq = self.reward_predictor(states_2d).reshape(n_batch, n_time, 1)

        return RolloutOutput(
            trajectory=rollout,
            predicted_observations=decoded,
            rewards=reward_seq,
        )

    def step_dynamics(
        self,
        latent_state: torch.Tensor,
        action: torch.Tensor,
    ) -> torch.Tensor:
        """
        Advance the latent state by one action using
        ``self.neural_ode.single_step``.

        Args:
            latent_state: [batch, latent_dim]
            action: [batch, action_dim]

        Returns:
            next latent state, [batch, latent_dim]
        """
        advanced = self.neural_ode.single_step(latent_state, action)
        return advanced

    def forward(
        self,
        observation: torch.Tensor,
        actions: Optional[torch.Tensor] = None,
        rollout_steps: int = 10,
    ) -> WorldModelOutput:
        """
        Encode, reconstruct, and assemble the training losses.

        total_loss = reconstruction + 0.01 * KL, plus 0.1 * dynamics loss
        when ``actions`` is given. The surprise loss is reported in the
        output but is not added to ``total_loss``.

        Args:
            observation: [batch, obs_dim]
            actions: optional [batch, steps, action_dim]; enables the
                dynamics term.
            rollout_steps: accepted for API compatibility; this method does
                not read it (the rollout length comes from ``actions``).

        Returns:
            WorldModelOutput with the latent encoding, reconstruction,
            individual loss terms and the surprise verdict.
        """
        mu, log_var, z = self.encode(observation)
        recon = self.predict(z)

        mse_term = F.mse_loss(recon, observation)
        kl_term = self.encoder.kl_divergence(mu, log_var)

        # The detector returns (loss, flag); it is called on every forward
        # pass regardless of whether actions were supplied.
        surprise_term, surprised = self.surprise_detector.compute_surprise(
            observation, recon
        )

        combined = mse_term + 0.01 * kl_term
        if actions is not None:
            combined = combined + 0.1 * self._compute_dynamics_loss(observation, actions)

        return WorldModelOutput(
            latent_state=z,
            encoder_mean=mu,
            encoder_log_var=log_var,
            reconstructed_obs=recon,
            reconstruction_loss=mse_term,
            kl_loss=kl_term,
            surprise_loss=surprise_term,
            total_loss=combined,
            is_surprising=surprised,
            # ``.item()`` requires the detector's loss to be a one-element tensor.
            surprise_score=surprise_term.item(),
        )

    def _compute_dynamics_loss(
        self,
        observation: torch.Tensor,
        actions: torch.Tensor,
    ) -> torch.Tensor:
        """
        Self-consistency loss for the latent dynamics.

        No future observations are available here, so the multi-step rollout
        is compared against itself: for every transition, a fresh
        ``single_step`` from state t must land on the rollout's state t+1
        (treated as a constant target via ``detach``). The per-step MSEs are
        averaged over the number of transitions.

        NOTE(review): this calls ``self.encode`` again, separately from the
        call made in ``forward``.
        NOTE(review): if the rollout is itself produced by the same
        single-step integrator, this term may be near zero — confirm against
        the NeuralODE implementation.

        Args:
            observation: [batch, obs_dim]
            actions: [batch, steps, action_dim]

        Returns:
            scalar tensor with the mean per-transition MSE (0.0 when the
            rollout contains no transition).
        """
        _, _, start_state = self.encode(observation)
        rollout = self.neural_ode(start_state, actions)

        n_transitions = rollout.shape[1] - 1
        accumulated = torch.tensor(0.0, device=observation.device)

        for step in range(n_transitions):
            rolled_next = rollout[:, step + 1]
            stepped_next = self.neural_ode.single_step(rollout[:, step], actions[:, step])
            accumulated = accumulated + F.mse_loss(stepped_next, rolled_next.detach())

        # max(..., 1) guards the division when there are no transitions.
        return accumulated / max(n_transitions, 1)

    def get_latent_representation(self, observation: torch.Tensor) -> torch.Tensor:
        """
        Return the encoder mean for an observation, skipping the sampling step.

        Calls the encoder module directly (not ``encode``) and keeps the
        first of the two values it returns.
        """
        mu, _unused = self.encoder(observation)
        return mu

    def compute_reward(self, latent_state: torch.Tensor) -> torch.Tensor:
        """Apply the reward head to a latent state."""
        predicted_reward = self.reward_predictor(latent_state)
        return predicted_reward

    def get_surprise_stats(self) -> dict:
        """Return whatever ``self.surprise_detector.get_stats()`` reports."""
        stats = self.surprise_detector.get_stats()
        return stats
@@ -0,0 +1,5 @@
1
+ from .base import BaseEnvironment
2
+ from .grid_world import GridWorld
3
+ from .physics_2d import Physics2DEnvironment
4
+
5
+ __all__ = ["BaseEnvironment", "GridWorld", "Physics2DEnvironment"]
@@ -0,0 +1,51 @@
1
+ """
2
+ Base Environment Interface
3
+
4
+ All environments inherit from this.
5
+ Provides a consistent API for the world model to interact with.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ import numpy as np
10
+ from typing import Optional
11
+
12
+
13
class BaseEnvironment(ABC):
    """Common interface every environment in this package implements.

    Subclasses must provide ``reset``, ``step``, ``get_observation_dim``,
    ``get_action_dim`` and ``render``; ``close`` is optional and does
    nothing by default.
    """

    @abstractmethod
    def reset(self) -> np.ndarray:
        """Put the environment back into a starting state.

        Returns:
            The first observation of the new episode.
        """
        ...

    @abstractmethod
    def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, dict]:
        """Apply one action.

        Returns:
            A 4-tuple ``(observation, reward, done, info)``:
            the next observation, the reward signal, whether the episode
            has ended, and a dict of additional information.
        """
        ...

    @abstractmethod
    def get_observation_dim(self) -> int:
        """Return the dimensionality of an observation."""
        ...

    @abstractmethod
    def get_action_dim(self) -> int:
        """Return the dimensionality of an action."""
        ...

    @abstractmethod
    def render(self) -> Optional[np.ndarray]:
        """Draw the environment; return an RGB array, or None."""
        ...

    def close(self):
        """Release resources held by the environment (no-op by default)."""
        return None
@@ -0,0 +1,219 @@
1
+ """
2
+ Grid World Environment
3
+
4
+ A simple 2D grid where an agent navigates to reach a goal.
5
+ Objects can be placed on the grid and have basic properties.
6
+
7
+ This is the TESTBED for Phase 1 — simple enough to learn,
8
+ complex enough to test understanding.
9
+
10
+ Observation space: [agent_x, agent_y, goal_x, goal_y, obj1_x, obj1_y, obj1_type, ...]
11
+ Action space: [dx, dy] continuous movement
12
+ """
13
+
14
+ import numpy as np
15
+ from typing import Optional
16
+ from .base import BaseEnvironment
17
+
18
+
19
+ class GridWorld(BaseEnvironment):
20
+ """
21
+ 2D grid world with:
22
+ - Agent (orange) that moves around
23
+ - Goal (green) to reach
24
+ - Obstacles (red) that block movement
25
+ - Collectibles (blue) that give reward
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ grid_size: int = 8,
31
+ num_obstacles: int = 3,
32
+ num_collectibles: int = 2,
33
+ max_steps: int = 100,
34
+ seed: Optional[int] = None,
35
+ ):
36
+ self.grid_size = grid_size
37
+ self.num_obstacles = num_obstacles
38
+ self.num_collectibles = num_collectibles
39
+ self.max_steps = max_steps
40
+
41
+ self.rng = np.random.RandomState(seed)
42
+
43
+ # State
44
+ self.agent_pos = None
45
+ self.goal_pos = None
46
+ self.obstacles = None
47
+ self.collectibles = None
48
+ self.collected = None
49
+ self.steps = 0
50
+
51
+ # Observation: [agent_x, agent_y, goal_x, goal_y,
52
+ # obs1_x, obs1_y, obs1_exists,
53
+ # obs2_x, obs2_y, obs2_exists, ...]
54
+ # For each object: x, y, exists (3 values)
55
+ self._obs_dim = 4 + 3 * (num_obstacles + num_collectibles)
56
+ self._action_dim = 2 # dx, dy
57
+
58
+ def get_observation_dim(self) -> int:
59
+ return self._obs_dim
60
+
61
+ def get_action_dim(self) -> int:
62
+ return self._action_dim
63
+
64
+ def reset(self) -> np.ndarray:
65
+ """Reset to a random configuration."""
66
+ self.steps = 0
67
+ self.collected = [False] * self.num_collectibles
68
+
69
+ # Place agent at random position
70
+ self.agent_pos = self.rng.randint(0, self.grid_size, size=2).astype(float)
71
+
72
+ # Place goal at random position (not on agent)
73
+ while True:
74
+ self.goal_pos = self.rng.randint(0, self.grid_size, size=2).astype(float)
75
+ if not np.array_equal(self.agent_pos, self.goal_pos):
76
+ break
77
+
78
+ # Place obstacles
79
+ self.obstacles = []
80
+ for _ in range(self.num_obstacles):
81
+ while True:
82
+ pos = self.rng.randint(0, self.grid_size, size=2).astype(float)
83
+ if (not np.array_equal(pos, self.agent_pos) and
84
+ not np.array_equal(pos, self.goal_pos) and
85
+ not any(np.array_equal(pos, o) for o in self.obstacles)):
86
+ self.obstacles.append(pos)
87
+ break
88
+
89
+ # Place collectibles
90
+ self.collectibles = []
91
+ for _ in range(self.num_collectibles):
92
+ while True:
93
+ pos = self.rng.randint(0, self.grid_size, size=2).astype(float)
94
+ if (not np.array_equal(pos, self.agent_pos) and
95
+ not np.array_equal(pos, self.goal_pos) and
96
+ not any(np.array_equal(pos, o) for o in self.obstacles) and
97
+ not any(np.array_equal(pos, c) for c in self.collectibles)):
98
+ self.collectibles.append(pos)
99
+ break
100
+
101
+ return self._get_observation()
102
+
103
+ def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, dict]:
104
+ """Take a movement action."""
105
+ self.steps += 1
106
+
107
+ # Clip and apply action
108
+ action = np.clip(action, -1.0, 1.0)
109
+ new_pos = self.agent_pos + action
110
+
111
+ # Clip to grid bounds
112
+ new_pos = np.clip(new_pos, 0, self.grid_size - 1)
113
+
114
+ # Check obstacle collision
115
+ hit_obstacle = False
116
+ for obs_pos in self.obstacles:
117
+ if self._check_collision(new_pos, obs_pos):
118
+ hit_obstacle = True
119
+ new_pos = self.agent_pos.copy() # don't move
120
+ break
121
+
122
+ self.agent_pos = new_pos
123
+
124
+ # Compute reward
125
+ reward = 0.0
126
+
127
+ # Check goal reached
128
+ reached_goal = self._check_reached(self.agent_pos, self.goal_pos)
129
+ if reached_goal:
130
+ reward += 10.0
131
+
132
+ # Check collectible pickup
133
+ for i, coll_pos in enumerate(self.collectibles):
134
+ if not self.collected[i] and self._check_reached(self.agent_pos, coll_pos):
135
+ self.collected[i] = True
136
+ reward += 1.0
137
+
138
+ # Small step penalty (encourages efficiency)
139
+ reward -= 0.01
140
+
141
+ # Collision penalty
142
+ if hit_obstacle:
143
+ reward -= 0.5
144
+
145
+ # Check done
146
+ done = reached_goal or self.steps >= self.max_steps
147
+
148
+ info = {
149
+ "reached_goal": reached_goal,
150
+ "hit_obstacle": hit_obstacle,
151
+ "steps": self.steps,
152
+ "collected": sum(self.collected),
153
+ }
154
+
155
+ return self._get_observation(), reward, done, info
156
+
157
+ def _check_collision(self, pos1: np.ndarray, pos2: np.ndarray, threshold: float = 0.5) -> bool:
158
+ """Check if two positions are close enough to collide."""
159
+ return np.linalg.norm(pos1 - pos2) < threshold
160
+
161
+ def _check_reached(self, pos: np.ndarray, target: np.ndarray, threshold: float = 0.5) -> bool:
162
+ """Check if position has reached the target."""
163
+ return np.linalg.norm(pos - target) < threshold
164
+
165
+ def _get_observation(self) -> np.ndarray:
166
+ """Build observation vector from current state."""
167
+ obs = []
168
+
169
+ # Agent and goal
170
+ obs.extend(self.agent_pos / self.grid_size) # normalize to [0, 1]
171
+ obs.extend(self.goal_pos / self.grid_size)
172
+
173
+ # Obstacles
174
+ for obs_pos in self.obstacles:
175
+ obs.extend(obs_pos / self.grid_size)
176
+ obs.append(1.0) # exists
177
+
178
+ # Collectibles
179
+ for i, coll_pos in enumerate(self.collectibles):
180
+ obs.extend(coll_pos / self.grid_size)
181
+ obs.append(0.0 if self.collected[i] else 1.0) # exists (0 if collected)
182
+
183
+ return np.array(obs, dtype=np.float32)
184
+
185
+ def render(self) -> np.ndarray:
186
+ """Render grid as RGB image."""
187
+ img = np.ones((self.grid_size, self.grid_size, 3), dtype=np.uint8) * 240 # light gray background
188
+
189
+ # Draw obstacles (red)
190
+ for obs_pos in self.obstacles:
191
+ x, y = int(obs_pos[0]), int(obs_pos[1])
192
+ img[y, x] = [220, 50, 50]
193
+
194
+ # Draw collectibles (blue) - only if not collected
195
+ for i, coll_pos in enumerate(self.collectibles):
196
+ if not self.collected[i]:
197
+ x, y = int(coll_pos[0]), int(coll_pos[1])
198
+ img[y, x] = [50, 50, 220]
199
+
200
+ # Draw goal (green)
201
+ x, y = int(self.goal_pos[0]), int(self.goal_pos[1])
202
+ img[y, x] = [50, 200, 50]
203
+
204
+ # Draw agent (orange)
205
+ x, y = int(self.agent_pos[0]), int(self.agent_pos[1])
206
+ img[y, x] = [255, 165, 0]
207
+
208
+ return img
209
+
210
+ def get_state(self) -> dict:
211
+ """Get full state for debugging."""
212
+ return {
213
+ "agent_pos": self.agent_pos.copy(),
214
+ "goal_pos": self.goal_pos.copy(),
215
+ "obstacles": [o.copy() for o in self.obstacles],
216
+ "collectibles": [c.copy() for c in self.collectibles],
217
+ "collected": self.collected.copy(),
218
+ "steps": self.steps,
219
+ }