cogames_agents-0.0.0.7-cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. cogames_agents/__init__.py +0 -0
  2. cogames_agents/evals/__init__.py +5 -0
  3. cogames_agents/evals/planky_evals.py +415 -0
  4. cogames_agents/policy/__init__.py +0 -0
  5. cogames_agents/policy/evolution/__init__.py +0 -0
  6. cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
  7. cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
  8. cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
  9. cogames_agents/policy/nim_agents/__init__.py +20 -0
  10. cogames_agents/policy/nim_agents/agents.py +98 -0
  11. cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
  12. cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
  13. cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
  14. cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
  15. cogames_agents/policy/nim_agents/common.nim +1054 -0
  16. cogames_agents/policy/nim_agents/install.sh +1 -0
  17. cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
  18. cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
  19. cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
  20. cogames_agents/policy/nim_agents/nimby.lock +3 -0
  21. cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
  22. cogames_agents/policy/nim_agents/random_agents.nim +68 -0
  23. cogames_agents/policy/nim_agents/test_agents.py +53 -0
  24. cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
  25. cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
  26. cogames_agents/policy/scripted_agent/README.md +360 -0
  27. cogames_agents/policy/scripted_agent/__init__.py +0 -0
  28. cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
  29. cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
  30. cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
  31. cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
  32. cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
  33. cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
  34. cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
  35. cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
  36. cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
  37. cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
  38. cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
  39. cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
  40. cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
  41. cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
  42. cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
  43. cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
  44. cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
  45. cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
  46. cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
  47. cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
  48. cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
  49. cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
  50. cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
  51. cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
  52. cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
  53. cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
  54. cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
  55. cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
  56. cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
  57. cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
  58. cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
  59. cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
  60. cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
  61. cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
  62. cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
  63. cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
  64. cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
  65. cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
  66. cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
  67. cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
  68. cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
  69. cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
  70. cogames_agents/policy/scripted_agent/common/roles.py +34 -0
  71. cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
  72. cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
  73. cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
  74. cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
  75. cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
  76. cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
  77. cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
  78. cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
  79. cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
  80. cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
  81. cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
  82. cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
  83. cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
  84. cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
  85. cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
  86. cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
  87. cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
  88. cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
  89. cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
  90. cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
  91. cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
  92. cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
  93. cogames_agents/policy/scripted_agent/planky/README.md +214 -0
  94. cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
  95. cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
  96. cogames_agents/policy/scripted_agent/planky/context.py +68 -0
  97. cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
  98. cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
  99. cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
  100. cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
  101. cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
  102. cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
  103. cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
  104. cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
  105. cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
  106. cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
  107. cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
  108. cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
  109. cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
  110. cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
  111. cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
  112. cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
  113. cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
  114. cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
  115. cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
  116. cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
  117. cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
  118. cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
  119. cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
  120. cogames_agents/policy/scripted_agent/types.py +239 -0
  121. cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
  122. cogames_agents/policy/scripted_agent/utils.py +381 -0
  123. cogames_agents/policy/scripted_registry.py +80 -0
  124. cogames_agents/py.typed +0 -0
  125. cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
  126. cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
  127. cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
  128. cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,864 @@
1
+ """
2
+ Navigator service for Pinky policy.
3
+
4
+ Handles pathfinding, movement, stuck detection, and exploration.
5
+ Uses A* pathfinding with dynamic agent avoidance.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import heapq
11
+ import random
12
+ from typing import TYPE_CHECKING, Optional
13
+
14
+ from cogames_agents.policy.scripted_agent.pinky.types import DEBUG, CellType
15
+ from mettagrid.simulator import Action
16
+
17
+ if TYPE_CHECKING:
18
+ from cogames_agents.policy.scripted_agent.pinky.state import AgentState
19
+ from mettagrid.policy.policy_env_interface import PolicyEnvInterface
20
+
21
+
22
+ class Navigator:
23
+ """Handles all movement decisions - pathfinding, stuck detection, collision avoidance."""
24
+
25
+ MOVE_DELTAS = {
26
+ "north": (-1, 0),
27
+ "south": (1, 0),
28
+ "east": (0, 1),
29
+ "west": (0, -1),
30
+ }
31
+
32
+ DIRECTIONS = ["north", "south", "east", "west"]
33
+
34
+ # Stuck detection thresholds (balanced)
35
+ STUCK_THRESHOLD = 10 # Consecutive steps at same position
36
+ POSITION_HISTORY_SIZE = 20 # How many positions to track for circular detection
37
+ CIRCULAR_STUCK_THRESHOLD = 5 # Revisiting same position this many times = stuck
38
+ TIGHT_LOOP_HISTORY = 15 # Check this many recent positions for tight loops
39
+ TIGHT_LOOP_UNIQUE_MIN = 4  # If this many or fewer unique positions, we're stuck
40
+
41
+ # Escape mode settings
42
+ ESCAPE_COMMITMENT_STEPS = 4 # When stuck, commit to escaping for this many steps
43
+
44
+ def __init__(self, policy_env_info: PolicyEnvInterface):
45
+ # Store action names for potential validation
46
+ self._action_names = policy_env_info.action_names
47
+
48
+ def move_to(
49
+ self,
50
+ state: AgentState,
51
+ target: tuple[int, int],
52
+ reach_adjacent: bool = False,
53
+ ) -> Action:
54
+ """Pathfind toward a target position using the internal map.
55
+
56
+ Uses A* pathfinding with the map built from previous observations.
57
+ Navigates around other agents dynamically. First tries to find a path
58
+ through known (explored) terrain. If no known path exists, allows traversal
59
+ through unknown cells to reach the target.
60
+
61
+ Args:
62
+ state: Current agent state
63
+ target: Target position to reach
64
+ reach_adjacent: If True, stop when adjacent to target instead of on it
65
+
66
+ Returns:
67
+ Action to move toward target, or noop if stuck/unreachable
68
+ """
69
+ # Check for stuck loop first
70
+ if self._is_stuck(state):
71
+ action = self._break_stuck(state)
72
+ if action:
73
+ return action
74
+
75
+ start = state.pos
76
+ if start == target and not reach_adjacent:
77
+ return Action(name="noop")
78
+
79
+ # Compute goal cells
80
+ goal_cells = self._compute_goal_cells(state, target, reach_adjacent)
81
+ if not goal_cells:
82
+ if DEBUG:
83
+ print(f"[A{state.agent_id}] NAV: No goal cells for {target}")
84
+ return Action(name="noop")
85
+
86
+ # Check cached path (invalidate if path goes through now-blocked cells or agents)
87
+ path = self._get_cached_path(state, target, reach_adjacent)
88
+
89
+ # Compute new path if needed
90
+ if path is None:
91
+ # First try to find path through known terrain, avoiding agents
92
+ path = self._shortest_path(state, start, goal_cells, allow_unknown=False, avoid_agents=True)
93
+
94
+ # If no path avoiding agents, try allowing agent cells (they may move)
95
+ if not path and state.map.agent_occupancy:
96
+ if DEBUG:
97
+ print(f"[A{state.agent_id}] NAV: No path avoiding agents, trying through agent cells")
98
+ path = self._shortest_path(state, start, goal_cells, allow_unknown=False, avoid_agents=False)
99
+
100
+ # If still no known path, try allowing unknown cells (exploration)
101
+ if not path:
102
+ if DEBUG:
103
+ print(f"[A{state.agent_id}] NAV: No known path to {target}, trying through unknown")
104
+ path = self._shortest_path(state, start, goal_cells, allow_unknown=True, avoid_agents=True)
105
+
106
+ # Last resort: allow both unknown and agent cells
107
+ if not path and state.map.agent_occupancy:
108
+ path = self._shortest_path(state, start, goal_cells, allow_unknown=True, avoid_agents=False)
109
+
110
+ state.nav.cached_path = path.copy() if path else None
111
+ state.nav.cached_path_target = target
112
+ state.nav.cached_path_reach_adjacent = reach_adjacent
113
+
114
+ if not path:
115
+ if DEBUG:
116
+ print(f"[A{state.agent_id}] NAV: No path to {target}, exploring")
117
+ return self.explore(state)
118
+
119
+ next_pos = path[0]
120
+
121
+ # Check if next position is blocked by an agent
122
+ if next_pos in state.map.agent_occupancy:
123
+ # Try to find an immediate sidestep around the blocking agent
124
+ sidestep = self._find_sidestep(state, next_pos, target)
125
+ if sidestep:
126
+ if DEBUG:
127
+ print(f"[A{state.agent_id}] NAV: Agent at {next_pos}, sidestepping to {sidestep}")
128
+ # Clear cached path since we're deviating
129
+ state.nav.cached_path = None
130
+ state.nav.cached_path_target = None
131
+ return self._move_toward(state, sidestep)
132
+ else:
133
+ # No sidestep available, wait by doing noop (agent may move next step)
134
+ if DEBUG:
135
+ print(f"[A{state.agent_id}] NAV: Agent blocking at {next_pos}, waiting")
136
+ return Action(name="noop")
137
+
138
+ # Advance cached path
139
+ if state.nav.cached_path:
140
+ state.nav.cached_path = state.nav.cached_path[1:]
141
+ if not state.nav.cached_path:
142
+ state.nav.cached_path = None
143
+ state.nav.cached_path_target = None
144
+
145
+ return self._move_toward(state, next_pos)
146
+
147
+ def _find_sidestep(
148
+ self, state: AgentState, blocked_pos: tuple[int, int], target: tuple[int, int]
149
+ ) -> Optional[tuple[int, int]]:
150
+ """Find an immediate sidestep around a blocking agent.
151
+
152
+ Tries to find an adjacent free cell that still makes progress toward the target.
153
+
154
+ Args:
155
+ state: Agent state
156
+ blocked_pos: The position blocked by an agent
157
+ target: Ultimate target we're trying to reach
158
+
159
+ Returns:
160
+ A position to sidestep to, or None if no good sidestep available
161
+ """
162
+ current = state.pos
163
+ current_dist = abs(target[0] - current[0]) + abs(target[1] - current[1])
164
+
165
+ candidates: list[tuple[int, tuple[int, int]]] = []
166
+
167
+ for direction in self.DIRECTIONS:
168
+ dr, dc = self.MOVE_DELTAS[direction]
169
+ nr, nc = current[0] + dr, current[1] + dc
170
+ neighbor = (nr, nc)
171
+
172
+ # Skip the blocked position
173
+ if neighbor == blocked_pos:
174
+ continue
175
+
176
+ # Check if this cell is traversable
177
+ if not self._is_traversable(state, nr, nc, allow_unknown=True, check_agents=True):
178
+ continue
179
+
180
+ # Calculate distance to target from this position
181
+ new_dist = abs(target[0] - nr) + abs(target[1] - nc)
182
+
183
+ # Prefer cells that maintain or improve distance to target
184
+ # Score: lower is better (distance increase as cost)
185
+ score = new_dist - current_dist
186
+ candidates.append((score, neighbor))
187
+
188
+ if not candidates:
189
+ return None
190
+
191
+ # Sort by score (prefer cells that don't increase distance much)
192
+ candidates.sort(key=lambda x: x[0])
193
+
194
+ # Only take sidesteps that don't increase distance by more than 2
195
+ # (otherwise we might be going backwards)
196
+ if candidates[0][0] <= 2:
197
+ return candidates[0][1]
198
+
199
+ return None
200
+
201
+ def explore(self, state: AgentState, direction_bias: Optional[str] = None) -> Action:
202
+ """Explore by navigating toward unexplored frontier cells.
203
+
204
+ Uses the map's explored grid to find the nearest unexplored cell
205
+ adjacent to known territory, then pathfinds toward it.
206
+
207
+ Args:
208
+ state: Agent state with map knowledge
209
+ direction_bias: Optional direction preference to spread agents
210
+ """
211
+ # Check for stuck loop
212
+ if self._is_stuck(state):
213
+ action = self._break_stuck(state)
214
+ if action:
215
+ return action
216
+
217
+ # Find nearest unexplored frontier cell
218
+ # Use agent_id to bias direction so agents spread out
219
+ if direction_bias is None:
220
+ directions = ["north", "east", "south", "west"]
221
+ direction_bias = directions[state.agent_id % 4]
222
+
223
+ frontier = state.map.find_nearest_unexplored(state.pos, max_dist=50, direction_bias=direction_bias)
224
+
225
+ if frontier is not None:
226
+ # Navigate toward the frontier cell
227
+ return self.move_to(state, frontier)
228
+
229
+ # No frontier found - fall back to expanding box pattern
230
+ if state.nav.explore_origin is None:
231
+ state.nav.explore_origin = state.pos
232
+ state.nav.explore_start_step = state.step
233
+
234
+ origin = state.nav.explore_origin
235
+ explore_step = state.step - state.nav.explore_start_step
236
+
237
+ # Calculate target position using expanding box pattern
238
+ target = self._get_explore_target(origin, explore_step)
239
+
240
+ # Move toward target
241
+ dr = target[0] - state.row
242
+ dc = target[1] - state.col
243
+
244
+ # If at target, advance to next step
245
+ if dr == 0 and dc == 0:
246
+ state.nav.explore_start_step = state.step - explore_step - 1
247
+ return self.explore(state, direction_bias)
248
+
249
+ # Pick direction toward target, prioritizing larger delta
250
+ direction = None
251
+ if abs(dr) >= abs(dc):
252
+ if dr > 0:
253
+ direction = "south"
254
+ elif dr < 0:
255
+ direction = "north"
256
+ elif dc > 0:
257
+ direction = "east"
258
+ elif dc < 0:
259
+ direction = "west"
260
+ else:
261
+ if dc > 0:
262
+ direction = "east"
263
+ elif dc < 0:
264
+ direction = "west"
265
+ elif dr > 0:
266
+ direction = "south"
267
+ elif dr < 0:
268
+ direction = "north"
269
+
270
+ if direction:
271
+ move_dr, move_dc = self.MOVE_DELTAS[direction]
272
+ next_r, next_c = state.row + move_dr, state.col + move_dc
273
+ if self._is_traversable(state, next_r, next_c, allow_unknown=True, check_agents=True):
274
+ return Action(name=f"move_{direction}")
275
+
276
+ # Primary direction blocked (possibly by agent) - try perpendicular directions
277
+ if direction in ("north", "south"):
278
+ alternatives = ["east", "west"]
279
+ else:
280
+ alternatives = ["north", "south"]
281
+
282
+ for alt_dir in alternatives:
283
+ alt_dr, alt_dc = self.MOVE_DELTAS[alt_dir]
284
+ alt_r, alt_c = state.row + alt_dr, state.col + alt_dc
285
+ if self._is_traversable(state, alt_r, alt_c, allow_unknown=True, check_agents=True):
286
+ return Action(name=f"move_{alt_dir}")
287
+
288
+ # All directions blocked by obstacles or agents - try any traversable direction
289
+ for fallback_dir in self.DIRECTIONS:
290
+ fb_dr, fb_dc = self.MOVE_DELTAS[fallback_dir]
291
+ fb_r, fb_c = state.row + fb_dr, state.col + fb_dc
292
+ if self._is_traversable(state, fb_r, fb_c, allow_unknown=True, check_agents=True):
293
+ return Action(name=f"move_{fallback_dir}")
294
+
295
+ # Completely blocked - wait (agents may move)
296
+ if DEBUG:
297
+ print(f"[A{state.agent_id}] NAV: Explore blocked, waiting")
298
+ return Action(name="noop")
299
+
300
+ def _get_explore_target(self, origin: tuple[int, int], step: int) -> tuple[int, int]:
301
+ """Calculate target position for expanding box exploration.
302
+
303
+ Creates waypoints in a clockwise expanding box pattern:
304
+ Ring 1: E(5) → S(5) → W(10) → N(10)
305
+ Ring 2: E(10) → S(10) → W(15) → N(15)
306
+ etc.
307
+ """
308
+ segment_base = 5 # Base segment length (accounts for movement cooldowns)
309
+ ring = 1
310
+ cumulative_steps = 0
311
+
312
+ while True:
313
+ seg_len = segment_base * ring
314
+ # Each ring has 4 segments: E, S, W, N
315
+ # E and S use seg_len, W and N use seg_len + segment_base (to complete the box)
316
+ ring_segments = [
317
+ ("east", seg_len),
318
+ ("south", seg_len),
319
+ ("west", seg_len + segment_base),
320
+ ("north", seg_len + segment_base),
321
+ ]
322
+
323
+ for direction, length in ring_segments:
324
+ if cumulative_steps + length > step:
325
+ # We're in this segment
326
+ progress = step - cumulative_steps
327
+ dr, dc = self.MOVE_DELTAS[direction]
328
+ # Calculate position at start of this segment
329
+ # then add progress along segment
330
+ seg_start = self._get_segment_start(origin, ring, direction, segment_base)
331
+ return (seg_start[0] + dr * progress, seg_start[1] + dc * progress)
332
+ cumulative_steps += length
333
+
334
+ ring += 1
335
+ if ring > 10: # Safety limit - reset to ring 1
336
+ ring = 1
337
+ cumulative_steps = 0
338
+
339
+ def _get_segment_start(
340
+ self, origin: tuple[int, int], ring: int, direction: str, segment_base: int
341
+ ) -> tuple[int, int]:
342
+ """Get starting position for a segment in the expanding box."""
343
+ # Calculate corner positions for this ring
344
+ # After completing rings 1..ring-1, we're at the start of ring `ring`
345
+ offset = segment_base * ring
346
+ r, c = origin
347
+
348
+ if direction == "east":
349
+ # Start of E segment: NE corner of previous ring (or origin for ring 1)
350
+ if ring == 1:
351
+ return origin
352
+ return (r - offset + segment_base, c + offset - segment_base)
353
+ elif direction == "south":
354
+ # Start of S segment: after going E
355
+ return (r - offset + segment_base, c + offset)
356
+ elif direction == "west":
357
+ # Start of W segment: SE corner
358
+ return (r + offset, c + offset)
359
+ elif direction == "north":
360
+ # Start of N segment: SW corner
361
+ return (r + offset, c - offset)
362
+ return origin
363
+
364
+ def use_object_at(self, state: AgentState, target: tuple[int, int]) -> Action:
365
+ """Move toward an object cell to interact with it.
366
+
367
+ In mettagrid, moving toward an adjacent object triggers its on_use_handler.
368
+ The move may fail (the object is an obstacle), but the handler still fires.
369
+ Position tracking correctly stays at the adjacent cell.
370
+ """
371
+ state.nav.using_object_this_step = True
372
+ return self._move_toward(state, target)
373
+
374
+ def update_position(self, state: AgentState) -> None:
375
+ """Update agent position based on last executed action.
376
+
377
+ This is a simple action-based update that serves as a fallback.
378
+ The map_tracker's object matching will correct any errors by
379
+ matching visible objects to their known world positions.
380
+ """
381
+ last_action = state.nav.last_action_executed
382
+
383
+ # Simple action-based position update
384
+ # Object matching in map_tracker will correct any errors
385
+ if last_action and last_action.startswith("move_"):
386
+ direction = last_action[5:] # Remove "move_" prefix
387
+ if direction in self.MOVE_DELTAS:
388
+ dr, dc = self.MOVE_DELTAS[direction]
389
+ new_r, new_c = state.row + dr, state.col + dc
390
+
391
+ # Only update if target is not a known obstacle
392
+ # (object matching will correct if this is wrong)
393
+ if self._is_in_bounds(state, new_r, new_c):
394
+ cell_type = state.map.occupancy[new_r][new_c]
395
+ if cell_type != CellType.OBSTACLE.value:
396
+ state.row = new_r
397
+ state.col = new_c
398
+
399
+ # Track position history for stuck detection
400
+ state.nav.position_history.append(state.pos)
401
+ if len(state.nav.position_history) > 30:
402
+ state.nav.position_history.pop(0)
403
+
404
+ def _is_stuck(self, state: AgentState) -> bool:
405
+ """Detect if agent is oscillating or revisiting positions frequently.
406
+
407
+ Detects:
408
+ 1. Oscillation between 2 positions (A→B→A→B)
409
+ 2. Larger oscillation patterns where agent revisits same positions
410
+ """
411
+ history = state.nav.position_history
412
+ if len(history) < 6:
413
+ return False
414
+
415
+ # Check last 6 positions for tight oscillation (2 positions)
416
+ recent = history[-6:]
417
+ unique_recent = set(recent)
418
+ if len(unique_recent) == 2:
419
+ if DEBUG:
420
+ print(f"[A{state.agent_id}] NAV: Stuck! Oscillating between {unique_recent}")
421
+ return True
422
+
423
+ # Check for larger oscillation pattern - revisiting positions we were at earlier
424
+ # (catches the east-west ping-pong over 8+ steps)
425
+ if len(history) >= 20:
426
+ current_pos = history[-1]
427
+ # Check if current position appeared earlier in history (not just recently)
428
+ earlier_history = history[:-10] # Positions from 10+ steps ago
429
+ revisit_count = earlier_history.count(current_pos)
430
+ if revisit_count >= 2:
431
+ if DEBUG:
432
+ print(f"[A{state.agent_id}] NAV: Stuck loop! Revisited {current_pos} {revisit_count}x")
433
+ return True
434
+
435
+ return False
436
+
437
+ def _break_stuck(self, state: AgentState) -> Optional[Action]:
438
+ """Try to escape stuck state with random movement into unexplored territory."""
439
+ if DEBUG:
440
+ print(f"[A{state.agent_id}] NAV: Breaking stuck loop")
441
+
442
+ # Clear cached path
443
+ state.nav.cached_path = None
444
+ state.nav.cached_path_target = None
445
+ state.nav.position_history.clear()
446
+
447
+ # Try random direction, allowing unknown cells to escape, avoiding agents
448
+ directions = list(self.DIRECTIONS)
449
+ random.shuffle(directions)
450
+ for direction in directions:
451
+ dr, dc = self.MOVE_DELTAS[direction]
452
+ nr, nc = state.row + dr, state.col + dc
453
+ if self._is_traversable(state, nr, nc, allow_unknown=True, check_agents=True):
454
+ return Action(name=f"move_{direction}")
455
+ return None
456
+
457
+ def _move_toward(self, state: AgentState, target: tuple[int, int]) -> Action:
458
+ """Return action to move one step toward target."""
459
+ tr, tc = target
460
+ if state.row == tr and state.col == tc:
461
+ return Action(name="noop")
462
+
463
+ dr = tr - state.row
464
+ dc = tc - state.col
465
+
466
+ # Check for agent collision
467
+ if (tr, tc) in state.map.agent_occupancy:
468
+ # Try to go around
469
+ return self._try_alternative_direction(state, target)
470
+
471
+ if dr == -1 and dc == 0:
472
+ return Action(name="move_north")
473
+ elif dr == 1 and dc == 0:
474
+ return Action(name="move_south")
475
+ elif dr == 0 and dc == 1:
476
+ return Action(name="move_east")
477
+ elif dr == 0 and dc == -1:
478
+ return Action(name="move_west")
479
+
480
+ return Action(name="noop")
481
+
482
+ def _try_alternative_direction(self, state: AgentState, target: tuple[int, int]) -> Action:
483
+ """Try to move around an obstacle or agent toward target.
484
+
485
+ Prefers directions that maintain progress toward the target.
486
+ """
487
+ # Collect valid moves with their distance to target
488
+ candidates: list[tuple[int, str]] = []
489
+
490
+ for direction in self.DIRECTIONS:
491
+ dr, dc = self.MOVE_DELTAS[direction]
492
+ nr, nc = state.row + dr, state.col + dc
493
+ if self._is_traversable(state, nr, nc, allow_unknown=True, check_agents=True):
494
+ new_dist = abs(target[0] - nr) + abs(target[1] - nc)
495
+ candidates.append((new_dist, direction))
496
+
497
+ if not candidates:
498
+ return Action(name="noop")
499
+
500
+ # Sort by distance to target (prefer moves that get closer)
501
+ candidates.sort(key=lambda x: x[0])
502
+ return Action(name=f"move_{candidates[0][1]}")
503
+
504
+ def _compute_goal_cells(
505
+ self, state: AgentState, target: tuple[int, int], reach_adjacent: bool
506
+ ) -> list[tuple[int, int]]:
507
+ """Compute goal cells for pathfinding using internal map knowledge."""
508
+ if not reach_adjacent:
509
+ return [target]
510
+
511
+ goals = []
512
+ for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
513
+ nr, nc = target[0] + dr, target[1] + dc
514
+ # Accept FREE cells, and also UNKNOWN cells (might be reachable)
515
+ if self._is_traversable(state, nr, nc, allow_unknown=True):
516
+ goals.append((nr, nc))
517
+
518
+ return goals
519
+
520
+ def _shortest_path(
521
+ self,
522
+ state: AgentState,
523
+ start: tuple[int, int],
524
+ goals: list[tuple[int, int]],
525
+ allow_unknown: bool = False,
526
+ avoid_agents: bool = True,
527
+ ) -> list[tuple[int, int]]:
528
+ """A* pathfinding from start to any goal, navigating around agents.
529
+
530
+ Uses the internal map built from previous observations. Prefers known paths
531
+ but can traverse unknown cells if allow_unknown=True.
532
+
533
+ Args:
534
+ state: Agent state with internal map
535
+ start: Starting position
536
+ goals: List of goal positions
537
+ allow_unknown: If True, treat UNKNOWN cells as potentially traversable
538
+ avoid_agents: If True, treat agent positions as obstacles (default True)
539
+
540
+ Note: Goal cells are reachable even if they are obstacles (for walking into objects).
541
+ """
542
+ goal_set = set(goals)
543
+ if not goals:
544
+ return []
545
+
546
+ # Use minimum manhattan distance to any goal as heuristic
547
+ def heuristic(pos: tuple[int, int]) -> int:
548
+ return min(abs(pos[0] - g[0]) + abs(pos[1] - g[1]) for g in goals)
549
+
550
+ # Priority queue: (f_score, tie_breaker, position)
551
+ # tie_breaker ensures consistent ordering when f_scores are equal
552
+ tie_breaker = 0
553
+ open_set: list[tuple[int, int, tuple[int, int]]] = [(heuristic(start), tie_breaker, start)]
554
+ came_from: dict[tuple[int, int], Optional[tuple[int, int]]] = {start: None}
555
+ g_score: dict[tuple[int, int], int] = {start: 0}
556
+
557
+ while open_set:
558
+ f_current, _, current = heapq.heappop(open_set)
559
+
560
+ if current in goal_set:
561
+ return self._reconstruct_path(came_from, current)
562
+
563
+ # Lazy deletion: skip stale heap entries superseded by a cheaper path to this node
564
+ current_g = g_score[current]
565
+ if f_current > current_g + heuristic(current):
566
+ continue
567
+
568
+ for nr, nc in self._get_neighbors(state, current):
569
+ neighbor = (nr, nc)
570
+
571
+ # Allow reaching goal cells even if they're obstacles (objects to use)
572
+ is_goal = neighbor in goal_set
573
+ if not is_goal and not self._is_traversable(
574
+ state, nr, nc, allow_unknown=allow_unknown, check_agents=avoid_agents
575
+ ):
576
+ continue
577
+
578
+ tentative_g = current_g + 1
579
+
580
+ if tentative_g < g_score.get(neighbor, float("inf")):
581
+ came_from[neighbor] = current
582
+ g_score[neighbor] = tentative_g
583
+ f_score = tentative_g + heuristic(neighbor)
584
+ tie_breaker += 1
585
+ heapq.heappush(open_set, (f_score, tie_breaker, neighbor))
586
+
587
+ return []
588
+
589
+ def _reconstruct_path(
590
+ self, came_from: dict[tuple[int, int], Optional[tuple[int, int]]], current: tuple[int, int]
591
+ ) -> list[tuple[int, int]]:
592
+ """Reconstruct path from BFS came_from dict."""
593
+ path = []
594
+ while came_from[current] is not None:
595
+ path.append(current)
596
+ prev = came_from[current]
597
+ assert prev is not None
598
+ current = prev
599
+ path.reverse()
600
+ return path
601
+
602
+ def _get_neighbors(self, state: AgentState, pos: tuple[int, int]) -> list[tuple[int, int]]:
603
+ """Get valid neighboring positions."""
604
+ r, c = pos
605
+ candidates = [(r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)]
606
+ return [(nr, nc) for nr, nc in candidates if self._is_in_bounds(state, nr, nc)]
607
+
608
+ def _is_in_bounds(self, state: AgentState, r: int, c: int) -> bool:
609
+ """Check if position is within map bounds."""
610
+ return 0 <= r < state.map.grid_size and 0 <= c < state.map.grid_size
611
+
612
+ def _is_traversable(
613
+ self, state: AgentState, r: int, c: int, allow_unknown: bool = False, check_agents: bool = True
614
+ ) -> bool:
615
+ """Check if a cell is traversable.
616
+
617
+ Args:
618
+ state: Agent state
619
+ r: Row coordinate
620
+ c: Column coordinate
621
+ allow_unknown: If True, treat UNKNOWN cells as potentially traversable (for exploration)
622
+ check_agents: If True, treat cells with agents as non-traversable (default True)
623
+
624
+ Returns:
625
+ True if the cell can be moved into
626
+ """
627
+ if not self._is_in_bounds(state, r, c):
628
+ if DEBUG and state.step == 10 and state.agent_id == 0:
629
+ print(f"[NAV] ({r},{c}) out of bounds")
630
+ return False
631
+
632
+ if check_agents:
633
+ pos = (r, c)
634
+ # Check current observation (definite agent position)
635
+ if pos in state.map.agent_occupancy:
636
+ if DEBUG and state.step == 10 and state.agent_id == 0:
637
+ print(f"[NAV] ({r},{c}) has agent (current obs)")
638
+ return False
639
+
640
+ # Check recently-seen agents (may still be there)
641
+ # Only block if agent was seen very recently (within 5 steps)
642
+ if pos in state.map.recent_agents:
643
+ sighting = state.map.recent_agents[pos]
644
+ if state.step - sighting.last_seen_step <= 5:
645
+ if DEBUG and state.step == 10 and state.agent_id == 0:
646
+ print(f"[NAV] ({r},{c}) recent agent ({state.step - sighting.last_seen_step} ago)")
647
+ return False
648
+
649
+ occ = state.map.occupancy[r][c]
650
+ is_free = occ == CellType.FREE.value
651
+ is_unknown = occ == CellType.UNKNOWN.value
652
+
653
+ # Allow traversal if FREE, or if UNKNOWN and exploration is allowed
654
+ traversable = is_free or (allow_unknown and is_unknown)
655
+
656
+ if DEBUG and state.step == 10 and state.agent_id == 0 and not traversable:
657
+ print(f"[NAV] ({r},{c}) occ={occ}, FREE={CellType.FREE.value}, UNKNOWN={CellType.UNKNOWN.value}")
658
+ return traversable
659
+
660
+ def _get_cached_path(
661
+ self, state: AgentState, target: tuple[int, int], reach_adjacent: bool
662
+ ) -> Optional[list[tuple[int, int]]]:
663
+ """Get cached path if still valid.
664
+
665
+ Invalidates the cached path if:
666
+ - Target changed
667
+ - reach_adjacent mode changed
668
+ - Next step in path is blocked (by obstacle or agent)
669
+ - Any cell in the path is now occupied by an agent
670
+ """
671
+ if (
672
+ state.nav.cached_path
673
+ and state.nav.cached_path_target == target
674
+ and state.nav.cached_path_reach_adjacent == reach_adjacent
675
+ ):
676
+ # Check if any cell in the path is blocked by an agent
677
+ for pos in state.nav.cached_path:
678
+ if pos in state.map.agent_occupancy:
679
+ if DEBUG:
680
+ print(f"[A{state.agent_id}] NAV: Cached path blocked by agent at {pos}")
681
+ return None
682
+
683
+ # Check if next step is traversable
684
+ next_pos = state.nav.cached_path[0]
685
+ if self._is_traversable(state, next_pos[0], next_pos[1]):
686
+ return state.nav.cached_path
687
+ return None
688
+
689
+ # === Escape Mode Handling ===
690
+ # Generalized stuck detection and escape for all behaviors
691
+
692
+ def check_and_handle_escape(self, state: AgentState) -> Optional[Action]:
693
+ """Check if agent is stuck and handle escape mode.
694
+
695
+ This should be called at the start of each behavior's act() method.
696
+ Returns an escape action if in escape mode or stuck, None otherwise.
697
+
698
+ When stuck is detected:
699
+ 1. Calculates escape direction away from center of recent positions
700
+ 2. Enters escape mode, committing to escape for several steps
701
+ 3. Clears navigation state and position history
702
+
703
+ Args:
704
+ state: Agent state
705
+
706
+ Returns:
707
+ Action if escaping, None if not stuck and not in escape mode
708
+ """
709
+ # Track stuck detection: count consecutive steps at same position
710
+ if state.pos == state.last_position:
711
+ state.steps_at_same_position += 1
712
+ else:
713
+ state.steps_at_same_position = 0
714
+ state.last_position = state.pos
715
+
716
+ # Check if we're already in escape mode
717
+ if state.escape_direction is not None and state.step < state.escape_until_step:
718
+ escape_action = self._execute_escape(state)
719
+ if escape_action:
720
+ return escape_action
721
+ # Escape blocked, end escape mode early
722
+ state.escape_direction = None
723
+
724
+ # Check for stuck patterns
725
+ stuck_reason = self._check_stuck_patterns(state)
726
+
727
+ if stuck_reason:
728
+ if DEBUG:
729
+ print(f"[A{state.agent_id}] NAV: STUCK ({stuck_reason}), entering escape mode")
730
+ state.steps_at_same_position = 0
731
+
732
+ # Clear navigation state to force fresh pathfinding
733
+ state.nav.cached_path = None
734
+ state.nav.cached_path_target = None
735
+
736
+ # Calculate escape direction - move AWAY from center of recent positions
737
+ escape_direction = self._calculate_escape_direction(state)
738
+
739
+ # Enter escape mode
740
+ state.escape_direction = escape_direction
741
+ state.escape_until_step = state.step + self.ESCAPE_COMMITMENT_STEPS
742
+
743
+ # Clear position history for fresh stuck detection after escape
744
+ state.nav.position_history.clear()
745
+
746
+ if DEBUG:
747
+ print(f"[A{state.agent_id}] NAV: Escaping {escape_direction} for {self.ESCAPE_COMMITMENT_STEPS} steps")
748
+
749
+ # Execute the first escape step
750
+ escape_action = self._execute_escape(state)
751
+ if escape_action:
752
+ return escape_action
753
+
754
+ # Escape direction completely blocked, clear escape mode
755
+ state.escape_direction = None
756
+
757
+ return None
758
+
759
+ def _check_stuck_patterns(self, state: AgentState) -> Optional[str]:
760
+ """Check for various stuck patterns.
761
+
762
+ Returns a reason string if stuck, None otherwise.
763
+ """
764
+ # Check 1: Same position for too long
765
+ if state.steps_at_same_position >= self.STUCK_THRESHOLD:
766
+ return f"same_pos_{state.steps_at_same_position}"
767
+
768
+ # Check 2: Circular pattern - revisiting positions in recent history
769
+ if len(state.nav.position_history) >= 10:
770
+ recent_history = state.nav.position_history[-self.POSITION_HISTORY_SIZE :]
771
+ current_pos = state.pos
772
+ revisit_count = recent_history.count(current_pos)
773
+ if revisit_count >= self.CIRCULAR_STUCK_THRESHOLD:
774
+ return f"circular_{revisit_count}x"
775
+
776
+ # Check 3: Too few unique positions in recent history (tight circles)
777
+ if len(state.nav.position_history) >= self.TIGHT_LOOP_HISTORY:
778
+ recent = state.nav.position_history[-self.TIGHT_LOOP_HISTORY :]
779
+ unique_positions = len(set(recent))
780
+ if unique_positions <= self.TIGHT_LOOP_UNIQUE_MIN:
781
+ return f"tight_loop_{unique_positions}_unique"
782
+
783
+ return None
784
+
785
+ def _execute_escape(self, state: AgentState) -> Optional[Action]:
786
+ """Execute one step of escape movement.
787
+
788
+ Tries to move in the escape direction, with fallbacks to perpendicular directions.
789
+ Returns None if completely blocked.
790
+ """
791
+ if state.escape_direction is None:
792
+ return None
793
+
794
+ escape_dir = state.escape_direction
795
+ dr, dc = self.MOVE_DELTAS[escape_dir]
796
+
797
+ # Try primary escape direction
798
+ nr, nc = state.row + dr, state.col + dc
799
+ if self._is_traversable(state, nr, nc, allow_unknown=True, check_agents=True):
800
+ return Action(name=f"move_{escape_dir}")
801
+
802
+ # Primary blocked - try perpendicular directions
803
+ if escape_dir in ("north", "south"):
804
+ perpendicular = ["east", "west"]
805
+ else:
806
+ perpendicular = ["north", "south"]
807
+
808
+ random.shuffle(perpendicular)
809
+ for alt_dir in perpendicular:
810
+ alt_dr, alt_dc = self.MOVE_DELTAS[alt_dir]
811
+ alt_r, alt_c = state.row + alt_dr, state.col + alt_dc
812
+ if self._is_traversable(state, alt_r, alt_c, allow_unknown=True, check_agents=True):
813
+ return Action(name=f"move_{alt_dir}")
814
+
815
+ # Try opposite direction as last resort
816
+ opposite = {"north": "south", "south": "north", "east": "west", "west": "east"}
817
+ opp_dir = opposite[escape_dir]
818
+ opp_dr, opp_dc = self.MOVE_DELTAS[opp_dir]
819
+ opp_r, opp_c = state.row + opp_dr, state.col + opp_dc
820
+ if self._is_traversable(state, opp_r, opp_c, allow_unknown=True, check_agents=True):
821
+ # Switch escape direction since we're blocked
822
+ state.escape_direction = opp_dir
823
+ return Action(name=f"move_{opp_dir}")
824
+
825
+ return None
826
+
827
+ def _calculate_escape_direction(self, state: AgentState) -> str:
828
+ """Calculate the best direction to escape from a stuck position.
829
+
830
+ Strategy: Move AWAY from the center of mass of recent positions.
831
+ This prevents oscillating back into the same area.
832
+ """
833
+ history = state.nav.position_history
834
+ if len(history) < 3:
835
+ return random.choice(self.DIRECTIONS)
836
+
837
+ # Calculate center of mass of recent positions
838
+ recent = history[-min(len(history), 15) :]
839
+ avg_row = sum(pos[0] for pos in recent) / len(recent)
840
+ avg_col = sum(pos[1] for pos in recent) / len(recent)
841
+
842
+ # Calculate direction away from center of mass
843
+ dr = state.row - avg_row
844
+ dc = state.col - avg_col
845
+
846
+ # Escape perpendicular to our oscillation axis
847
+ if abs(dr) < abs(dc):
848
+ # Oscillating more east-west, escape north or south
849
+ return "south" if dr >= 0 else "north"
850
+ else:
851
+ # Oscillating more north-south, escape east or west
852
+ return "east" if dc >= 0 else "west"
853
+
854
+ def get_escape_debug_info(self, state: AgentState, stuck_reason: str = "") -> dict:
855
+ """Get debug info dict for escape mode.
856
+
857
+ Useful for behaviors to populate their debug_info when escaping.
858
+ """
859
+ return {
860
+ "mode": "escape",
861
+ "goal": f"escape_{state.escape_direction}" if state.escape_direction else "escape",
862
+ "target_object": "-",
863
+ "signal": stuck_reason or f"until_step_{state.escape_until_step}",
864
+ }