cogames-agents 0.0.0.7__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cogames_agents/__init__.py +0 -0
- cogames_agents/evals/__init__.py +5 -0
- cogames_agents/evals/planky_evals.py +415 -0
- cogames_agents/policy/__init__.py +0 -0
- cogames_agents/policy/evolution/__init__.py +0 -0
- cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
- cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
- cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
- cogames_agents/policy/nim_agents/__init__.py +20 -0
- cogames_agents/policy/nim_agents/agents.py +98 -0
- cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
- cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
- cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
- cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
- cogames_agents/policy/nim_agents/common.nim +1054 -0
- cogames_agents/policy/nim_agents/install.sh +1 -0
- cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
- cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
- cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
- cogames_agents/policy/nim_agents/nimby.lock +3 -0
- cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
- cogames_agents/policy/nim_agents/random_agents.nim +68 -0
- cogames_agents/policy/nim_agents/test_agents.py +53 -0
- cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
- cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
- cogames_agents/policy/scripted_agent/README.md +360 -0
- cogames_agents/policy/scripted_agent/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
- cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
- cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
- cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
- cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
- cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
- cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
- cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
- cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
- cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
- cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
- cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
- cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
- cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
- cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
- cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
- cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
- cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
- cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
- cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
- cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
- cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
- cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
- cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
- cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
- cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
- cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
- cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
- cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
- cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
- cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
- cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
- cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
- cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
- cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
- cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
- cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
- cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
- cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
- cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
- cogames_agents/policy/scripted_agent/common/roles.py +34 -0
- cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
- cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
- cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
- cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
- cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
- cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
- cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
- cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
- cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
- cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
- cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
- cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
- cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
- cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
- cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
- cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
- cogames_agents/policy/scripted_agent/planky/README.md +214 -0
- cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
- cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/planky/context.py +68 -0
- cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
- cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
- cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
- cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
- cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
- cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
- cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
- cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
- cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
- cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
- cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
- cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
- cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
- cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
- cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
- cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
- cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
- cogames_agents/policy/scripted_agent/types.py +239 -0
- cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
- cogames_agents/policy/scripted_agent/utils.py +381 -0
- cogames_agents/policy/scripted_registry.py +80 -0
- cogames_agents/py.typed +0 -0
- cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
- cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
- cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
- cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,864 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Navigator service for Pinky policy.
|
|
3
|
+
|
|
4
|
+
Handles pathfinding, movement, stuck detection, and exploration.
|
|
5
|
+
Uses A* pathfinding with dynamic agent avoidance.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import heapq
|
|
11
|
+
import random
|
|
12
|
+
from typing import TYPE_CHECKING, Optional
|
|
13
|
+
|
|
14
|
+
from cogames_agents.policy.scripted_agent.pinky.types import DEBUG, CellType
|
|
15
|
+
from mettagrid.simulator import Action
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from cogames_agents.policy.scripted_agent.pinky.state import AgentState
|
|
19
|
+
from mettagrid.policy.policy_env_interface import PolicyEnvInterface
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Navigator:
    """Handles all movement decisions - pathfinding, stuck detection, collision avoidance."""

    # Row/column deltas for each cardinal move; grid rows grow southward.
    MOVE_DELTAS = {
        "north": (-1, 0),
        "south": (1, 0),
        "east": (0, 1),
        "west": (0, -1),
    }

    # Canonical iteration order for scanning the four neighbours.
    DIRECTIONS = ["north", "south", "east", "west"]

    # Stuck detection thresholds (balanced)
    STUCK_THRESHOLD = 10  # Consecutive steps at same position
    POSITION_HISTORY_SIZE = 20  # How many positions to track for circular detection
    CIRCULAR_STUCK_THRESHOLD = 5  # Revisiting same position this many times = stuck
    TIGHT_LOOP_HISTORY = 15  # Check this many recent positions for tight loops
    TIGHT_LOOP_UNIQUE_MIN = 4  # If fewer unique positions than this, we're stuck
    # NOTE(review): the five thresholds above are not referenced anywhere in
    # this module's visible code (_is_stuck hard-codes 6/20/10/2 and
    # update_position caps history at 30) — confirm external use before
    # removing or relying on them.

    # Escape mode settings
    ESCAPE_COMMITMENT_STEPS = 4  # When stuck, commit to escaping for this many steps
|
|
43
|
+
|
|
44
|
+
def __init__(self, policy_env_info: PolicyEnvInterface):
|
|
45
|
+
# Store action names for potential validation
|
|
46
|
+
self._action_names = policy_env_info.action_names
|
|
47
|
+
|
|
48
|
+
    def move_to(
        self,
        state: AgentState,
        target: tuple[int, int],
        reach_adjacent: bool = False,
    ) -> Action:
        """Pathfind toward a target position using the internal map.

        Uses A* pathfinding with the map built from previous observations.
        Navigates around other agents dynamically. First tries to find a path
        through known (explored) terrain. If no known path exists, allows traversal
        through unknown cells to reach the target.

        Args:
            state: Current agent state
            target: Target position to reach
            reach_adjacent: If True, stop when adjacent to target instead of on it

        Returns:
            Action to move toward target, or noop if stuck/unreachable
        """
        # Check for stuck loop first; escape takes priority over path following.
        if self._is_stuck(state):
            action = self._break_stuck(state)
            if action:
                return action

        start = state.pos
        if start == target and not reach_adjacent:
            return Action(name="noop")

        # Compute goal cells (the target itself, or its traversable neighbours).
        goal_cells = self._compute_goal_cells(state, target, reach_adjacent)
        if not goal_cells:
            if DEBUG:
                print(f"[A{state.agent_id}] NAV: No goal cells for {target}")
            return Action(name="noop")

        # Check cached path (invalidate if path goes through now-blocked cells or agents)
        path = self._get_cached_path(state, target, reach_adjacent)

        # Compute new path if needed, relaxing constraints in four stages:
        # known terrain avoiding agents → known terrain through agent cells →
        # unknown terrain avoiding agents → unknown terrain through agent cells.
        if path is None:
            # First try to find path through known terrain, avoiding agents
            path = self._shortest_path(state, start, goal_cells, allow_unknown=False, avoid_agents=True)

            # If no path avoiding agents, try allowing agent cells (they may move)
            if not path and state.map.agent_occupancy:
                if DEBUG:
                    print(f"[A{state.agent_id}] NAV: No path avoiding agents, trying through agent cells")
                path = self._shortest_path(state, start, goal_cells, allow_unknown=False, avoid_agents=False)

            # If still no known path, try allowing unknown cells (exploration)
            if not path:
                if DEBUG:
                    print(f"[A{state.agent_id}] NAV: No known path to {target}, trying through unknown")
                path = self._shortest_path(state, start, goal_cells, allow_unknown=True, avoid_agents=True)

            # Last resort: allow both unknown and agent cells
            if not path and state.map.agent_occupancy:
                path = self._shortest_path(state, start, goal_cells, allow_unknown=True, avoid_agents=False)

            # Cache a copy so in-place advancement below can't alias `path`.
            state.nav.cached_path = path.copy() if path else None
            state.nav.cached_path_target = target
            state.nav.cached_path_reach_adjacent = reach_adjacent

        if not path:
            if DEBUG:
                print(f"[A{state.agent_id}] NAV: No path to {target}, exploring")
            return self.explore(state)

        next_pos = path[0]

        # Check if next position is blocked by an agent
        if next_pos in state.map.agent_occupancy:
            # Try to find an immediate sidestep around the blocking agent
            sidestep = self._find_sidestep(state, next_pos, target)
            if sidestep:
                if DEBUG:
                    print(f"[A{state.agent_id}] NAV: Agent at {next_pos}, sidestepping to {sidestep}")
                # Clear cached path since we're deviating
                state.nav.cached_path = None
                state.nav.cached_path_target = None
                return self._move_toward(state, sidestep)
            else:
                # No sidestep available, wait by doing noop (agent may move next step)
                if DEBUG:
                    print(f"[A{state.agent_id}] NAV: Agent blocking at {next_pos}, waiting")
                return Action(name="noop")

        # Advance cached path past the step we are about to take.
        if state.nav.cached_path:
            state.nav.cached_path = state.nav.cached_path[1:]
            if not state.nav.cached_path:
                state.nav.cached_path = None
                state.nav.cached_path_target = None

        return self._move_toward(state, next_pos)
|
|
146
|
+
|
|
147
|
+
def _find_sidestep(
|
|
148
|
+
self, state: AgentState, blocked_pos: tuple[int, int], target: tuple[int, int]
|
|
149
|
+
) -> Optional[tuple[int, int]]:
|
|
150
|
+
"""Find an immediate sidestep around a blocking agent.
|
|
151
|
+
|
|
152
|
+
Tries to find an adjacent free cell that still makes progress toward the target.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
state: Agent state
|
|
156
|
+
blocked_pos: The position blocked by an agent
|
|
157
|
+
target: Ultimate target we're trying to reach
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
A position to sidestep to, or None if no good sidestep available
|
|
161
|
+
"""
|
|
162
|
+
current = state.pos
|
|
163
|
+
current_dist = abs(target[0] - current[0]) + abs(target[1] - current[1])
|
|
164
|
+
|
|
165
|
+
candidates: list[tuple[int, tuple[int, int]]] = []
|
|
166
|
+
|
|
167
|
+
for direction in self.DIRECTIONS:
|
|
168
|
+
dr, dc = self.MOVE_DELTAS[direction]
|
|
169
|
+
nr, nc = current[0] + dr, current[1] + dc
|
|
170
|
+
neighbor = (nr, nc)
|
|
171
|
+
|
|
172
|
+
# Skip the blocked position
|
|
173
|
+
if neighbor == blocked_pos:
|
|
174
|
+
continue
|
|
175
|
+
|
|
176
|
+
# Check if this cell is traversable
|
|
177
|
+
if not self._is_traversable(state, nr, nc, allow_unknown=True, check_agents=True):
|
|
178
|
+
continue
|
|
179
|
+
|
|
180
|
+
# Calculate distance to target from this position
|
|
181
|
+
new_dist = abs(target[0] - nr) + abs(target[1] - nc)
|
|
182
|
+
|
|
183
|
+
# Prefer cells that maintain or improve distance to target
|
|
184
|
+
# Score: lower is better (distance increase as cost)
|
|
185
|
+
score = new_dist - current_dist
|
|
186
|
+
candidates.append((score, neighbor))
|
|
187
|
+
|
|
188
|
+
if not candidates:
|
|
189
|
+
return None
|
|
190
|
+
|
|
191
|
+
# Sort by score (prefer cells that don't increase distance much)
|
|
192
|
+
candidates.sort(key=lambda x: x[0])
|
|
193
|
+
|
|
194
|
+
# Only take sidesteps that don't increase distance by more than 2
|
|
195
|
+
# (otherwise we might be going backwards)
|
|
196
|
+
if candidates[0][0] <= 2:
|
|
197
|
+
return candidates[0][1]
|
|
198
|
+
|
|
199
|
+
return None
|
|
200
|
+
|
|
201
|
+
    def explore(self, state: AgentState, direction_bias: Optional[str] = None) -> Action:
        """Explore by navigating toward unexplored frontier cells.

        Uses the map's explored grid to find the nearest unexplored cell
        adjacent to known territory, then pathfinds toward it. When no
        frontier is found, falls back to walking an expanding box pattern
        anchored at the first exploration position.

        Args:
            state: Agent state with map knowledge
            direction_bias: Optional direction preference to spread agents

        Returns:
            A movement action, or noop when blocked on all four sides.
        """
        # Check for stuck loop
        if self._is_stuck(state):
            action = self._break_stuck(state)
            if action:
                return action

        # Find nearest unexplored frontier cell
        # Use agent_id to bias direction so agents spread out
        if direction_bias is None:
            directions = ["north", "east", "south", "west"]
            direction_bias = directions[state.agent_id % 4]

        frontier = state.map.find_nearest_unexplored(state.pos, max_dist=50, direction_bias=direction_bias)

        if frontier is not None:
            # Navigate toward the frontier cell
            return self.move_to(state, frontier)

        # No frontier found - fall back to expanding box pattern
        if state.nav.explore_origin is None:
            state.nav.explore_origin = state.pos
            state.nav.explore_start_step = state.step

        origin = state.nav.explore_origin
        explore_step = state.step - state.nav.explore_start_step

        # Calculate target position using expanding box pattern
        target = self._get_explore_target(origin, explore_step)

        # Move toward target
        dr = target[0] - state.row
        dc = target[1] - state.col

        # If at target, advance to next step: rebasing explore_start_step makes
        # explore_step one larger on the recursive retry, yielding a new target.
        # NOTE(review): recursion is expected to bottom out after one level
        # because the advanced target differs from the current cell — confirm.
        if dr == 0 and dc == 0:
            state.nav.explore_start_step = state.step - explore_step - 1
            return self.explore(state, direction_bias)

        # Pick direction toward target, prioritizing larger delta
        direction = None
        if abs(dr) >= abs(dc):
            if dr > 0:
                direction = "south"
            elif dr < 0:
                direction = "north"
            elif dc > 0:
                direction = "east"
            elif dc < 0:
                direction = "west"
        else:
            if dc > 0:
                direction = "east"
            elif dc < 0:
                direction = "west"
            elif dr > 0:
                direction = "south"
            elif dr < 0:
                direction = "north"

        if direction:
            move_dr, move_dc = self.MOVE_DELTAS[direction]
            next_r, next_c = state.row + move_dr, state.col + move_dc
            if self._is_traversable(state, next_r, next_c, allow_unknown=True, check_agents=True):
                return Action(name=f"move_{direction}")

        # Primary direction blocked (possibly by agent) - try perpendicular directions
        if direction in ("north", "south"):
            alternatives = ["east", "west"]
        else:
            alternatives = ["north", "south"]

        for alt_dir in alternatives:
            alt_dr, alt_dc = self.MOVE_DELTAS[alt_dir]
            alt_r, alt_c = state.row + alt_dr, state.col + alt_dc
            if self._is_traversable(state, alt_r, alt_c, allow_unknown=True, check_agents=True):
                return Action(name=f"move_{alt_dir}")

        # All directions blocked by obstacles or agents - try any traversable direction
        for fallback_dir in self.DIRECTIONS:
            fb_dr, fb_dc = self.MOVE_DELTAS[fallback_dir]
            fb_r, fb_c = state.row + fb_dr, state.col + fb_dc
            if self._is_traversable(state, fb_r, fb_c, allow_unknown=True, check_agents=True):
                return Action(name=f"move_{fallback_dir}")

        # Completely blocked - wait (agents may move)
        if DEBUG:
            print(f"[A{state.agent_id}] NAV: Explore blocked, waiting")
        return Action(name="noop")
|
|
299
|
+
|
|
300
|
+
def _get_explore_target(self, origin: tuple[int, int], step: int) -> tuple[int, int]:
|
|
301
|
+
"""Calculate target position for expanding box exploration.
|
|
302
|
+
|
|
303
|
+
Creates waypoints in a clockwise expanding box pattern:
|
|
304
|
+
Ring 1: E(5) → S(5) → W(10) → N(10)
|
|
305
|
+
Ring 2: E(10) → S(10) → W(15) → N(15)
|
|
306
|
+
etc.
|
|
307
|
+
"""
|
|
308
|
+
segment_base = 5 # Base segment length (accounts for movement cooldowns)
|
|
309
|
+
ring = 1
|
|
310
|
+
cumulative_steps = 0
|
|
311
|
+
|
|
312
|
+
while True:
|
|
313
|
+
seg_len = segment_base * ring
|
|
314
|
+
# Each ring has 4 segments: E, S, W, N
|
|
315
|
+
# E and S use seg_len, W and N use seg_len + segment_base (to complete the box)
|
|
316
|
+
ring_segments = [
|
|
317
|
+
("east", seg_len),
|
|
318
|
+
("south", seg_len),
|
|
319
|
+
("west", seg_len + segment_base),
|
|
320
|
+
("north", seg_len + segment_base),
|
|
321
|
+
]
|
|
322
|
+
|
|
323
|
+
for direction, length in ring_segments:
|
|
324
|
+
if cumulative_steps + length > step:
|
|
325
|
+
# We're in this segment
|
|
326
|
+
progress = step - cumulative_steps
|
|
327
|
+
dr, dc = self.MOVE_DELTAS[direction]
|
|
328
|
+
# Calculate position at start of this segment
|
|
329
|
+
# then add progress along segment
|
|
330
|
+
seg_start = self._get_segment_start(origin, ring, direction, segment_base)
|
|
331
|
+
return (seg_start[0] + dr * progress, seg_start[1] + dc * progress)
|
|
332
|
+
cumulative_steps += length
|
|
333
|
+
|
|
334
|
+
ring += 1
|
|
335
|
+
if ring > 10: # Safety limit - reset to ring 1
|
|
336
|
+
ring = 1
|
|
337
|
+
cumulative_steps = 0
|
|
338
|
+
|
|
339
|
+
def _get_segment_start(
|
|
340
|
+
self, origin: tuple[int, int], ring: int, direction: str, segment_base: int
|
|
341
|
+
) -> tuple[int, int]:
|
|
342
|
+
"""Get starting position for a segment in the expanding box."""
|
|
343
|
+
# Calculate corner positions for this ring
|
|
344
|
+
# After completing rings 1..ring-1, we're at the start of ring `ring`
|
|
345
|
+
offset = segment_base * ring
|
|
346
|
+
r, c = origin
|
|
347
|
+
|
|
348
|
+
if direction == "east":
|
|
349
|
+
# Start of E segment: NE corner of previous ring (or origin for ring 1)
|
|
350
|
+
if ring == 1:
|
|
351
|
+
return origin
|
|
352
|
+
return (r - offset + segment_base, c + offset - segment_base)
|
|
353
|
+
elif direction == "south":
|
|
354
|
+
# Start of S segment: after going E
|
|
355
|
+
return (r - offset + segment_base, c + offset)
|
|
356
|
+
elif direction == "west":
|
|
357
|
+
# Start of W segment: SE corner
|
|
358
|
+
return (r + offset, c + offset)
|
|
359
|
+
elif direction == "north":
|
|
360
|
+
# Start of N segment: SW corner
|
|
361
|
+
return (r + offset, c - offset)
|
|
362
|
+
return origin
|
|
363
|
+
|
|
364
|
+
def use_object_at(self, state: AgentState, target: tuple[int, int]) -> Action:
|
|
365
|
+
"""Move toward an object cell to interact with it.
|
|
366
|
+
|
|
367
|
+
In mettagrid, moving toward an adjacent object triggers its on_use_handler.
|
|
368
|
+
The move may fail (object is obstacle), but the handler still fires.
|
|
369
|
+
Position tracking correctly stays at the adjacent cell.
|
|
370
|
+
"""
|
|
371
|
+
state.nav.using_object_this_step = True
|
|
372
|
+
return self._move_toward(state, target)
|
|
373
|
+
|
|
374
|
+
def update_position(self, state: AgentState) -> None:
|
|
375
|
+
"""Update agent position based on last executed action.
|
|
376
|
+
|
|
377
|
+
This is a simple action-based update that serves as a fallback.
|
|
378
|
+
The map_tracker's object matching will correct any errors by
|
|
379
|
+
matching visible objects to their known world positions.
|
|
380
|
+
"""
|
|
381
|
+
last_action = state.nav.last_action_executed
|
|
382
|
+
|
|
383
|
+
# Simple action-based position update
|
|
384
|
+
# Object matching in map_tracker will correct any errors
|
|
385
|
+
if last_action and last_action.startswith("move_"):
|
|
386
|
+
direction = last_action[5:] # Remove "move_" prefix
|
|
387
|
+
if direction in self.MOVE_DELTAS:
|
|
388
|
+
dr, dc = self.MOVE_DELTAS[direction]
|
|
389
|
+
new_r, new_c = state.row + dr, state.col + dc
|
|
390
|
+
|
|
391
|
+
# Only update if target is not a known obstacle
|
|
392
|
+
# (object matching will correct if this is wrong)
|
|
393
|
+
if self._is_in_bounds(state, new_r, new_c):
|
|
394
|
+
cell_type = state.map.occupancy[new_r][new_c]
|
|
395
|
+
if cell_type != CellType.OBSTACLE.value:
|
|
396
|
+
state.row = new_r
|
|
397
|
+
state.col = new_c
|
|
398
|
+
|
|
399
|
+
# Track position history for stuck detection
|
|
400
|
+
state.nav.position_history.append(state.pos)
|
|
401
|
+
if len(state.nav.position_history) > 30:
|
|
402
|
+
state.nav.position_history.pop(0)
|
|
403
|
+
|
|
404
|
+
def _is_stuck(self, state: AgentState) -> bool:
|
|
405
|
+
"""Detect if agent is oscillating or revisiting positions frequently.
|
|
406
|
+
|
|
407
|
+
Detects:
|
|
408
|
+
1. Oscillation between 2 positions (A→B→A→B)
|
|
409
|
+
2. Larger oscillation patterns where agent revisits same positions
|
|
410
|
+
"""
|
|
411
|
+
history = state.nav.position_history
|
|
412
|
+
if len(history) < 6:
|
|
413
|
+
return False
|
|
414
|
+
|
|
415
|
+
# Check last 6 positions for tight oscillation (2 positions)
|
|
416
|
+
recent = history[-6:]
|
|
417
|
+
unique_recent = set(recent)
|
|
418
|
+
if len(unique_recent) == 2:
|
|
419
|
+
if DEBUG:
|
|
420
|
+
print(f"[A{state.agent_id}] NAV: Stuck! Oscillating between {unique_recent}")
|
|
421
|
+
return True
|
|
422
|
+
|
|
423
|
+
# Check for larger oscillation pattern - revisiting positions we were at earlier
|
|
424
|
+
# (catches the east-west ping-pong over 8+ steps)
|
|
425
|
+
if len(history) >= 20:
|
|
426
|
+
current_pos = history[-1]
|
|
427
|
+
# Check if current position appeared earlier in history (not just recently)
|
|
428
|
+
earlier_history = history[:-10] # Positions from 10+ steps ago
|
|
429
|
+
revisit_count = earlier_history.count(current_pos)
|
|
430
|
+
if revisit_count >= 2:
|
|
431
|
+
if DEBUG:
|
|
432
|
+
print(f"[A{state.agent_id}] NAV: Stuck loop! Revisited {current_pos} {revisit_count}x")
|
|
433
|
+
return True
|
|
434
|
+
|
|
435
|
+
return False
|
|
436
|
+
|
|
437
|
+
def _break_stuck(self, state: AgentState) -> Optional[Action]:
|
|
438
|
+
"""Try to escape stuck state with random movement into unexplored territory."""
|
|
439
|
+
if DEBUG:
|
|
440
|
+
print(f"[A{state.agent_id}] NAV: Breaking stuck loop")
|
|
441
|
+
|
|
442
|
+
# Clear cached path
|
|
443
|
+
state.nav.cached_path = None
|
|
444
|
+
state.nav.cached_path_target = None
|
|
445
|
+
state.nav.position_history.clear()
|
|
446
|
+
|
|
447
|
+
# Try random direction, allowing unknown cells to escape, avoiding agents
|
|
448
|
+
directions = list(self.DIRECTIONS)
|
|
449
|
+
random.shuffle(directions)
|
|
450
|
+
for direction in directions:
|
|
451
|
+
dr, dc = self.MOVE_DELTAS[direction]
|
|
452
|
+
nr, nc = state.row + dr, state.col + dc
|
|
453
|
+
if self._is_traversable(state, nr, nc, allow_unknown=True, check_agents=True):
|
|
454
|
+
return Action(name=f"move_{direction}")
|
|
455
|
+
return None
|
|
456
|
+
|
|
457
|
+
def _move_toward(self, state: AgentState, target: tuple[int, int]) -> Action:
|
|
458
|
+
"""Return action to move one step toward target."""
|
|
459
|
+
tr, tc = target
|
|
460
|
+
if state.row == tr and state.col == tc:
|
|
461
|
+
return Action(name="noop")
|
|
462
|
+
|
|
463
|
+
dr = tr - state.row
|
|
464
|
+
dc = tc - state.col
|
|
465
|
+
|
|
466
|
+
# Check for agent collision
|
|
467
|
+
if (tr, tc) in state.map.agent_occupancy:
|
|
468
|
+
# Try to go around
|
|
469
|
+
return self._try_alternative_direction(state, target)
|
|
470
|
+
|
|
471
|
+
if dr == -1 and dc == 0:
|
|
472
|
+
return Action(name="move_north")
|
|
473
|
+
elif dr == 1 and dc == 0:
|
|
474
|
+
return Action(name="move_south")
|
|
475
|
+
elif dr == 0 and dc == 1:
|
|
476
|
+
return Action(name="move_east")
|
|
477
|
+
elif dr == 0 and dc == -1:
|
|
478
|
+
return Action(name="move_west")
|
|
479
|
+
|
|
480
|
+
return Action(name="noop")
|
|
481
|
+
|
|
482
|
+
def _try_alternative_direction(self, state: AgentState, target: tuple[int, int]) -> Action:
|
|
483
|
+
"""Try to move around an obstacle or agent toward target.
|
|
484
|
+
|
|
485
|
+
Prefers directions that maintain progress toward the target.
|
|
486
|
+
"""
|
|
487
|
+
# Collect valid moves with their distance to target
|
|
488
|
+
candidates: list[tuple[int, str]] = []
|
|
489
|
+
|
|
490
|
+
for direction in self.DIRECTIONS:
|
|
491
|
+
dr, dc = self.MOVE_DELTAS[direction]
|
|
492
|
+
nr, nc = state.row + dr, state.col + dc
|
|
493
|
+
if self._is_traversable(state, nr, nc, allow_unknown=True, check_agents=True):
|
|
494
|
+
new_dist = abs(target[0] - nr) + abs(target[1] - nc)
|
|
495
|
+
candidates.append((new_dist, direction))
|
|
496
|
+
|
|
497
|
+
if not candidates:
|
|
498
|
+
return Action(name="noop")
|
|
499
|
+
|
|
500
|
+
# Sort by distance to target (prefer moves that get closer)
|
|
501
|
+
candidates.sort(key=lambda x: x[0])
|
|
502
|
+
return Action(name=f"move_{candidates[0][1]}")
|
|
503
|
+
|
|
504
|
+
def _compute_goal_cells(
|
|
505
|
+
self, state: AgentState, target: tuple[int, int], reach_adjacent: bool
|
|
506
|
+
) -> list[tuple[int, int]]:
|
|
507
|
+
"""Compute goal cells for pathfinding using internal map knowledge."""
|
|
508
|
+
if not reach_adjacent:
|
|
509
|
+
return [target]
|
|
510
|
+
|
|
511
|
+
goals = []
|
|
512
|
+
for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
|
|
513
|
+
nr, nc = target[0] + dr, target[1] + dc
|
|
514
|
+
# Accept FREE cells, and also UNKNOWN cells (might be reachable)
|
|
515
|
+
if self._is_traversable(state, nr, nc, allow_unknown=True):
|
|
516
|
+
goals.append((nr, nc))
|
|
517
|
+
|
|
518
|
+
return goals
|
|
519
|
+
|
|
520
|
+
    def _shortest_path(
        self,
        state: AgentState,
        start: tuple[int, int],
        goals: list[tuple[int, int]],
        allow_unknown: bool = False,
        avoid_agents: bool = True,
    ) -> list[tuple[int, int]]:
        """A* pathfinding from start to any goal, navigating around agents.

        Uses the internal map built from previous observations. Prefers known paths
        but can traverse unknown cells if allow_unknown=True.

        Args:
            state: Agent state with internal map
            start: Starting position
            goals: List of goal positions
            allow_unknown: If True, treat UNKNOWN cells as potentially traversable
            avoid_agents: If True, treat agent positions as obstacles (default True)

        Returns:
            Cells from the first step after ``start`` up to the reached goal
            (start excluded, per _reconstruct_path), or [] if no goal is reachable.

        Note: Goal cells are reachable even if they are obstacles (for walking into objects).
        """
        goal_set = set(goals)
        if not goals:
            return []

        # Use minimum manhattan distance to any goal as heuristic.
        # Admissible on a unit-cost 4-connected grid, so A* finds shortest paths.
        def heuristic(pos: tuple[int, int]) -> int:
            return min(abs(pos[0] - g[0]) + abs(pos[1] - g[1]) for g in goals)

        # Priority queue: (f_score, tie_breaker, position)
        # tie_breaker ensures consistent ordering when f_scores are equal
        tie_breaker = 0
        open_set: list[tuple[int, int, tuple[int, int]]] = [(heuristic(start), tie_breaker, start)]
        came_from: dict[tuple[int, int], Optional[tuple[int, int]]] = {start: None}
        g_score: dict[tuple[int, int], int] = {start: 0}

        while open_set:
            _, _, current = heapq.heappop(open_set)

            if current in goal_set:
                return self._reconstruct_path(came_from, current)

            # Skip if we've found a better path to this node already
            # NOTE(review): every node that gets pushed also gets an int
            # g_score entry, so `current_g` is never the float inf here;
            # this guard mainly narrows the type for the arithmetic below.
            # Stale heap entries are made harmless by the tentative_g
            # comparison (their relaxations never improve anything).
            current_g = g_score.get(current, float("inf"))
            if isinstance(current_g, float):
                continue

            for nr, nc in self._get_neighbors(state, current):
                neighbor = (nr, nc)

                # Allow reaching goal cells even if they're obstacles (objects to use)
                is_goal = neighbor in goal_set
                if not is_goal and not self._is_traversable(
                    state, nr, nc, allow_unknown=allow_unknown, check_agents=avoid_agents
                ):
                    continue

                # Uniform cost: every step between adjacent cells costs 1.
                tentative_g = current_g + 1

                if tentative_g < g_score.get(neighbor, float("inf")):
                    came_from[neighbor] = current
                    g_score[neighbor] = tentative_g
                    f_score = tentative_g + heuristic(neighbor)
                    tie_breaker += 1
                    heapq.heappush(open_set, (f_score, tie_breaker, neighbor))

        # Open set exhausted without reaching any goal.
        return []
|
|
588
|
+
|
|
589
|
+
def _reconstruct_path(
|
|
590
|
+
self, came_from: dict[tuple[int, int], Optional[tuple[int, int]]], current: tuple[int, int]
|
|
591
|
+
) -> list[tuple[int, int]]:
|
|
592
|
+
"""Reconstruct path from BFS came_from dict."""
|
|
593
|
+
path = []
|
|
594
|
+
while came_from[current] is not None:
|
|
595
|
+
path.append(current)
|
|
596
|
+
prev = came_from[current]
|
|
597
|
+
assert prev is not None
|
|
598
|
+
current = prev
|
|
599
|
+
path.reverse()
|
|
600
|
+
return path
|
|
601
|
+
|
|
602
|
+
def _get_neighbors(self, state: AgentState, pos: tuple[int, int]) -> list[tuple[int, int]]:
|
|
603
|
+
"""Get valid neighboring positions."""
|
|
604
|
+
r, c = pos
|
|
605
|
+
candidates = [(r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)]
|
|
606
|
+
return [(nr, nc) for nr, nc in candidates if self._is_in_bounds(state, nr, nc)]
|
|
607
|
+
|
|
608
|
+
def _is_in_bounds(self, state: AgentState, r: int, c: int) -> bool:
|
|
609
|
+
"""Check if position is within map bounds."""
|
|
610
|
+
return 0 <= r < state.map.grid_size and 0 <= c < state.map.grid_size
|
|
611
|
+
|
|
612
|
+
def _is_traversable(
|
|
613
|
+
self, state: AgentState, r: int, c: int, allow_unknown: bool = False, check_agents: bool = True
|
|
614
|
+
) -> bool:
|
|
615
|
+
"""Check if a cell is traversable.
|
|
616
|
+
|
|
617
|
+
Args:
|
|
618
|
+
state: Agent state
|
|
619
|
+
r: Row coordinate
|
|
620
|
+
c: Column coordinate
|
|
621
|
+
allow_unknown: If True, treat UNKNOWN cells as potentially traversable (for exploration)
|
|
622
|
+
check_agents: If True, treat cells with agents as non-traversable (default True)
|
|
623
|
+
|
|
624
|
+
Returns:
|
|
625
|
+
True if the cell can be moved into
|
|
626
|
+
"""
|
|
627
|
+
if not self._is_in_bounds(state, r, c):
|
|
628
|
+
if DEBUG and state.step == 10 and state.agent_id == 0:
|
|
629
|
+
print(f"[NAV] ({r},{c}) out of bounds")
|
|
630
|
+
return False
|
|
631
|
+
|
|
632
|
+
if check_agents:
|
|
633
|
+
pos = (r, c)
|
|
634
|
+
# Check current observation (definite agent position)
|
|
635
|
+
if pos in state.map.agent_occupancy:
|
|
636
|
+
if DEBUG and state.step == 10 and state.agent_id == 0:
|
|
637
|
+
print(f"[NAV] ({r},{c}) has agent (current obs)")
|
|
638
|
+
return False
|
|
639
|
+
|
|
640
|
+
# Check recently-seen agents (may still be there)
|
|
641
|
+
# Only block if agent was seen very recently (within 5 steps)
|
|
642
|
+
if pos in state.map.recent_agents:
|
|
643
|
+
sighting = state.map.recent_agents[pos]
|
|
644
|
+
if state.step - sighting.last_seen_step <= 5:
|
|
645
|
+
if DEBUG and state.step == 10 and state.agent_id == 0:
|
|
646
|
+
print(f"[NAV] ({r},{c}) recent agent ({state.step - sighting.last_seen_step} ago)")
|
|
647
|
+
return False
|
|
648
|
+
|
|
649
|
+
occ = state.map.occupancy[r][c]
|
|
650
|
+
is_free = occ == CellType.FREE.value
|
|
651
|
+
is_unknown = occ == CellType.UNKNOWN.value
|
|
652
|
+
|
|
653
|
+
# Allow traversal if FREE, or if UNKNOWN and exploration is allowed
|
|
654
|
+
traversable = is_free or (allow_unknown and is_unknown)
|
|
655
|
+
|
|
656
|
+
if DEBUG and state.step == 10 and state.agent_id == 0 and not traversable:
|
|
657
|
+
print(f"[NAV] ({r},{c}) occ={occ}, FREE={CellType.FREE.value}, UNKNOWN={CellType.UNKNOWN.value}")
|
|
658
|
+
return traversable
|
|
659
|
+
|
|
660
|
+
def _get_cached_path(
|
|
661
|
+
self, state: AgentState, target: tuple[int, int], reach_adjacent: bool
|
|
662
|
+
) -> Optional[list[tuple[int, int]]]:
|
|
663
|
+
"""Get cached path if still valid.
|
|
664
|
+
|
|
665
|
+
Invalidates the cached path if:
|
|
666
|
+
- Target changed
|
|
667
|
+
- reach_adjacent mode changed
|
|
668
|
+
- Next step in path is blocked (by obstacle or agent)
|
|
669
|
+
- Any cell in the path is now occupied by an agent
|
|
670
|
+
"""
|
|
671
|
+
if (
|
|
672
|
+
state.nav.cached_path
|
|
673
|
+
and state.nav.cached_path_target == target
|
|
674
|
+
and state.nav.cached_path_reach_adjacent == reach_adjacent
|
|
675
|
+
):
|
|
676
|
+
# Check if any cell in the path is blocked by an agent
|
|
677
|
+
for pos in state.nav.cached_path:
|
|
678
|
+
if pos in state.map.agent_occupancy:
|
|
679
|
+
if DEBUG:
|
|
680
|
+
print(f"[A{state.agent_id}] NAV: Cached path blocked by agent at {pos}")
|
|
681
|
+
return None
|
|
682
|
+
|
|
683
|
+
# Check if next step is traversable
|
|
684
|
+
next_pos = state.nav.cached_path[0]
|
|
685
|
+
if self._is_traversable(state, next_pos[0], next_pos[1]):
|
|
686
|
+
return state.nav.cached_path
|
|
687
|
+
return None
|
|
688
|
+
|
|
689
|
+
# === Escape Mode Handling ===
|
|
690
|
+
# Generalized stuck detection and escape for all behaviors
|
|
691
|
+
|
|
692
|
+
    def check_and_handle_escape(self, state: AgentState) -> Optional[Action]:
        """Check if agent is stuck and handle escape mode.

        This should be called at the start of each behavior's act() method.
        Returns an escape action if in escape mode or stuck, None otherwise.

        When stuck is detected:
        1. Calculates escape direction away from center of recent positions
        2. Enters escape mode, committing to escape for several steps
        3. Clears navigation state and position history

        Args:
            state: Agent state

        Returns:
            Action if escaping, None if not stuck and not in escape mode
        """
        # Track stuck detection: count consecutive steps at same position
        if state.pos == state.last_position:
            state.steps_at_same_position += 1
        else:
            state.steps_at_same_position = 0
            state.last_position = state.pos

        # Check if we're already in escape mode
        # (a committed direction that has not yet reached its deadline step)
        if state.escape_direction is not None and state.step < state.escape_until_step:
            escape_action = self._execute_escape(state)
            if escape_action:
                return escape_action
            # Escape blocked, end escape mode early
            state.escape_direction = None

        # Check for stuck patterns
        stuck_reason = self._check_stuck_patterns(state)

        if stuck_reason:
            if DEBUG:
                print(f"[A{state.agent_id}] NAV: STUCK ({stuck_reason}), entering escape mode")
            # Reset the counter so the same stall is not re-detected next step.
            state.steps_at_same_position = 0

            # Clear navigation state to force fresh pathfinding
            state.nav.cached_path = None
            state.nav.cached_path_target = None

            # Calculate escape direction - move AWAY from center of recent positions
            escape_direction = self._calculate_escape_direction(state)

            # Enter escape mode
            state.escape_direction = escape_direction
            state.escape_until_step = state.step + self.ESCAPE_COMMITMENT_STEPS

            # Clear position history for fresh stuck detection after escape
            state.nav.position_history.clear()

            if DEBUG:
                print(f"[A{state.agent_id}] NAV: Escaping {escape_direction} for {self.ESCAPE_COMMITMENT_STEPS} steps")

            # Execute the first escape step
            escape_action = self._execute_escape(state)
            if escape_action:
                return escape_action

            # Escape direction completely blocked, clear escape mode
            state.escape_direction = None

        return None
|
|
758
|
+
|
|
759
|
+
def _check_stuck_patterns(self, state: AgentState) -> Optional[str]:
|
|
760
|
+
"""Check for various stuck patterns.
|
|
761
|
+
|
|
762
|
+
Returns a reason string if stuck, None otherwise.
|
|
763
|
+
"""
|
|
764
|
+
# Check 1: Same position for too long
|
|
765
|
+
if state.steps_at_same_position >= self.STUCK_THRESHOLD:
|
|
766
|
+
return f"same_pos_{state.steps_at_same_position}"
|
|
767
|
+
|
|
768
|
+
# Check 2: Circular pattern - revisiting positions in recent history
|
|
769
|
+
if len(state.nav.position_history) >= 10:
|
|
770
|
+
recent_history = state.nav.position_history[-self.POSITION_HISTORY_SIZE :]
|
|
771
|
+
current_pos = state.pos
|
|
772
|
+
revisit_count = recent_history.count(current_pos)
|
|
773
|
+
if revisit_count >= self.CIRCULAR_STUCK_THRESHOLD:
|
|
774
|
+
return f"circular_{revisit_count}x"
|
|
775
|
+
|
|
776
|
+
# Check 3: Too few unique positions in recent history (tight circles)
|
|
777
|
+
if len(state.nav.position_history) >= self.TIGHT_LOOP_HISTORY:
|
|
778
|
+
recent = state.nav.position_history[-self.TIGHT_LOOP_HISTORY :]
|
|
779
|
+
unique_positions = len(set(recent))
|
|
780
|
+
if unique_positions <= self.TIGHT_LOOP_UNIQUE_MIN:
|
|
781
|
+
return f"tight_loop_{unique_positions}_unique"
|
|
782
|
+
|
|
783
|
+
return None
|
|
784
|
+
|
|
785
|
+
def _execute_escape(self, state: AgentState) -> Optional[Action]:
|
|
786
|
+
"""Execute one step of escape movement.
|
|
787
|
+
|
|
788
|
+
Tries to move in the escape direction, with fallbacks to perpendicular directions.
|
|
789
|
+
Returns None if completely blocked.
|
|
790
|
+
"""
|
|
791
|
+
if state.escape_direction is None:
|
|
792
|
+
return None
|
|
793
|
+
|
|
794
|
+
escape_dir = state.escape_direction
|
|
795
|
+
dr, dc = self.MOVE_DELTAS[escape_dir]
|
|
796
|
+
|
|
797
|
+
# Try primary escape direction
|
|
798
|
+
nr, nc = state.row + dr, state.col + dc
|
|
799
|
+
if self._is_traversable(state, nr, nc, allow_unknown=True, check_agents=True):
|
|
800
|
+
return Action(name=f"move_{escape_dir}")
|
|
801
|
+
|
|
802
|
+
# Primary blocked - try perpendicular directions
|
|
803
|
+
if escape_dir in ("north", "south"):
|
|
804
|
+
perpendicular = ["east", "west"]
|
|
805
|
+
else:
|
|
806
|
+
perpendicular = ["north", "south"]
|
|
807
|
+
|
|
808
|
+
random.shuffle(perpendicular)
|
|
809
|
+
for alt_dir in perpendicular:
|
|
810
|
+
alt_dr, alt_dc = self.MOVE_DELTAS[alt_dir]
|
|
811
|
+
alt_r, alt_c = state.row + alt_dr, state.col + alt_dc
|
|
812
|
+
if self._is_traversable(state, alt_r, alt_c, allow_unknown=True, check_agents=True):
|
|
813
|
+
return Action(name=f"move_{alt_dir}")
|
|
814
|
+
|
|
815
|
+
# Try opposite direction as last resort
|
|
816
|
+
opposite = {"north": "south", "south": "north", "east": "west", "west": "east"}
|
|
817
|
+
opp_dir = opposite[escape_dir]
|
|
818
|
+
opp_dr, opp_dc = self.MOVE_DELTAS[opp_dir]
|
|
819
|
+
opp_r, opp_c = state.row + opp_dr, state.col + opp_dc
|
|
820
|
+
if self._is_traversable(state, opp_r, opp_c, allow_unknown=True, check_agents=True):
|
|
821
|
+
# Switch escape direction since we're blocked
|
|
822
|
+
state.escape_direction = opp_dir
|
|
823
|
+
return Action(name=f"move_{opp_dir}")
|
|
824
|
+
|
|
825
|
+
return None
|
|
826
|
+
|
|
827
|
+
def _calculate_escape_direction(self, state: AgentState) -> str:
|
|
828
|
+
"""Calculate the best direction to escape from a stuck position.
|
|
829
|
+
|
|
830
|
+
Strategy: Move AWAY from the center of mass of recent positions.
|
|
831
|
+
This prevents oscillating back into the same area.
|
|
832
|
+
"""
|
|
833
|
+
history = state.nav.position_history
|
|
834
|
+
if len(history) < 3:
|
|
835
|
+
return random.choice(self.DIRECTIONS)
|
|
836
|
+
|
|
837
|
+
# Calculate center of mass of recent positions
|
|
838
|
+
recent = history[-min(len(history), 15) :]
|
|
839
|
+
avg_row = sum(pos[0] for pos in recent) / len(recent)
|
|
840
|
+
avg_col = sum(pos[1] for pos in recent) / len(recent)
|
|
841
|
+
|
|
842
|
+
# Calculate direction away from center of mass
|
|
843
|
+
dr = state.row - avg_row
|
|
844
|
+
dc = state.col - avg_col
|
|
845
|
+
|
|
846
|
+
# Escape perpendicular to our oscillation axis
|
|
847
|
+
if abs(dr) < abs(dc):
|
|
848
|
+
# Oscillating more east-west, escape north or south
|
|
849
|
+
return "south" if dr >= 0 else "north"
|
|
850
|
+
else:
|
|
851
|
+
# Oscillating more north-south, escape east or west
|
|
852
|
+
return "east" if dc >= 0 else "west"
|
|
853
|
+
|
|
854
|
+
def get_escape_debug_info(self, state: AgentState, stuck_reason: str = "") -> dict:
|
|
855
|
+
"""Get debug info dict for escape mode.
|
|
856
|
+
|
|
857
|
+
Useful for behaviors to populate their debug_info when escaping.
|
|
858
|
+
"""
|
|
859
|
+
return {
|
|
860
|
+
"mode": "escape",
|
|
861
|
+
"goal": f"escape_{state.escape_direction}" if state.escape_direction else "escape",
|
|
862
|
+
"target_object": "-",
|
|
863
|
+
"signal": stuck_reason or f"until_step_{state.escape_until_step}",
|
|
864
|
+
}
|