cogames_agents-0.0.0.7-cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cogames_agents/__init__.py +0 -0
- cogames_agents/evals/__init__.py +5 -0
- cogames_agents/evals/planky_evals.py +415 -0
- cogames_agents/policy/__init__.py +0 -0
- cogames_agents/policy/evolution/__init__.py +0 -0
- cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
- cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
- cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
- cogames_agents/policy/nim_agents/__init__.py +20 -0
- cogames_agents/policy/nim_agents/agents.py +98 -0
- cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
- cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
- cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
- cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
- cogames_agents/policy/nim_agents/common.nim +1054 -0
- cogames_agents/policy/nim_agents/install.sh +1 -0
- cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
- cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
- cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
- cogames_agents/policy/nim_agents/nimby.lock +3 -0
- cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
- cogames_agents/policy/nim_agents/random_agents.nim +68 -0
- cogames_agents/policy/nim_agents/test_agents.py +53 -0
- cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
- cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
- cogames_agents/policy/scripted_agent/README.md +360 -0
- cogames_agents/policy/scripted_agent/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
- cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
- cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
- cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
- cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
- cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
- cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
- cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
- cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
- cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
- cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
- cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
- cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
- cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
- cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
- cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
- cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
- cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
- cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
- cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
- cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
- cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
- cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
- cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
- cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
- cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
- cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
- cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
- cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
- cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
- cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
- cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
- cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
- cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
- cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
- cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
- cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
- cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
- cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
- cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
- cogames_agents/policy/scripted_agent/common/roles.py +34 -0
- cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
- cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
- cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
- cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
- cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
- cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
- cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
- cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
- cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
- cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
- cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
- cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
- cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
- cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
- cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
- cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
- cogames_agents/policy/scripted_agent/planky/README.md +214 -0
- cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
- cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/planky/context.py +68 -0
- cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
- cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
- cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
- cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
- cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
- cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
- cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
- cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
- cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
- cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
- cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
- cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
- cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
- cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
- cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
- cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
- cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
- cogames_agents/policy/scripted_agent/types.py +239 -0
- cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
- cogames_agents/policy/scripted_agent/utils.py +381 -0
- cogames_agents/policy/scripted_registry.py +80 -0
- cogames_agents/py.typed +0 -0
- cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
- cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
- cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
- cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
cogames_agents/policy/scripted_agent/planky/policy.py
@@ -0,0 +1,485 @@
+"""
+Planky Policy — goal-tree scripted agent.
+
+PlankyBrain coordinates per-agent state and goal evaluation.
+PlankyPolicy is the multi-agent wrapper with URI-based role distribution.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+
+from mettagrid.mettagrid_c import dtype_actions
+from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
+from mettagrid.policy.policy_env_interface import PolicyEnvInterface
+from mettagrid.simulator import Action, ObservationToken
+from mettagrid.simulator.interface import AgentObservation
+
+from .context import PlankyContext, StateSnapshot
+from .entity_map import EntityMap
+from .goal import Goal, evaluate_goals
+from .goals.aligner import AlignJunctionGoal, GetAlignerGearGoal
+from .goals.miner import DepositCargoGoal, ExploreHubGoal, GetMinerGearGoal, MineResourceGoal, PickResourceGoal
+from .goals.scout import ExploreGoal, GetScoutGearGoal
+from .goals.scrambler import GetScramblerGearGoal, ScrambleJunctionGoal
+from .goals.shared import FallbackMineGoal, GetHeartsGoal
+from .goals.stem import SelectRoleGoal
+from .goals.survive import SurviveGoal
+from .navigator import Navigator
+from .obs_parser import ObsParser
+from .trace import TraceLog
+
+# Role vibes that map to roles
+VIBE_TO_ROLE = {"miner", "scout", "aligner", "scrambler"}
+
+# Default spawn position (center of 200x200 grid)
+SPAWN_POS = (100, 100)
+
+
+def _make_goal_list(role: str) -> list[Goal]:
+    """Create goal list for a role."""
+    if role == "miner":
+        return [
+            SurviveGoal(hp_threshold=15),
+            ExploreHubGoal(),
+            GetMinerGearGoal(),
+            PickResourceGoal(),
+            DepositCargoGoal(),
+            MineResourceGoal(),
+        ]
+    elif role == "scout":
+        return [
+            SurviveGoal(hp_threshold=50),
+            GetScoutGearGoal(),
+            ExploreGoal(),
+        ]
+    elif role == "aligner":
+        # Aligners NEED gear + heart to align junctions.
+        # Hearts require gear first — don't waste resources on hearts without gear.
+        # FallbackMine at end: mine resources when can't get gear/hearts.
+        return [
+            SurviveGoal(hp_threshold=50),
+            GetAlignerGearGoal(),
+            GetHeartsGoal(),
+            AlignJunctionGoal(),
+            FallbackMineGoal(),
+        ]
+    elif role == "scrambler":
+        # Scramblers NEED gear + heart to scramble junctions.
+        # FallbackMine at end: mine resources when can't get gear/hearts.
+        return [
+            SurviveGoal(hp_threshold=30),
+            GetScramblerGearGoal(),
+            GetHeartsGoal(),
+            ScrambleJunctionGoal(),
+            FallbackMineGoal(),
+        ]
+    elif role == "stem":
+        return [
+            SurviveGoal(hp_threshold=20),
+            SelectRoleGoal(),
+        ]
+    else:
+        # Default/inactive
+        return []
+
+
+class PlankyAgentState:
+    """Persistent state for a Planky agent across ticks."""
+
+    def __init__(self, agent_id: int, role: str, goals: list[Goal]) -> None:
+        self.agent_id = agent_id
+        self.role = role
+        self.goals = goals
+        self.entity_map = EntityMap()
+        self.navigator = Navigator()
+        self.blackboard: dict[str, Any] = {}
+        self.step = 0
+        self.my_collective_id: int | None = None
+
+
+class PlankyBrain(StatefulPolicyImpl[PlankyAgentState]):
+    """Per-agent coordinator that owns state and evaluates the goal tree."""
+
+    def __init__(
+        self,
+        policy_env_info: PolicyEnvInterface,
+        agent_id: int,
+        role: str,
+        trace_enabled: bool = False,
+        trace_level: int = 1,
+        trace_agent: int = -1,
+    ) -> None:
+        self._agent_id = agent_id
+        self._policy_env_info = policy_env_info
+        self._role = role
+        self._obs_parser = ObsParser(policy_env_info)
+        self._action_names = policy_env_info.action_names
+
+        # Tracing
+        self._trace_enabled = trace_enabled
+        self._trace_level = trace_level
+        self._trace_agent = trace_agent  # -1 = trace all
+
+    def initial_agent_state(self) -> PlankyAgentState:
+        goals = _make_goal_list(self._role)
+        return PlankyAgentState(
+            agent_id=self._agent_id,
+            role=self._role,
+            goals=goals,
+        )
+
+    def step_with_state(self, obs: AgentObservation, agent_state: PlankyAgentState) -> tuple[Action, PlankyAgentState]:
+        agent_state.step += 1
+
+        # Parse observation
+        state, visible_entities = self._obs_parser.parse(obs, agent_state.step, SPAWN_POS)
+
+        # Update entity map
+        agent_state.entity_map.update_from_observation(
+            agent_pos=state.position,
+            obs_half_height=self._obs_parser.obs_half_height,
+            obs_half_width=self._obs_parser.obs_half_width,
+            visible_entities=visible_entities,
+            step=agent_state.step,
+        )
+
+        # Detect own collective_id from nearest hub (once)
+        if agent_state.my_collective_id is None:
+            hub = agent_state.entity_map.find_nearest(state.position, type_contains="hub")
+            if hub is not None:
+                _, hub_entity = hub
+                cid = hub_entity.properties.get("collective_id")
+                if cid is not None:
+                    agent_state.my_collective_id = cid
+
+        # Detect useful actions by comparing state changes
+        # Useful = mined resources, deposited to collective, aligned/scrambled junction
+        self._detect_useful_action(state, agent_state)
+
+        # Detect failed moves: if last action was a move but position didn't change
+        last_pos = agent_state.blackboard.get("_last_pos")
+        last_action = agent_state.blackboard.get("_last_action", "")
+        if last_pos is not None and last_action.startswith("move_") and state.position == last_pos:
+            # Move failed - track consecutive failures
+            fail_count = agent_state.blackboard.get("_move_fail_count", 0) + 1
+            agent_state.blackboard["_move_fail_count"] = fail_count
+
+            # After 3 consecutive failed moves, clear navigation cache and targets
+            if fail_count >= 3:
+                agent_state.navigator._cached_path = None
+                agent_state.navigator._cached_target = None
+            # Clear any target resource selection to force re-evaluation
+            if fail_count >= 6:
+                agent_state.blackboard.pop("target_resource", None)
+                agent_state.blackboard["_move_fail_count"] = 0
+        else:
+            agent_state.blackboard["_move_fail_count"] = 0
+
+        agent_state.blackboard["_last_pos"] = state.position
+
+        # Vibe-driven role system: agent's role IS their vibe
+        # "default" → set initial role vibe
+        # "gear" → stem mode (role selection)
+        # any valid role → run that role's goals
+
+        # Check if goals want to change role (via blackboard)
+        if "change_role" in agent_state.blackboard:
+            new_role = agent_state.blackboard.pop("change_role")
+            if new_role in VIBE_TO_ROLE:
+                return Action(name=f"change_vibe_{new_role}"), agent_state
+
+        # Map vibe to role
+        current_vibe = state.vibe
+        if current_vibe == "default":
+            if self._role in VIBE_TO_ROLE:
+                # Non-stem agent: set initial role vibe
+                return Action(name=f"change_vibe_{self._role}"), agent_state
+            else:
+                # Stem agent: default vibe = stem mode
+                effective_role = "stem"
+        elif current_vibe == "gear":
+            # Gear vibe = stem mode (role selection)
+            effective_role = "stem"
+        elif current_vibe in VIBE_TO_ROLE:
+            effective_role = current_vibe
+        else:
+            if self._role in VIBE_TO_ROLE:
+                return Action(name=f"change_vibe_{self._role}"), agent_state
+            effective_role = "stem"
+
+        # Update goals if role changed
+        if effective_role != agent_state.role:
+            if self._should_trace(agent_state):
+                print(f"[planky][t={agent_state.step} a={self._agent_id}] role: {agent_state.role}→{effective_role}")
+            agent_state.role = effective_role
+            agent_state.goals = _make_goal_list(effective_role)
+
+        # Build context
+        should_trace = self._should_trace(agent_state)
+        trace = TraceLog() if should_trace else None
+
+        # Calculate steps since last useful action
+        last_useful = agent_state.blackboard.get("_last_useful_step", 0)
+        steps_since_useful = agent_state.step - last_useful
+        if trace:
+            trace.steps_since_useful = steps_since_useful
+
+        # If we've been idle too long (100+ steps), force a reset of cached state
+        # This helps break out of stuck loops
+        if steps_since_useful >= 100 and steps_since_useful % 50 == 0:
+            # Clear cached navigation and target selections
+            agent_state.navigator._cached_path = None
+            agent_state.navigator._cached_target = None
+            agent_state.blackboard.pop("target_resource", None)
+            if trace:
+                trace.activate("IdleReset", f"clearing cache after {steps_since_useful} idle steps")
+
+        ctx = PlankyContext(
+            state=state,
+            map=agent_state.entity_map,
+            blackboard=agent_state.blackboard,
+            navigator=agent_state.navigator,
+            trace=trace,
+            action_names=self._action_names,
+            agent_id=self._agent_id,
+            step=agent_state.step,
+            my_collective_id=agent_state.my_collective_id,
+        )
+
+        # If we're stuck (many failed moves), force exploration to discover terrain
+        fail_count = agent_state.blackboard.get("_move_fail_count", 0)
+        if fail_count >= 6:
+            action = agent_state.navigator.explore(
+                state.position,
+                agent_state.entity_map,
+                direction_bias=["north", "east", "south", "west"][self._agent_id % 4],
+            )
+            if trace:
+                trace.active_goal_chain = f"ForceExplore(stuck={fail_count})"
+                trace.action_name = action.name
+        else:
+            # Evaluate goals normally
+            action = evaluate_goals(agent_state.goals, ctx)
+
+        # DEBUG: dump all entity types for agent 0
+        if self._agent_id == 0 and agent_state.step == 25:
+            all_types = {}
+            for p, e in agent_state.entity_map.entities.items():
+                t = e.type
+                if t not in all_types:
+                    all_types[t] = []
+                all_types[t].append((p, e.properties.get("alignment")))
+            for t, entries in sorted(all_types.items()):
+                print(f"[planky-debug] type={t}: {entries[:5]}")
+
+        # Emit trace
+        if trace:
+            line = trace.format_line(
+                step=agent_state.step,
+                agent_id=self._agent_id,
+                role=agent_state.role,
+                pos=state.position,
+                hp=state.hp,
+                level=self._trace_level,
+            )
+            print(f"[planky] {line}")
+            # Log collective resources and entity map info
+            if agent_state.step % 25 == 0 or agent_state.step == 3:
+                print(
+                    f"[planky][t={agent_state.step} a={self._agent_id}] "
+                    f"collective: C={state.collective_carbon} O={state.collective_oxygen} "
+                    f"G={state.collective_germanium} S={state.collective_silicon} "
+                    f"cargo={state.cargo_total}/{state.cargo_capacity} "
+                    f"energy={state.energy}"
+                )
+
+        # Track action for failed-move detection
+        agent_state.blackboard["_last_action"] = action.name
+
+        return action, agent_state
+
+    def _should_trace(self, agent_state: PlankyAgentState) -> bool:
+        if not self._trace_enabled:
+            return False
+        if self._trace_agent >= 0 and self._agent_id != self._trace_agent:
+            return False
+        return True
+
+    def _detect_useful_action(self, state: StateSnapshot, agent_state: PlankyAgentState) -> None:
+        """Detect if a useful action occurred by comparing state changes.
+
+        Useful actions:
+        - Mine: cargo increased
+        - Deposit: cargo decreased AND collective increased
+        - Align/Scramble: heart decreased (spent on junction action)
+        - Got gear: gear flag changed
+        - Got heart: heart count increased
+        """
+        bb = agent_state.blackboard
+
+        # Get previous state values
+        prev_cargo = bb.get("_prev_cargo", 0)
+        prev_heart = bb.get("_prev_heart", 0)
+        prev_collective_total = bb.get("_prev_collective_total", 0)
+
+        # Calculate current values
+        current_cargo = state.cargo_total
+        current_heart = state.heart
+        current_collective = (
+            state.collective_carbon + state.collective_oxygen + state.collective_germanium + state.collective_silicon
+        )
+
+        # Detect useful actions
+        useful = False
+
+        # Mined resources (cargo increased)
+        if current_cargo > prev_cargo:
+            useful = True
+
+        # Deposited resources (cargo decreased, collective increased)
+        if current_cargo < prev_cargo and current_collective > prev_collective_total:
+            useful = True
+
+        # Got a heart (heart increased)
+        if current_heart > prev_heart:
+            useful = True
+
+        # Spent a heart on align/scramble (heart decreased)
+        if current_heart < prev_heart:
+            useful = True
+
+        # Update tracking
+        if useful:
+            bb["_last_useful_step"] = agent_state.step
+
+        # Store current values for next tick comparison
+        bb["_prev_cargo"] = current_cargo
+        bb["_prev_heart"] = current_heart
+        bb["_prev_collective_total"] = current_collective
+
+
+class PlankyPolicy(MultiAgentPolicy):
+    """Multi-agent goal-tree policy with URI-based role distribution.
+
+    URI parameters:
+        ?miner=4&scout=0&aligner=2&scrambler=4 — role counts
+        ?trace=1&trace_level=2&trace_agent=0 — tracing
+    """
+
+    short_names = ["planky"]
+
+    def __init__(
+        self,
+        policy_env_info: PolicyEnvInterface,
+        device: str = "cpu",
+        # Role counts — if stem > 0, defaults to all-stem unless explicit roles given
+        miner: int = -1,
+        scout: int = 0,
+        aligner: int = -1,
+        scrambler: int = -1,
+        stem: int = 0,
+        # Tracing
+        trace: int = 0,
+        trace_level: int = 1,
+        trace_agent: int = -1,
+        # Accept any extra kwargs
+        **kwargs: object,
+    ) -> None:
+        super().__init__(policy_env_info, device=device)
+        self._feature_by_id = {f.id: f for f in policy_env_info.obs_features}
+        self._action_name_to_index = {name: idx for idx, name in enumerate(policy_env_info.action_names)}
+        print(f"[planky] Action names: {list(policy_env_info.action_names)}")
+        self._noop_action_value = dtype_actions.type(self._action_name_to_index.get("noop", 0))
+
+        # Tracing
+        self._trace_enabled = bool(trace)
+        self._trace_level = trace_level
+        self._trace_agent = trace_agent
+
+        # Resolve defaults: if stem > 0 and miner/aligner/scrambler not explicitly set, zero them
+        if stem > 0:
+            if miner == -1:
+                miner = 0
+            if aligner == -1:
+                aligner = 0
+            if scrambler == -1:
+                scrambler = 0
+        else:
+            if miner == -1:
+                miner = 3
+            if aligner == -1:
+                aligner = 5
+            if scrambler == -1:
+                scrambler = 0
+
+        # Build per-team role distribution
+        team_roles: list[str] = []
+        team_roles.extend(["miner"] * miner)
+        team_roles.extend(["scout"] * scout)
+        team_roles.extend(["aligner"] * aligner)
+        team_roles.extend(["scrambler"] * scrambler)
+        team_roles.extend(["stem"] * stem)
+
+        # Tile the role distribution to cover all agents (supports multi-team setups).
+        num_agents = policy_env_info.num_agents
+        team_size = len(team_roles) if team_roles else 1
+        num_teams = max(1, (num_agents + team_size - 1) // team_size)
+        self._role_distribution: list[str] = (team_roles * num_teams)[:num_agents]
+
+        if self._trace_enabled:
+            print(f"[planky] Role distribution ({num_teams} teams): {self._role_distribution}")
+
+        self._agent_policies: dict[int, StatefulAgentPolicy[PlankyAgentState]] = {}
+
+    def agent_policy(self, agent_id: int) -> StatefulAgentPolicy[PlankyAgentState]:
+        if agent_id not in self._agent_policies:
+            role = self._role_distribution[agent_id] if agent_id < len(self._role_distribution) else "default"
+
+            brain = PlankyBrain(
+                policy_env_info=self._policy_env_info,
+                agent_id=agent_id,
+                role=role,
+                trace_enabled=self._trace_enabled,
+                trace_level=self._trace_level,
+                trace_agent=self._trace_agent,
+            )
+
+            self._agent_policies[agent_id] = StatefulAgentPolicy(
+                brain,
+                self._policy_env_info,
+                agent_id=agent_id,
+            )
+
+        return self._agent_policies[agent_id]
+
+    def step_batch(self, raw_observations: np.ndarray, raw_actions: np.ndarray) -> None:
+        raw_actions[...] = self._noop_action_value
+        num_agents = min(raw_observations.shape[0], self._policy_env_info.num_agents)
+        for agent_id in range(num_agents):
+            obs = self._raw_obs_to_agent_obs(agent_id, raw_observations[agent_id])
+            action = self.agent_policy(agent_id).step(obs)
+            action_index = self._action_name_to_index.get(action.name, 0)
+            raw_actions[agent_id] = dtype_actions.type(action_index)
+
+    def _raw_obs_to_agent_obs(self, agent_id: int, raw_obs: np.ndarray) -> AgentObservation:
+        tokens: list[ObservationToken] = []
+        for token in raw_obs:
+            feature_id = int(token[1])
+            if feature_id == 0xFF:
+                break
+            feature = self._feature_by_id.get(feature_id)
+            if feature is None:
+                continue
+            location_packed = int(token[0])
+            value = int(token[2])
+            tokens.append(
+                ObservationToken(
+                    feature=feature,
+                    value=value,
+                    raw_token=(location_packed, feature_id, value),
+                )
+            )
+        return AgentObservation(agent_id=agent_id, tokens=tokens)
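The role-count defaults and tiling in `PlankyPolicy.__init__` are the policy's main configuration surface. Here is a standalone sketch of that same logic; the helper name `resolve_role_distribution` is ours, not part of the package:

```python
def resolve_role_distribution(
    num_agents: int,
    miner: int = -1,
    scout: int = 0,
    aligner: int = -1,
    scrambler: int = -1,
    stem: int = 0,
) -> list[str]:
    # Mirrors PlankyPolicy.__init__: stem > 0 zeroes any unset roles,
    # otherwise unset roles fall back to 3 miners / 5 aligners / 0 scramblers.
    if stem > 0:
        miner = 0 if miner == -1 else miner
        aligner = 0 if aligner == -1 else aligner
        scrambler = 0 if scrambler == -1 else scrambler
    else:
        miner = 3 if miner == -1 else miner
        aligner = 5 if aligner == -1 else aligner
        scrambler = 0 if scrambler == -1 else scrambler

    team_roles = (
        ["miner"] * miner
        + ["scout"] * scout
        + ["aligner"] * aligner
        + ["scrambler"] * scrambler
        + ["stem"] * stem
    )
    # Tile the per-team pattern across all agents (multi-team setups).
    team_size = len(team_roles) if team_roles else 1
    num_teams = max(1, (num_agents + team_size - 1) // team_size)
    return (team_roles * num_teams)[:num_agents]


# With the defaults and 10 agents, the 8-role pattern (3 miners, 5 aligners)
# wraps to fill the remaining 2 slots:
print(resolve_role_distribution(10))
# ['miner', 'miner', 'miner', 'aligner', 'aligner', 'aligner',
#  'aligner', 'aligner', 'miner', 'miner']
```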
cogames_agents/policy/scripted_agent/planky/tests/__init__.py
File without changes
cogames_agents/policy/scripted_agent/planky/tests/conftest.py
@@ -0,0 +1,66 @@
+"""Shared fixtures for Planky capability tests."""
+
+import pytest
+
+from cogames_agents.policy.scripted_agent.planky.tests.helpers import (
+    EpisodeResult,
+    run_planky_episode,
+)
+
+MISSION = "cogsguard_machina_1.basic"
+DEFAULT_STEPS = 500
+DEFAULT_SEED = 42
+
+
+@pytest.fixture(scope="module")
+def miner_episode() -> EpisodeResult:
+    return run_planky_episode(
+        policy_uri="metta://policy/planky?miner=1&aligner=0&trace=1&trace_level=2&trace_agent=0",
+        mission=MISSION,
+        steps=DEFAULT_STEPS,
+        seed=DEFAULT_SEED,
+    )
+
+
+@pytest.fixture(scope="module")
+def aligner_episode() -> EpisodeResult:
+    """Aligner needs miners to fund gear + hearts."""
+    return run_planky_episode(
+        policy_uri="metta://policy/planky?miner=4&aligner=1&scrambler=0&trace=1&trace_level=2&trace_agent=4",
+        mission=MISSION,
+        steps=DEFAULT_STEPS,
+        seed=DEFAULT_SEED,
+    )
+
+
+@pytest.fixture(scope="module")
+def scrambler_episode() -> EpisodeResult:
+    """Scrambler needs miners to fund gear + hearts."""
+    return run_planky_episode(
+        policy_uri="metta://policy/planky?miner=4&aligner=0&scrambler=1&trace=1&trace_level=2&trace_agent=4",
+        mission=MISSION,
+        steps=DEFAULT_STEPS,
+        seed=DEFAULT_SEED,
+    )
+
+
+@pytest.fixture(scope="module")
+def scout_episode() -> EpisodeResult:
+    """Scout needs miners to fund gear."""
+    return run_planky_episode(
+        policy_uri="metta://policy/planky?miner=4&aligner=0&scout=1&trace=1&trace_level=2&trace_agent=4",
+        mission=MISSION,
+        steps=DEFAULT_STEPS,
+        seed=DEFAULT_SEED,
+    )
+
+
+@pytest.fixture(scope="module")
+def stem_episode() -> EpisodeResult:
+    """Full stem=5 run — tests dynamic role selection + pipeline."""
+    return run_planky_episode(
+        policy_uri="metta://policy/planky?stem=5&trace=1&trace_level=2",
+        mission=MISSION,
+        steps=DEFAULT_STEPS,
+        seed=DEFAULT_SEED,
+    )
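The tests that consume these fixtures (`test_miner.py` and friends, listed in the RECORD above) are not reproduced in this diff. A hypothetical test built on the `miner_episode` fixture might look like the following; the assertion is illustrative, not the package's real check:

```python
"""Hypothetical capability test using the conftest fixtures above."""

from cogames_agents.policy.scripted_agent.planky.tests.helpers import EpisodeResult


def test_miner_deposits_resources(miner_episode: EpisodeResult) -> None:
    # The module-scoped fixture runs one 500-step episode; every test in the
    # module shares the same EpisodeResult. On failure, trace.summary() shows
    # which goals activated, which helps diagnose a stuck agent.
    assert miner_episode.total_deposited() > 0, miner_episode.trace.summary()
```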
cogames_agents/policy/scripted_agent/planky/tests/helpers.py
@@ -0,0 +1,152 @@
+"""Shared helpers for Planky capability tests."""
+
+from __future__ import annotations
+
+import io
+import re
+import sys
+from dataclasses import dataclass, field
+from typing import Optional
+
+from metta_alo.rollout import run_single_episode
+
+from cogames.cli.mission import get_mission
+from mettagrid.util.uri_resolvers.schemes import policy_spec_from_uri
+
+
+@dataclass
+class PlayTrace:
+    """Parsed trace output from a Planky episode."""
+
+    lines: list[str] = field(default_factory=list)
+    goal_activations: list[str] = field(default_factory=list)
+    role_changes: list[str] = field(default_factory=list)
+    idle_steps: int = 0
+
+    def had_goal(self, name: str) -> bool:
+        return any(name in line for line in self.goal_activations)
+
+    def summary(self, max_lines: int = 50) -> str:
+        parts = []
+        if self.role_changes:
+            parts.append("Role changes: " + ", ".join(self.role_changes))
+        if self.goal_activations:
+            parts.append(f"Goal activations ({len(self.goal_activations)} total):")
+            show = (
+                self.goal_activations[:10]
+                + (["..."] if len(self.goal_activations) > 20 else [])
+                + self.goal_activations[-10:]
+                if len(self.goal_activations) > 20
+                else self.goal_activations
+            )
+            for line in show:
+                parts.append(f" {line}")
+        if self.idle_steps > 0:
+            parts.append(f"Max idle steps: {self.idle_steps}")
+        return "\n".join(parts)
+
+
+def _parse_trace(output: str) -> PlayTrace:
+    """Parse [planky] trace lines from captured stdout."""
+    trace = PlayTrace()
+    for line in output.splitlines():
+        if "[planky]" not in line:
+            continue
+        trace.lines.append(line)
+
+        if "\u2192" in line:
+            trace.goal_activations.append(line.strip())
+
+        if "role:" in line:
+            trace.role_changes.append(line.strip())
+
+        idle_match = re.search(r"IDLE=(\d+)", line)
+        if idle_match:
+            trace.idle_steps = max(trace.idle_steps, int(idle_match.group(1)))
+
+    return trace
+
+
+@dataclass
+class EpisodeResult:
+    """Combined stats + trace from a Planky episode."""
+
+    rewards: list[float]
+    steps: int
+    agent_stats: dict[str, float]  # Aggregated across all agents
+    cogs_stats: dict[str, float]
+    clips_stats: dict[str, float]
+    trace: PlayTrace
+
+    @property
+    def total_reward(self) -> float:
+        return sum(self.rewards)
+
+    def gear_gained(self, gear: str) -> int:
+        return int(self.agent_stats.get(f"{gear}.gained", 0))
+
+    def resource_deposited(self, resource: str) -> int:
+        return int(self.cogs_stats.get(f"collective.{resource}.deposited", 0))
+
+    def total_deposited(self) -> int:
+        return sum(self.resource_deposited(r) for r in ["carbon", "oxygen", "germanium", "silicon"])
+
+    def junctions_aligned(self) -> int:
+        return int(self.cogs_stats.get("junction.gained", 0))
+
+    def hearts_gained(self) -> int:
+        return int(self.agent_stats.get("heart.gained", 0))
+
+
+def run_planky_episode(
+    policy_uri: str,
+    mission: str = "cogsguard_machina_1.basic",
+    steps: Optional[int] = None,
+    seed: int = 42,
+) -> EpisodeResult:
+    """Run a single Planky episode and return structured results + trace."""
+    _name, env_cfg, _mission_obj = get_mission(mission_arg=mission)
+
+    if steps is not None:
+        env_cfg.game.max_steps = steps
+
+    policy_spec = policy_spec_from_uri(policy_uri, device="cpu")
+    num_agents = env_cfg.game.num_agents
+
+    # Capture stdout for trace output
+    captured = io.StringIO()
+    old_stdout = sys.stdout
+    sys.stdout = captured
+    try:
+        results, _replay = run_single_episode(
+            policy_specs=[policy_spec],
+            assignments=[0] * num_agents,
+            env=env_cfg,
+            seed=seed,
+            render_mode=None,
+            device="cpu",
+        )
+    finally:
+        sys.stdout = old_stdout
+
+    output = captured.getvalue()
+    trace = _parse_trace(output)
+
+    # Aggregate agent stats
+    agent_stats: dict[str, float] = {}
+    for agent in results.stats.get("agent", []):
+        for key, value in agent.items():
+            agent_stats[key] = agent_stats.get(key, 0) + value
+
+    collective = results.stats.get("collective", {})
+    cogs_stats = collective.get("cogs", {})
+    clips_stats = collective.get("clips", {})
+
+    return EpisodeResult(
+        rewards=list(results.rewards),
+        steps=results.steps,
+        agent_stats=agent_stats,
+        cogs_stats=cogs_stats,
+        clips_stats=clips_stats,
+        trace=trace,
+    )
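Since `run_planky_episode` is an ordinary function, it can also be driven outside pytest. A minimal sketch, reusing the mission, step count, and seed from the fixtures above:

```python
from cogames_agents.policy.scripted_agent.planky.tests.helpers import run_planky_episode

result = run_planky_episode(
    policy_uri="metta://policy/planky?miner=4&aligner=1&trace=1",
    mission="cogsguard_machina_1.basic",
    steps=500,
    seed=42,
)

# Aggregate episode outcome: rewards, deposits, junction alignments, hearts.
print(f"total reward:    {result.total_reward}")
print(f"total deposited: {result.total_deposited()}")
print(f"junctions:       {result.junctions_aligned()}  hearts: {result.hearts_gained()}")

# The parsed [planky] trace records goal activations and role changes.
print(result.trace.summary())
```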