cogames_agents-0.0.0.7-cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. cogames_agents/__init__.py +0 -0
  2. cogames_agents/evals/__init__.py +5 -0
  3. cogames_agents/evals/planky_evals.py +415 -0
  4. cogames_agents/policy/__init__.py +0 -0
  5. cogames_agents/policy/evolution/__init__.py +0 -0
  6. cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
  7. cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
  8. cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
  9. cogames_agents/policy/nim_agents/__init__.py +20 -0
  10. cogames_agents/policy/nim_agents/agents.py +98 -0
  11. cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
  12. cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
  13. cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
  14. cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
  15. cogames_agents/policy/nim_agents/common.nim +1054 -0
  16. cogames_agents/policy/nim_agents/install.sh +1 -0
  17. cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
  18. cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
  19. cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
  20. cogames_agents/policy/nim_agents/nimby.lock +3 -0
  21. cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
  22. cogames_agents/policy/nim_agents/random_agents.nim +68 -0
  23. cogames_agents/policy/nim_agents/test_agents.py +53 -0
  24. cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
  25. cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
  26. cogames_agents/policy/scripted_agent/README.md +360 -0
  27. cogames_agents/policy/scripted_agent/__init__.py +0 -0
  28. cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
  29. cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
  30. cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
  31. cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
  32. cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
  33. cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
  34. cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
  35. cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
  36. cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
  37. cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
  38. cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
  39. cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
  40. cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
  41. cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
  42. cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
  43. cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
  44. cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
  45. cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
  46. cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
  47. cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
  48. cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
  49. cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
  50. cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
  51. cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
  52. cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
  53. cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
  54. cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
  55. cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
  56. cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
  57. cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
  58. cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
  59. cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
  60. cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
  61. cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
  62. cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
  63. cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
  64. cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
  65. cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
  66. cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
  67. cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
  68. cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
  69. cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
  70. cogames_agents/policy/scripted_agent/common/roles.py +34 -0
  71. cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
  72. cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
  73. cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
  74. cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
  75. cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
  76. cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
  77. cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
  78. cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
  79. cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
  80. cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
  81. cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
  82. cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
  83. cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
  84. cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
  85. cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
  86. cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
  87. cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
  88. cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
  89. cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
  90. cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
  91. cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
  92. cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
  93. cogames_agents/policy/scripted_agent/planky/README.md +214 -0
  94. cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
  95. cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
  96. cogames_agents/policy/scripted_agent/planky/context.py +68 -0
  97. cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
  98. cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
  99. cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
  100. cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
  101. cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
  102. cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
  103. cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
  104. cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
  105. cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
  106. cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
  107. cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
  108. cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
  109. cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
  110. cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
  111. cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
  112. cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
  113. cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
  114. cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
  115. cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
  116. cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
  117. cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
  118. cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
  119. cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
  120. cogames_agents/policy/scripted_agent/types.py +239 -0
  121. cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
  122. cogames_agents/policy/scripted_agent/utils.py +381 -0
  123. cogames_agents/policy/scripted_registry.py +80 -0
  124. cogames_agents/py.typed +0 -0
  125. cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
  126. cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
  127. cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
  128. cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
cogames_agents/policy/scripted_agent/planky/policy.py
@@ -0,0 +1,485 @@
+ """
+ Planky Policy — goal-tree scripted agent.
+
+ PlankyBrain coordinates per-agent state and goal evaluation.
+ PlankyPolicy is the multi-agent wrapper with URI-based role distribution.
+ """
+
+ from __future__ import annotations
+
+ from typing import Any
+
+ import numpy as np
+
+ from mettagrid.mettagrid_c import dtype_actions
+ from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
+ from mettagrid.policy.policy_env_interface import PolicyEnvInterface
+ from mettagrid.simulator import Action, ObservationToken
+ from mettagrid.simulator.interface import AgentObservation
+
+ from .context import PlankyContext, StateSnapshot
+ from .entity_map import EntityMap
+ from .goal import Goal, evaluate_goals
+ from .goals.aligner import AlignJunctionGoal, GetAlignerGearGoal
+ from .goals.miner import DepositCargoGoal, ExploreHubGoal, GetMinerGearGoal, MineResourceGoal, PickResourceGoal
+ from .goals.scout import ExploreGoal, GetScoutGearGoal
+ from .goals.scrambler import GetScramblerGearGoal, ScrambleJunctionGoal
+ from .goals.shared import FallbackMineGoal, GetHeartsGoal
+ from .goals.stem import SelectRoleGoal
+ from .goals.survive import SurviveGoal
+ from .navigator import Navigator
+ from .obs_parser import ObsParser
+ from .trace import TraceLog
+
+ # Vibe names that map directly to roles
+ VIBE_TO_ROLE = {"miner", "scout", "aligner", "scrambler"}
+
+ # Default spawn position (center of 200x200 grid)
+ SPAWN_POS = (100, 100)
+
+
+ def _make_goal_list(role: str) -> list[Goal]:
+     """Create the goal list for a role."""
+     if role == "miner":
+         return [
+             SurviveGoal(hp_threshold=15),
+             ExploreHubGoal(),
+             GetMinerGearGoal(),
+             PickResourceGoal(),
+             DepositCargoGoal(),
+             MineResourceGoal(),
+         ]
+     elif role == "scout":
+         return [
+             SurviveGoal(hp_threshold=50),
+             GetScoutGearGoal(),
+             ExploreGoal(),
+         ]
+     elif role == "aligner":
+         # Aligners NEED gear + heart to align junctions.
+         # Hearts require gear first — don't waste resources on hearts without gear.
+         # FallbackMine at end: mine resources when we can't get gear/hearts.
+         return [
+             SurviveGoal(hp_threshold=50),
+             GetAlignerGearGoal(),
+             GetHeartsGoal(),
+             AlignJunctionGoal(),
+             FallbackMineGoal(),
+         ]
+     elif role == "scrambler":
+         # Scramblers NEED gear + heart to scramble junctions.
+         # FallbackMine at end: mine resources when we can't get gear/hearts.
+         return [
+             SurviveGoal(hp_threshold=30),
+             GetScramblerGearGoal(),
+             GetHeartsGoal(),
+             ScrambleJunctionGoal(),
+             FallbackMineGoal(),
+         ]
+     elif role == "stem":
+         return [
+             SurviveGoal(hp_threshold=20),
+             SelectRoleGoal(),
+         ]
+     else:
+         # Default/inactive
+         return []
+
+
+ class PlankyAgentState:
+     """Persistent state for a Planky agent across ticks."""
+
+     def __init__(self, agent_id: int, role: str, goals: list[Goal]) -> None:
+         self.agent_id = agent_id
+         self.role = role
+         self.goals = goals
+         self.entity_map = EntityMap()
+         self.navigator = Navigator()
+         self.blackboard: dict[str, Any] = {}
+         self.step = 0
+         self.my_collective_id: int | None = None
+
+
+ class PlankyBrain(StatefulPolicyImpl[PlankyAgentState]):
+     """Per-agent coordinator that owns state and evaluates the goal tree."""
+
+     def __init__(
+         self,
+         policy_env_info: PolicyEnvInterface,
+         agent_id: int,
+         role: str,
+         trace_enabled: bool = False,
+         trace_level: int = 1,
+         trace_agent: int = -1,
+     ) -> None:
+         self._agent_id = agent_id
+         self._policy_env_info = policy_env_info
+         self._role = role
+         self._obs_parser = ObsParser(policy_env_info)
+         self._action_names = policy_env_info.action_names
+
+         # Tracing
+         self._trace_enabled = trace_enabled
+         self._trace_level = trace_level
+         self._trace_agent = trace_agent  # -1 = trace all
+
+     def initial_agent_state(self) -> PlankyAgentState:
+         goals = _make_goal_list(self._role)
+         return PlankyAgentState(
+             agent_id=self._agent_id,
+             role=self._role,
+             goals=goals,
+         )
+
+     def step_with_state(self, obs: AgentObservation, agent_state: PlankyAgentState) -> tuple[Action, PlankyAgentState]:
+         agent_state.step += 1
+
+         # Parse observation
+         state, visible_entities = self._obs_parser.parse(obs, agent_state.step, SPAWN_POS)
+
+         # Update entity map
+         agent_state.entity_map.update_from_observation(
+             agent_pos=state.position,
+             obs_half_height=self._obs_parser.obs_half_height,
+             obs_half_width=self._obs_parser.obs_half_width,
+             visible_entities=visible_entities,
+             step=agent_state.step,
+         )
+
+         # Detect our own collective_id from the nearest hub (once)
+         if agent_state.my_collective_id is None:
+             hub = agent_state.entity_map.find_nearest(state.position, type_contains="hub")
+             if hub is not None:
+                 _, hub_entity = hub
+                 cid = hub_entity.properties.get("collective_id")
+                 if cid is not None:
+                     agent_state.my_collective_id = cid
+
+         # Detect useful actions by comparing state changes.
+         # Useful = mined resources, deposited to collective, aligned/scrambled a junction.
+         self._detect_useful_action(state, agent_state)
+
+         # Detect failed moves: the last action was a move but the position didn't change
+         last_pos = agent_state.blackboard.get("_last_pos")
+         last_action = agent_state.blackboard.get("_last_action", "")
+         if last_pos is not None and last_action.startswith("move_") and state.position == last_pos:
+             # Move failed — track consecutive failures
+             fail_count = agent_state.blackboard.get("_move_fail_count", 0) + 1
+             agent_state.blackboard["_move_fail_count"] = fail_count
+
+             # After 3 consecutive failed moves, clear the navigation cache and targets
+             if fail_count >= 3:
+                 agent_state.navigator._cached_path = None
+                 agent_state.navigator._cached_target = None
+             # Clear any target resource selection to force re-evaluation
+             if fail_count >= 6:
+                 agent_state.blackboard.pop("target_resource", None)
+                 agent_state.blackboard["_move_fail_count"] = 0
+         else:
+             agent_state.blackboard["_move_fail_count"] = 0
+
+         agent_state.blackboard["_last_pos"] = state.position
+
+         # Vibe-driven role system: an agent's role IS its vibe.
+         #   "default"      → set the initial role vibe
+         #   "gear"         → stem mode (role selection)
+         #   any valid role → run that role's goals
+
+         # Check whether goals want to change role (via the blackboard)
+         if "change_role" in agent_state.blackboard:
+             new_role = agent_state.blackboard.pop("change_role")
+             if new_role in VIBE_TO_ROLE:
+                 return Action(name=f"change_vibe_{new_role}"), agent_state
+
+         # Map vibe to role
+         current_vibe = state.vibe
+         if current_vibe == "default":
+             if self._role in VIBE_TO_ROLE:
+                 # Non-stem agent: set the initial role vibe
+                 return Action(name=f"change_vibe_{self._role}"), agent_state
+             else:
+                 # Stem agent: default vibe = stem mode
+                 effective_role = "stem"
+         elif current_vibe == "gear":
+             # Gear vibe = stem mode (role selection)
+             effective_role = "stem"
+         elif current_vibe in VIBE_TO_ROLE:
+             effective_role = current_vibe
+         else:
+             if self._role in VIBE_TO_ROLE:
+                 return Action(name=f"change_vibe_{self._role}"), agent_state
+             effective_role = "stem"
+
+         # Update goals if the role changed
+         if effective_role != agent_state.role:
+             if self._should_trace(agent_state):
+                 print(f"[planky][t={agent_state.step} a={self._agent_id}] role: {agent_state.role}→{effective_role}")
+             agent_state.role = effective_role
+             agent_state.goals = _make_goal_list(effective_role)
+
+         # Build context
+         should_trace = self._should_trace(agent_state)
+         trace = TraceLog() if should_trace else None
+
+         # Calculate steps since the last useful action
+         last_useful = agent_state.blackboard.get("_last_useful_step", 0)
+         steps_since_useful = agent_state.step - last_useful
+         if trace:
+             trace.steps_since_useful = steps_since_useful
+
+         # If we've been idle too long (100+ steps), force a reset of cached state.
+         # This helps break out of stuck loops.
+         if steps_since_useful >= 100 and steps_since_useful % 50 == 0:
+             # Clear cached navigation and target selections
+             agent_state.navigator._cached_path = None
+             agent_state.navigator._cached_target = None
+             agent_state.blackboard.pop("target_resource", None)
+             if trace:
+                 trace.activate("IdleReset", f"clearing cache after {steps_since_useful} idle steps")
+
+         ctx = PlankyContext(
+             state=state,
+             map=agent_state.entity_map,
+             blackboard=agent_state.blackboard,
+             navigator=agent_state.navigator,
+             trace=trace,
+             action_names=self._action_names,
+             agent_id=self._agent_id,
+             step=agent_state.step,
+             my_collective_id=agent_state.my_collective_id,
+         )
+
+         # If we're stuck (many failed moves), force exploration to discover terrain
+         fail_count = agent_state.blackboard.get("_move_fail_count", 0)
+         if fail_count >= 6:
+             action = agent_state.navigator.explore(
+                 state.position,
+                 agent_state.entity_map,
+                 direction_bias=["north", "east", "south", "west"][self._agent_id % 4],
+             )
+             if trace:
+                 trace.active_goal_chain = f"ForceExplore(stuck={fail_count})"
+                 trace.action_name = action.name
+         else:
+             # Evaluate goals normally
+             action = evaluate_goals(agent_state.goals, ctx)
+
+         # DEBUG: dump all entity types for agent 0
+         if self._agent_id == 0 and agent_state.step == 25:
+             all_types = {}
+             for p, e in agent_state.entity_map.entities.items():
+                 t = e.type
+                 if t not in all_types:
+                     all_types[t] = []
+                 all_types[t].append((p, e.properties.get("alignment")))
+             for t, entries in sorted(all_types.items()):
+                 print(f"[planky-debug] type={t}: {entries[:5]}")
+
+         # Emit trace
+         if trace:
+             line = trace.format_line(
+                 step=agent_state.step,
+                 agent_id=self._agent_id,
+                 role=agent_state.role,
+                 pos=state.position,
+                 hp=state.hp,
+                 level=self._trace_level,
+             )
+             print(f"[planky] {line}")
+             # Log collective resources and entity-map info
+             if agent_state.step % 25 == 0 or agent_state.step == 3:
+                 print(
+                     f"[planky][t={agent_state.step} a={self._agent_id}] "
+                     f"collective: C={state.collective_carbon} O={state.collective_oxygen} "
+                     f"G={state.collective_germanium} S={state.collective_silicon} "
+                     f"cargo={state.cargo_total}/{state.cargo_capacity} "
+                     f"energy={state.energy}"
+                 )
+
+         # Track the action for failed-move detection
+         agent_state.blackboard["_last_action"] = action.name
+
+         return action, agent_state
+
+     def _should_trace(self, agent_state: PlankyAgentState) -> bool:
+         if not self._trace_enabled:
+             return False
+         if self._trace_agent >= 0 and self._agent_id != self._trace_agent:
+             return False
+         return True
+
+     def _detect_useful_action(self, state: StateSnapshot, agent_state: PlankyAgentState) -> None:
+         """Detect whether a useful action occurred by comparing state changes.
+
+         Useful actions:
+         - Mine: cargo increased
+         - Deposit: cargo decreased AND collective increased
+         - Align/Scramble: heart decreased (spent on a junction action)
+         - Got gear: gear flag changed
+         - Got heart: heart count increased
+         """
+         bb = agent_state.blackboard
+
+         # Get previous state values
+         prev_cargo = bb.get("_prev_cargo", 0)
+         prev_heart = bb.get("_prev_heart", 0)
+         prev_collective_total = bb.get("_prev_collective_total", 0)
+
+         # Calculate current values
+         current_cargo = state.cargo_total
+         current_heart = state.heart
+         current_collective = (
+             state.collective_carbon + state.collective_oxygen + state.collective_germanium + state.collective_silicon
+         )
+
+         # Detect useful actions
+         useful = False
+
+         # Mined resources (cargo increased)
+         if current_cargo > prev_cargo:
+             useful = True
+
+         # Deposited resources (cargo decreased, collective increased)
+         if current_cargo < prev_cargo and current_collective > prev_collective_total:
+             useful = True
+
+         # Got a heart (heart increased)
+         if current_heart > prev_heart:
+             useful = True
+
+         # Spent a heart on align/scramble (heart decreased)
+         if current_heart < prev_heart:
+             useful = True
+
+         # Update tracking
+         if useful:
+             bb["_last_useful_step"] = agent_state.step
+
+         # Store current values for the next tick's comparison
+         bb["_prev_cargo"] = current_cargo
+         bb["_prev_heart"] = current_heart
+         bb["_prev_collective_total"] = current_collective
+
+
+ class PlankyPolicy(MultiAgentPolicy):
+     """Multi-agent goal-tree policy with URI-based role distribution.
+
+     URI parameters:
+         ?miner=4&scout=0&aligner=2&scrambler=4 — role counts
+         ?trace=1&trace_level=2&trace_agent=0 — tracing
+     """
+
+     short_names = ["planky"]
+
+     def __init__(
+         self,
+         policy_env_info: PolicyEnvInterface,
+         device: str = "cpu",
+         # Role counts — if stem > 0, defaults to all-stem unless explicit roles are given
+         miner: int = -1,
+         scout: int = 0,
+         aligner: int = -1,
+         scrambler: int = -1,
+         stem: int = 0,
+         # Tracing
+         trace: int = 0,
+         trace_level: int = 1,
+         trace_agent: int = -1,
+         # Accept any extra kwargs
+         **kwargs: object,
+     ) -> None:
+         super().__init__(policy_env_info, device=device)
+         self._feature_by_id = {f.id: f for f in policy_env_info.obs_features}
+         self._action_name_to_index = {name: idx for idx, name in enumerate(policy_env_info.action_names)}
+         print(f"[planky] Action names: {list(policy_env_info.action_names)}")
+         self._noop_action_value = dtype_actions.type(self._action_name_to_index.get("noop", 0))
+
+         # Tracing
+         self._trace_enabled = bool(trace)
+         self._trace_level = trace_level
+         self._trace_agent = trace_agent
+
+         # Resolve defaults: if stem > 0 and miner/aligner/scrambler are not explicitly set, zero them
+         if stem > 0:
+             if miner == -1:
+                 miner = 0
+             if aligner == -1:
+                 aligner = 0
+             if scrambler == -1:
+                 scrambler = 0
+         else:
+             if miner == -1:
+                 miner = 3
+             if aligner == -1:
+                 aligner = 5
+             if scrambler == -1:
+                 scrambler = 0
+
+         # Build the per-team role distribution
+         team_roles: list[str] = []
+         team_roles.extend(["miner"] * miner)
+         team_roles.extend(["scout"] * scout)
+         team_roles.extend(["aligner"] * aligner)
+         team_roles.extend(["scrambler"] * scrambler)
+         team_roles.extend(["stem"] * stem)
+
+         # Tile the role distribution to cover all agents (supports multi-team setups).
+         num_agents = policy_env_info.num_agents
+         team_size = len(team_roles) if team_roles else 1
+         num_teams = max(1, (num_agents + team_size - 1) // team_size)
+         self._role_distribution: list[str] = (team_roles * num_teams)[:num_agents]
+
+         if self._trace_enabled:
+             print(f"[planky] Role distribution ({num_teams} teams): {self._role_distribution}")
+
+         self._agent_policies: dict[int, StatefulAgentPolicy[PlankyAgentState]] = {}
+
+     def agent_policy(self, agent_id: int) -> StatefulAgentPolicy[PlankyAgentState]:
+         if agent_id not in self._agent_policies:
+             role = self._role_distribution[agent_id] if agent_id < len(self._role_distribution) else "default"
+
+             brain = PlankyBrain(
+                 policy_env_info=self._policy_env_info,
+                 agent_id=agent_id,
+                 role=role,
+                 trace_enabled=self._trace_enabled,
+                 trace_level=self._trace_level,
+                 trace_agent=self._trace_agent,
+             )
+
+             self._agent_policies[agent_id] = StatefulAgentPolicy(
+                 brain,
+                 self._policy_env_info,
+                 agent_id=agent_id,
+             )
+
+         return self._agent_policies[agent_id]
+
+     def step_batch(self, raw_observations: np.ndarray, raw_actions: np.ndarray) -> None:
+         raw_actions[...] = self._noop_action_value
+         num_agents = min(raw_observations.shape[0], self._policy_env_info.num_agents)
+         for agent_id in range(num_agents):
+             obs = self._raw_obs_to_agent_obs(agent_id, raw_observations[agent_id])
+             action = self.agent_policy(agent_id).step(obs)
+             action_index = self._action_name_to_index.get(action.name, 0)
+             raw_actions[agent_id] = dtype_actions.type(action_index)
+
+     def _raw_obs_to_agent_obs(self, agent_id: int, raw_obs: np.ndarray) -> AgentObservation:
+         tokens: list[ObservationToken] = []
+         for token in raw_obs:
+             feature_id = int(token[1])
+             if feature_id == 0xFF:
+                 break
+             feature = self._feature_by_id.get(feature_id)
+             if feature is None:
+                 continue
+             location_packed = int(token[0])
+             value = int(token[2])
+             tokens.append(
+                 ObservationToken(
+                     feature=feature,
+                     value=value,
+                     raw_token=(location_packed, feature_id, value),
+                 )
+             )
+         return AgentObservation(agent_id=agent_id, tokens=tokens)
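
The role distribution built in PlankyPolicy.__init__ above tiles one team's role list across however many agents the mission spawns. A standalone sketch of that arithmetic, with hypothetical inputs (team_roles and num_agents below are invented for illustration, not values from the package):

# Sketch of the tiling in PlankyPolicy.__init__ (inputs are hypothetical).
team_roles = ["miner"] * 3 + ["aligner"] * 5  # the stem == 0 defaults: miner=3, aligner=5
num_agents = 20                               # e.g. a mission with more agents than one team
team_size = len(team_roles) if team_roles else 1
num_teams = max(1, (num_agents + team_size - 1) // team_size)  # ceiling division -> 3
role_distribution = (team_roles * num_teams)[:num_agents]
assert len(role_distribution) == 20
assert role_distribution[:3] == ["miner"] * 3  # the pattern restarts every 8 agents

Because the list is tiled to cover every agent, the "default" fallback in agent_policy is only reached when all role counts are zero and the distribution is empty.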
cogames_agents/policy/scripted_agent/planky/tests/conftest.py
@@ -0,0 +1,66 @@
+ """Shared fixtures for Planky capability tests."""
+
+ import pytest
+
+ from cogames_agents.policy.scripted_agent.planky.tests.helpers import (
+     EpisodeResult,
+     run_planky_episode,
+ )
+
+ MISSION = "cogsguard_machina_1.basic"
+ DEFAULT_STEPS = 500
+ DEFAULT_SEED = 42
+
+
+ @pytest.fixture(scope="module")
+ def miner_episode() -> EpisodeResult:
+     return run_planky_episode(
+         policy_uri="metta://policy/planky?miner=1&aligner=0&trace=1&trace_level=2&trace_agent=0",
+         mission=MISSION,
+         steps=DEFAULT_STEPS,
+         seed=DEFAULT_SEED,
+     )
+
+
+ @pytest.fixture(scope="module")
+ def aligner_episode() -> EpisodeResult:
+     """Aligner needs miners to fund gear + hearts."""
+     return run_planky_episode(
+         policy_uri="metta://policy/planky?miner=4&aligner=1&scrambler=0&trace=1&trace_level=2&trace_agent=4",
+         mission=MISSION,
+         steps=DEFAULT_STEPS,
+         seed=DEFAULT_SEED,
+     )
+
+
+ @pytest.fixture(scope="module")
+ def scrambler_episode() -> EpisodeResult:
+     """Scrambler needs miners to fund gear + hearts."""
+     return run_planky_episode(
+         policy_uri="metta://policy/planky?miner=4&aligner=0&scrambler=1&trace=1&trace_level=2&trace_agent=4",
+         mission=MISSION,
+         steps=DEFAULT_STEPS,
+         seed=DEFAULT_SEED,
+     )
+
+
+ @pytest.fixture(scope="module")
+ def scout_episode() -> EpisodeResult:
+     """Scout needs miners to fund gear."""
+     return run_planky_episode(
+         policy_uri="metta://policy/planky?miner=4&aligner=0&scout=1&trace=1&trace_level=2&trace_agent=4",
+         mission=MISSION,
+         steps=DEFAULT_STEPS,
+         seed=DEFAULT_SEED,
+     )
+
+
+ @pytest.fixture(scope="module")
+ def stem_episode() -> EpisodeResult:
+     """Full stem=5 run — tests dynamic role selection + pipeline."""
+     return run_planky_episode(
+         policy_uri="metta://policy/planky?stem=5&trace=1&trace_level=2",
+         mission=MISSION,
+         steps=DEFAULT_STEPS,
+         seed=DEFAULT_SEED,
+     )
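
The test modules in the file list above (test_miner.py, test_aligner.py, and friends) request these module-scoped fixtures, so each episode is simulated once per module and shared across assertions. Their exact contents are not shown in this diff; a hypothetical test in that style, using only the EpisodeResult API from helpers.py (the assertion and its threshold are invented):

# Hypothetical capability test (not from the package; the threshold is made up).
def test_miner_deposits_to_collective(miner_episode: EpisodeResult) -> None:
    # total_deposited() sums carbon/oxygen/germanium/silicon collective deposits.
    assert miner_episode.total_deposited() > 0, miner_episode.trace.summary()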
cogames_agents/policy/scripted_agent/planky/tests/helpers.py
@@ -0,0 +1,152 @@
+ """Shared helpers for Planky capability tests."""
+
+ from __future__ import annotations
+
+ import io
+ import re
+ import sys
+ from dataclasses import dataclass, field
+ from typing import Optional
+
+ from metta_alo.rollout import run_single_episode
+
+ from cogames.cli.mission import get_mission
+ from mettagrid.util.uri_resolvers.schemes import policy_spec_from_uri
+
+
+ @dataclass
+ class PlayTrace:
+     """Parsed trace output from a Planky episode."""
+
+     lines: list[str] = field(default_factory=list)
+     goal_activations: list[str] = field(default_factory=list)
+     role_changes: list[str] = field(default_factory=list)
+     idle_steps: int = 0
+
+     def had_goal(self, name: str) -> bool:
+         return any(name in line for line in self.goal_activations)
+
+     def summary(self, max_lines: int = 50) -> str:
+         parts = []
+         if self.role_changes:
+             parts.append("Role changes: " + ", ".join(self.role_changes))
+         if self.goal_activations:
+             parts.append(f"Goal activations ({len(self.goal_activations)} total):")
+             if len(self.goal_activations) > 20:
+                 show = self.goal_activations[:10] + ["..."] + self.goal_activations[-10:]
+             else:
+                 show = self.goal_activations
+             for line in show:
+                 parts.append(f" {line}")
+         if self.idle_steps > 0:
+             parts.append(f"Max idle steps: {self.idle_steps}")
+         return "\n".join(parts)
+
+
+ def _parse_trace(output: str) -> PlayTrace:
+     """Parse [planky] trace lines from captured stdout."""
+     trace = PlayTrace()
+     for line in output.splitlines():
+         if "[planky]" not in line:
+             continue
+         trace.lines.append(line)
+
+         if "\u2192" in line:
+             trace.goal_activations.append(line.strip())
+
+         if "role:" in line:
+             trace.role_changes.append(line.strip())
+
+         idle_match = re.search(r"IDLE=(\d+)", line)
+         if idle_match:
+             trace.idle_steps = max(trace.idle_steps, int(idle_match.group(1)))
+
+     return trace
+
+
+ @dataclass
+ class EpisodeResult:
+     """Combined stats + trace from a Planky episode."""
+
+     rewards: list[float]
+     steps: int
+     agent_stats: dict[str, float]  # Aggregated across all agents
+     cogs_stats: dict[str, float]
+     clips_stats: dict[str, float]
+     trace: PlayTrace
+
+     @property
+     def total_reward(self) -> float:
+         return sum(self.rewards)
+
+     def gear_gained(self, gear: str) -> int:
+         return int(self.agent_stats.get(f"{gear}.gained", 0))
+
+     def resource_deposited(self, resource: str) -> int:
+         return int(self.cogs_stats.get(f"collective.{resource}.deposited", 0))
+
+     def total_deposited(self) -> int:
+         return sum(self.resource_deposited(r) for r in ["carbon", "oxygen", "germanium", "silicon"])
+
+     def junctions_aligned(self) -> int:
+         return int(self.cogs_stats.get("junction.gained", 0))
+
+     def hearts_gained(self) -> int:
+         return int(self.agent_stats.get("heart.gained", 0))
+
+
+ def run_planky_episode(
+     policy_uri: str,
+     mission: str = "cogsguard_machina_1.basic",
+     steps: Optional[int] = None,
+     seed: int = 42,
+ ) -> EpisodeResult:
+     """Run a single Planky episode and return structured results + trace."""
+     _name, env_cfg, _mission_obj = get_mission(mission_arg=mission)
+
+     if steps is not None:
+         env_cfg.game.max_steps = steps
+
+     policy_spec = policy_spec_from_uri(policy_uri, device="cpu")
+     num_agents = env_cfg.game.num_agents
+
+     # Capture stdout for trace output
+     captured = io.StringIO()
+     old_stdout = sys.stdout
+     sys.stdout = captured
+     try:
+         results, _replay = run_single_episode(
+             policy_specs=[policy_spec],
+             assignments=[0] * num_agents,
+             env=env_cfg,
+             seed=seed,
+             render_mode=None,
+             device="cpu",
+         )
+     finally:
+         sys.stdout = old_stdout
+
+     output = captured.getvalue()
+     trace = _parse_trace(output)
+
+     # Aggregate agent stats
+     agent_stats: dict[str, float] = {}
+     for agent in results.stats.get("agent", []):
+         for key, value in agent.items():
+             agent_stats[key] = agent_stats.get(key, 0) + value
+
+     collective = results.stats.get("collective", {})
+     cogs_stats = collective.get("cogs", {})
+     clips_stats = collective.get("clips", {})
+
+     return EpisodeResult(
+         rewards=list(results.rewards),
+         steps=results.steps,
+         agent_stats=agent_stats,
+         cogs_stats=cogs_stats,
+         clips_stats=clips_stats,
+         trace=trace,
+     )
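
Outside pytest, run_planky_episode doubles as a quick way to smoke-test a role mix by hand. A minimal sketch, assuming the cogames_agents and mettagrid stacks are installed and using the same mission name these tests default to:

# Ad-hoc episode run (assumes cogames_agents and its mettagrid deps are importable).
from cogames_agents.policy.scripted_agent.planky.tests.helpers import run_planky_episode

result = run_planky_episode(
    policy_uri="metta://policy/planky?miner=4&aligner=1&trace=1",
    mission="cogsguard_machina_1.basic",
    steps=200,
    seed=7,
)
print("reward:", result.total_reward)
print("deposited:", result.total_deposited(), "aligned:", result.junctions_aligned())
print(result.trace.summary())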