cogames-agents 0.0.0.7__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. cogames_agents/__init__.py +0 -0
  2. cogames_agents/evals/__init__.py +5 -0
  3. cogames_agents/evals/planky_evals.py +415 -0
  4. cogames_agents/policy/__init__.py +0 -0
  5. cogames_agents/policy/evolution/__init__.py +0 -0
  6. cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
  7. cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
  8. cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
  9. cogames_agents/policy/nim_agents/__init__.py +20 -0
  10. cogames_agents/policy/nim_agents/agents.py +98 -0
  11. cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
  12. cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
  13. cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
  14. cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
  15. cogames_agents/policy/nim_agents/common.nim +1054 -0
  16. cogames_agents/policy/nim_agents/install.sh +1 -0
  17. cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
  18. cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
  19. cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
  20. cogames_agents/policy/nim_agents/nimby.lock +3 -0
  21. cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
  22. cogames_agents/policy/nim_agents/random_agents.nim +68 -0
  23. cogames_agents/policy/nim_agents/test_agents.py +53 -0
  24. cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
  25. cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
  26. cogames_agents/policy/scripted_agent/README.md +360 -0
  27. cogames_agents/policy/scripted_agent/__init__.py +0 -0
  28. cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
  29. cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
  30. cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
  31. cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
  32. cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
  33. cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
  34. cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
  35. cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
  36. cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
  37. cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
  38. cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
  39. cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
  40. cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
  41. cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
  42. cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
  43. cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
  44. cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
  45. cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
  46. cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
  47. cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
  48. cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
  49. cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
  50. cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
  51. cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
  52. cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
  53. cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
  54. cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
  55. cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
  56. cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
  57. cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
  58. cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
  59. cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
  60. cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
  61. cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
  62. cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
  63. cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
  64. cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
  65. cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
  66. cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
  67. cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
  68. cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
  69. cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
  70. cogames_agents/policy/scripted_agent/common/roles.py +34 -0
  71. cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
  72. cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
  73. cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
  74. cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
  75. cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
  76. cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
  77. cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
  78. cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
  79. cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
  80. cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
  81. cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
  82. cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
  83. cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
  84. cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
  85. cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
  86. cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
  87. cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
  88. cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
  89. cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
  90. cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
  91. cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
  92. cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
  93. cogames_agents/policy/scripted_agent/planky/README.md +214 -0
  94. cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
  95. cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
  96. cogames_agents/policy/scripted_agent/planky/context.py +68 -0
  97. cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
  98. cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
  99. cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
  100. cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
  101. cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
  102. cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
  103. cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
  104. cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
  105. cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
  106. cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
  107. cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
  108. cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
  109. cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
  110. cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
  111. cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
  112. cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
  113. cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
  114. cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
  115. cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
  116. cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
  117. cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
  118. cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
  119. cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
  120. cogames_agents/policy/scripted_agent/types.py +239 -0
  121. cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
  122. cogames_agents/policy/scripted_agent/utils.py +381 -0
  123. cogames_agents/policy/scripted_registry.py +80 -0
  124. cogames_agents/py.typed +0 -0
  125. cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
  126. cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
  127. cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
  128. cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,525 @@
1
+ """
2
+ Cogas Policy — goal-tree scripted agent.
3
+
4
+ CogasBrain coordinates per-agent state and goal evaluation.
5
+ CogasPolicy is the multi-agent wrapper with URI-based role distribution.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ import numpy as np
13
+
14
+ from mettagrid.mettagrid_c import dtype_actions
15
+ from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
16
+ from mettagrid.policy.policy_env_interface import PolicyEnvInterface
17
+ from mettagrid.simulator import Action, ObservationToken
18
+ from mettagrid.simulator.interface import AgentObservation
19
+
20
+ from .context import CogasContext, StateSnapshot
21
+ from .entity_map import EntityMap
22
+ from .goal import Goal, evaluate_goals
23
+ from .goals.aligner import AlignJunctionGoal, GetAlignerGearGoal
24
+ from .goals.miner import DepositCargoGoal, ExploreHubGoal, GetMinerGearGoal, MineResourceGoal, PickResourceGoal
25
+ from .goals.scout import ExploreGoal, GetScoutGearGoal
26
+ from .goals.scrambler import GetScramblerGearGoal, ScrambleJunctionGoal
27
+ from .goals.shared import FallbackMineGoal, GetHeartsGoal
28
+ from .goals.stem import SelectRoleGoal
29
+ from .goals.survive import SurviveGoal
30
+ from .navigator import Navigator
31
+ from .obs_parser import ObsParser
32
+ from .trace import TraceLog
33
+
34
# Vibe values that double as role names: an agent whose current vibe is one of
# these runs that role's goal list (resolved in CogasBrain.step_with_state).
VIBE_TO_ROLE = {"miner", "scout", "aligner", "scrambler"}

# Default spawn position (center of 200x200 grid)
# NOTE(review): assumes maps are 200x200 with agents starting centered — confirm
# against the actual map configs; ObsParser receives this as the position origin.
SPAWN_POS = (100, 100)
39
+
40
+
41
def _make_goal_list(role: str) -> list[Goal]:
    """Create the goal list for *role*.

    The list order matters: goals are handed to ``evaluate_goals`` as-is,
    so earlier entries are considered first. Unknown roles get an empty
    list (the agent stays inactive).
    """
    factories = {
        "miner": lambda: [
            SurviveGoal(hp_threshold=15),
            ExploreHubGoal(),
            GetMinerGearGoal(),
            PickResourceGoal(),
            DepositCargoGoal(),
            MineResourceGoal(),
        ],
        "scout": lambda: [
            SurviveGoal(hp_threshold=50),
            GetScoutGearGoal(),
            ExploreGoal(),
        ],
        # Aligners NEED gear + heart to align junctions.
        # Hearts require gear first — don't waste resources on hearts without gear.
        # FallbackMine at end: mine resources when can't get gear/hearts.
        "aligner": lambda: [
            SurviveGoal(hp_threshold=50),
            GetAlignerGearGoal(),
            GetHeartsGoal(),
            AlignJunctionGoal(),
            FallbackMineGoal(),
        ],
        # Scramblers NEED gear + heart to scramble junctions.
        # FallbackMine at end: mine resources when can't get gear/hearts.
        "scrambler": lambda: [
            SurviveGoal(hp_threshold=30),
            GetScramblerGearGoal(),
            GetHeartsGoal(),
            ScrambleJunctionGoal(),
            FallbackMineGoal(),
        ],
        # Stem agents only keep themselves alive until they pick a role.
        "stem": lambda: [
            SurviveGoal(hp_threshold=20),
            SelectRoleGoal(),
        ],
    }
    factory = factories.get(role)
    # Default/inactive: no goals for unrecognized roles.
    return factory() if factory is not None else []
87
+
88
+
89
class CogasAgentState:
    """Persistent state for a Cogas agent across ticks.

    Mutated in place by CogasBrain.step_with_state every tick; the blackboard
    is the shared scratch space goals use to communicate across ticks.
    """

    def __init__(self, agent_id: int, role: str, goals: list[Goal]) -> None:
        self.agent_id = agent_id  # stable id of the agent this state belongs to
        self.role = role  # current effective role (may change via vibes)
        self.goals = goals  # ordered goal list for the current role
        self.entity_map = EntityMap()  # world model accumulated from observations
        self.navigator = Navigator()  # pathfinding helper with cached paths
        self.blackboard: dict[str, Any] = {}  # cross-tick scratch shared by goals
        self.step = 0  # ticks processed for this agent
        self.my_collective_id: int | None = None  # learned from nearest hub; None until seen
101
+
102
+
103
class CogasBrain(StatefulPolicyImpl[CogasAgentState]):
    """Per-agent coordinator that owns state and evaluates the goal tree.

    Per tick: parse the observation, fold it into the persistent entity map,
    detect progress ("useful actions") and stuckness (failed moves), resolve
    the agent's effective role from its vibe, then either force exploration
    (when stuck) or evaluate the role's goal list to pick an action.
    """

    def __init__(
        self,
        policy_env_info: PolicyEnvInterface,
        agent_id: int,
        role: str,
        trace_enabled: bool = False,
        trace_level: int = 1,
        trace_agent: int = -1,
    ) -> None:
        """Store env interface, assigned role, and tracing configuration."""
        self._agent_id = agent_id
        self._policy_env_info = policy_env_info
        self._role = role  # role assigned by CogasPolicy's distribution
        self._obs_parser = ObsParser(policy_env_info)
        self._action_names = policy_env_info.action_names

        # Tracing
        self._trace_enabled = trace_enabled
        self._trace_level = trace_level  # verbosity, forwarded to TraceLog.format_line
        self._trace_agent = trace_agent  # -1 = trace all

    def initial_agent_state(self) -> CogasAgentState:
        """Build the fresh per-agent state with this brain's assigned role."""
        goals = _make_goal_list(self._role)
        return CogasAgentState(
            agent_id=self._agent_id,
            role=self._role,
            goals=goals,
        )

    def step_with_state(self, obs: AgentObservation, agent_state: CogasAgentState) -> tuple[Action, CogasAgentState]:
        """Advance one tick: update *agent_state* from *obs* and choose an action.

        Returns the chosen Action together with the mutated state. May
        short-circuit with a ``change_vibe_*`` action when the agent's vibe
        and role disagree, before any goal is evaluated.
        """
        agent_state.step += 1

        # Parse observation
        state, visible_entities = self._obs_parser.parse(obs, agent_state.step, SPAWN_POS)

        # Update entity map
        agent_state.entity_map.update_from_observation(
            agent_pos=state.position,
            obs_half_height=self._obs_parser.obs_half_height,
            obs_half_width=self._obs_parser.obs_half_width,
            visible_entities=visible_entities,
            step=agent_state.step,
        )

        # Detect own collective_id from nearest hub (once)
        if agent_state.my_collective_id is None:
            hub = agent_state.entity_map.find_nearest(state.position, type_contains="hub")
            if hub is not None:
                _, hub_entity = hub
                cid = hub_entity.properties.get("collective_id")
                if cid is not None:
                    agent_state.my_collective_id = cid

        # Detect useful actions by comparing state changes
        # Useful = mined resources, deposited to collective, aligned/scrambled junction
        self._detect_useful_action(state, agent_state)

        # Detect failed moves: if last action was a move but position didn't change
        last_pos = agent_state.blackboard.get("_last_pos")
        last_action = agent_state.blackboard.get("_last_action", "")
        if last_pos is not None and last_action.startswith("move_") and state.position == last_pos:
            # Move failed - track consecutive failures
            fail_count = agent_state.blackboard.get("_move_fail_count", 0) + 1
            agent_state.blackboard["_move_fail_count"] = fail_count

            # After 3 consecutive failed moves, clear navigation cache and targets
            # NOTE(review): pokes Navigator's private _cached_* attributes; a
            # public Navigator.reset() would be cleaner — confirm with its API.
            if fail_count >= 3:
                agent_state.navigator._cached_path = None
                agent_state.navigator._cached_target = None
            # Clear any target resource selection to force re-evaluation
            if fail_count >= 6:
                agent_state.blackboard.pop("target_resource", None)
                agent_state.blackboard["_move_fail_count"] = 0
        else:
            agent_state.blackboard["_move_fail_count"] = 0

        agent_state.blackboard["_last_pos"] = state.position

        # Vibe-driven role system: agent's role IS their vibe
        # "default" → set initial role vibe
        # "gear" → stem mode (role selection)
        # any valid role → run that role's goals

        # Check if goals want to change role (via blackboard)
        if "change_role" in agent_state.blackboard:
            new_role = agent_state.blackboard.pop("change_role")
            if new_role in VIBE_TO_ROLE:
                return Action(name=f"change_vibe_{new_role}"), agent_state

        # Map vibe to role
        current_vibe = state.vibe
        if current_vibe == "default":
            if self._role in VIBE_TO_ROLE:
                # Non-stem agent: set initial role vibe
                return Action(name=f"change_vibe_{self._role}"), agent_state
            else:
                # Stem agent: default vibe = stem mode
                effective_role = "stem"
        elif current_vibe == "gear":
            # Gear vibe = stem mode (role selection)
            effective_role = "stem"
        elif current_vibe in VIBE_TO_ROLE:
            effective_role = current_vibe
        else:
            # Unknown vibe: re-assert the assigned role, or fall back to stem.
            if self._role in VIBE_TO_ROLE:
                return Action(name=f"change_vibe_{self._role}"), agent_state
            effective_role = "stem"

        # Update goals if role changed
        if effective_role != agent_state.role:
            if self._should_trace(agent_state):
                print(f"[cogas][t={agent_state.step} a={self._agent_id}] role: {agent_state.role}→{effective_role}")
            agent_state.role = effective_role
            agent_state.goals = _make_goal_list(effective_role)

        # Build context
        should_trace = self._should_trace(agent_state)
        trace = TraceLog() if should_trace else None

        # Calculate steps since last useful action
        last_useful = agent_state.blackboard.get("_last_useful_step", 0)
        steps_since_useful = agent_state.step - last_useful
        if trace:
            trace.steps_since_useful = steps_since_useful

        # If we've been idle too long (50+ steps), force a reset of cached state
        # This helps break out of stuck loops - reduced from 100 for faster recovery
        if steps_since_useful >= 50 and steps_since_useful % 25 == 0:
            # Clear cached navigation and target selections
            agent_state.navigator._cached_path = None
            agent_state.navigator._cached_target = None
            agent_state.blackboard.pop("target_resource", None)
            if trace:
                trace.activate("IdleReset", f"clearing cache after {steps_since_useful} idle steps")

        ctx = CogasContext(
            state=state,
            map=agent_state.entity_map,
            blackboard=agent_state.blackboard,
            navigator=agent_state.navigator,
            trace=trace,
            action_names=self._action_names,
            agent_id=self._agent_id,
            step=agent_state.step,
            my_collective_id=agent_state.my_collective_id,
        )

        # If we're stuck (many failed moves), force exploration to discover terrain
        # Reduced threshold from 6 to 3 for faster recovery
        fail_count = agent_state.blackboard.get("_move_fail_count", 0)
        if fail_count >= 3:
            action = agent_state.navigator.explore(
                state.position,
                agent_state.entity_map,
                # Deterministic per-agent bias spreads agents in four directions.
                direction_bias=["north", "east", "south", "west"][self._agent_id % 4],
            )
            if trace:
                trace.active_goal_chain = f"ForceExplore(stuck={fail_count})"
                trace.action_name = action.name
        else:
            # Evaluate goals normally
            action = evaluate_goals(agent_state.goals, ctx)

        # DEBUG: dump all entity types for agent 0
        # NOTE(review): unconditional debug dump (not gated on tracing) —
        # consider removing or gating behind _trace_enabled.
        if self._agent_id == 0 and agent_state.step == 25:
            all_types = {}
            for p, e in agent_state.entity_map.entities.items():
                t = e.type
                if t not in all_types:
                    all_types[t] = []
                all_types[t].append((p, e.properties.get("alignment")))
            for t, entries in sorted(all_types.items()):
                print(f"[cogas-debug] type={t}: {entries[:5]}")

        # Emit trace
        if trace:
            line = trace.format_line(
                step=agent_state.step,
                agent_id=self._agent_id,
                role=agent_state.role,
                pos=state.position,
                hp=state.hp,
                level=self._trace_level,
            )
            print(f"[cogas] {line}")
            # Log collective resources and entity map info
            if agent_state.step % 25 == 0 or agent_state.step == 3:
                print(
                    f"[cogas][t={agent_state.step} a={self._agent_id}] "
                    f"collective: C={state.collective_carbon} O={state.collective_oxygen} "
                    f"G={state.collective_germanium} S={state.collective_silicon} "
                    f"cargo={state.cargo_total}/{state.cargo_capacity} "
                    f"energy={state.energy}"
                )

        # Track action for failed-move detection
        agent_state.blackboard["_last_action"] = action.name

        return action, agent_state

    def _should_trace(self, agent_state: CogasAgentState) -> bool:
        """Return True when tracing is enabled and this agent is selected.

        *agent_state* is currently unused; kept for future per-state filters.
        """
        if not self._trace_enabled:
            return False
        if self._trace_agent >= 0 and self._agent_id != self._trace_agent:
            return False
        return True

    def _detect_useful_action(self, state: StateSnapshot, agent_state: CogasAgentState) -> None:
        """Detect if a useful action occurred by comparing state changes.

        Useful actions:
        - Mine: cargo increased
        - Deposit: cargo decreased AND collective increased
        - Align/Scramble: heart decreased (spent on junction action)
        - Got gear: gear flag changed
        - Got heart: heart count increased

        Records ``_last_useful_step`` and first-time milestones in the
        blackboard, then stores current values for the next tick's comparison.
        """
        bb = agent_state.blackboard

        # Get previous state values
        prev_cargo = bb.get("_prev_cargo", 0)
        prev_heart = bb.get("_prev_heart", 0)
        prev_collective_total = bb.get("_prev_collective_total", 0)

        # Calculate current values
        current_cargo = state.cargo_total
        current_heart = state.heart
        current_collective = (
            state.collective_carbon + state.collective_oxygen + state.collective_germanium + state.collective_silicon
        )

        # Detect useful actions
        useful = False

        # Mined resources (cargo increased)
        if current_cargo > prev_cargo:
            useful = True
            # Track first mine milestone
            if "_first_mine_step" not in bb:
                bb["_first_mine_step"] = agent_state.step

        # Deposited resources (cargo decreased, collective increased)
        if current_cargo < prev_cargo and current_collective > prev_collective_total:
            useful = True
            # Track first deposit milestone
            if "_first_deposit_step" not in bb:
                bb["_first_deposit_step"] = agent_state.step

        # Got a heart (heart increased)
        if current_heart > prev_heart:
            useful = True
            # Track first heart milestone
            if "_first_heart_step" not in bb:
                bb["_first_heart_step"] = agent_state.step

        # Spent a heart on align/scramble (heart decreased)
        if current_heart < prev_heart:
            useful = True
            # Track first junction action milestone
            if "_first_junction_step" not in bb:
                bb["_first_junction_step"] = agent_state.step

        # Update tracking
        if useful:
            bb["_last_useful_step"] = agent_state.step

        # Store current values for next tick comparison
        bb["_prev_cargo"] = current_cargo
        bb["_prev_heart"] = current_heart
        bb["_prev_collective_total"] = current_collective

        # Print early-game diagnostics at key steps
        if self._agent_id == 0:
            if agent_state.step == 50:
                first_mine = bb.get("_first_mine_step", "NEVER")
                extractors = sum(
                    1
                    for r in ["carbon", "oxygen", "germanium", "silicon"]
                    if agent_state.entity_map.find(type=f"{r}_extractor")
                )
                res = f"C={state.collective_carbon} O={state.collective_oxygen}"
                print(f"[DIAG t=50] ext={extractors}/4 mine={first_mine} {res}")
            elif agent_state.step == 100:
                first_mine = bb.get("_first_mine_step", "NEVER")
                first_deposit = bb.get("_first_deposit_step", "NEVER")
                res = f"C={state.collective_carbon} O={state.collective_oxygen}"
                print(f"[DIAG t=100] mine={first_mine} dep={first_deposit} {res}")
        # Agent 3 is the first aligner (agents 0,1,2 are miners per 3:5 pattern)
        if self._agent_id == 3:
            if agent_state.step == 100:
                has_gear = state.aligner_gear
                res = f"C={state.collective_carbon} O={state.collective_oxygen}"
                print(f"[ALIGNER t=100] gear={has_gear} {res}")
            elif agent_state.step == 200:
                first_heart = bb.get("_first_heart_step", "NEVER")
                first_junction = bb.get("_first_junction_step", "NEVER")
                print(f"[ALIGNER t=200] first_heart={first_heart} first_junction={first_junction}")
402
+
403
+
404
class CogasPolicy(MultiAgentPolicy):
    """Multi-agent goal-tree policy with URI-based role distribution.

    URI parameters:
        ?miner=4&scout=0&aligner=2&scrambler=4 — role counts
        ?trace=1&trace_level=2&trace_agent=0 — tracing
    """

    short_names = ["cogas"]

    def __init__(
        self,
        policy_env_info: PolicyEnvInterface,
        device: str = "cpu",
        # Role counts — -1 means "not explicitly set"; if stem > 0, defaults
        # to all-stem unless explicit roles given
        miner: int = -1,
        scout: int = 0,
        aligner: int = -1,
        scrambler: int = -1,
        stem: int = 8,
        # Tracing
        trace: int = 0,
        trace_level: int = 1,
        trace_agent: int = -1,
        # Accept any extra kwargs so unknown URI parameters are ignored
        **kwargs: object,
    ) -> None:
        super().__init__(policy_env_info, device=device)
        self._feature_by_id = {f.id: f for f in policy_env_info.obs_features}
        self._action_name_to_index = {name: idx for idx, name in enumerate(policy_env_info.action_names)}
        print(f"[cogas] Action names: {list(policy_env_info.action_names)}")
        # Precomputed safe default: index of "noop" (or 0 if the env has none).
        self._noop_action_value = dtype_actions.type(self._action_name_to_index.get("noop", 0))

        # Tracing
        self._trace_enabled = bool(trace)
        self._trace_level = trace_level
        self._trace_agent = trace_agent

        # Resolve defaults: if stem > 0 and miner/aligner/scrambler not explicitly set, zero them;
        # otherwise fall back to a fixed 3-miner/5-aligner team.
        if stem > 0:
            if miner == -1:
                miner = 0
            if aligner == -1:
                aligner = 0
            if scrambler == -1:
                scrambler = 0
        else:
            if miner == -1:
                miner = 3
            if aligner == -1:
                aligner = 5
            if scrambler == -1:
                scrambler = 0

        # Build per-team role distribution
        team_roles: list[str] = []
        team_roles.extend(["miner"] * miner)
        team_roles.extend(["scout"] * scout)
        team_roles.extend(["aligner"] * aligner)
        team_roles.extend(["scrambler"] * scrambler)
        team_roles.extend(["stem"] * stem)

        # Tile the role distribution to cover all agents (supports multi-team setups).
        num_agents = policy_env_info.num_agents
        team_size = len(team_roles) if team_roles else 1
        num_teams = max(1, (num_agents + team_size - 1) // team_size)
        self._role_distribution: list[str] = (team_roles * num_teams)[:num_agents]

        if self._trace_enabled:
            print(f"[cogas] Role distribution ({num_teams} teams): {self._role_distribution}")

        self._agent_policies: dict[int, StatefulAgentPolicy[CogasAgentState]] = {}

    def agent_policy(self, agent_id: int) -> StatefulAgentPolicy[CogasAgentState]:
        """Return (lazily creating) the stateful per-agent policy for *agent_id*."""
        if agent_id not in self._agent_policies:
            # Agents beyond the distribution get the inactive "default" role.
            role = self._role_distribution[agent_id] if agent_id < len(self._role_distribution) else "default"

            brain = CogasBrain(
                policy_env_info=self._policy_env_info,
                agent_id=agent_id,
                role=role,
                trace_enabled=self._trace_enabled,
                trace_level=self._trace_level,
                trace_agent=self._trace_agent,
            )

            self._agent_policies[agent_id] = StatefulAgentPolicy(
                brain,
                self._policy_env_info,
                agent_id=agent_id,
            )

        return self._agent_policies[agent_id]

    def step_batch(self, raw_observations: np.ndarray, raw_actions: np.ndarray) -> None:
        """Fill *raw_actions* in place with one action index per agent.

        Rows beyond the known agent count keep the noop pre-fill.
        """
        raw_actions[...] = self._noop_action_value
        num_agents = min(raw_observations.shape[0], self._policy_env_info.num_agents)
        for agent_id in range(num_agents):
            obs = self._raw_obs_to_agent_obs(agent_id, raw_observations[agent_id])
            action = self.agent_policy(agent_id).step(obs)
            # Bug fix: an unrecognized action name previously fell back to
            # index 0 — an arbitrary real action. Fall back to noop instead,
            # consistent with the pre-fill above.
            action_index = self._action_name_to_index.get(action.name)
            if action_index is None:
                raw_actions[agent_id] = self._noop_action_value
            else:
                raw_actions[agent_id] = dtype_actions.type(action_index)

    def _raw_obs_to_agent_obs(self, agent_id: int, raw_obs: np.ndarray) -> AgentObservation:
        """Decode a raw token array into an AgentObservation.

        Each raw token is (packed_location, feature_id, value); feature_id
        0xFF terminates the list, and tokens with unknown feature ids are
        skipped.
        """
        tokens: list[ObservationToken] = []
        for token in raw_obs:
            feature_id = int(token[1])
            if feature_id == 0xFF:
                break
            feature = self._feature_by_id.get(feature_id)
            if feature is None:
                continue
            location_packed = int(token[0])
            value = int(token[2])
            tokens.append(
                ObservationToken(
                    feature=feature,
                    value=value,
                    raw_token=(location_packed, feature_id, value),
                )
            )
        return AgentObservation(agent_id=agent_id, tokens=tokens)
@@ -0,0 +1,69 @@
1
+ """Tracing system for Cogas policy."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Optional
7
+
8
+
9
@dataclass
class TraceEntry:
    """One goal evaluation entry."""

    # Name of the goal that was evaluated this tick.
    goal_name: str
    # True if the goal was already satisfied (skipped); False if it activated.
    satisfied: bool
    # Optional human-readable reason / context for the decision.
    detail: str = ""
16
+
17
+
18
@dataclass
class TraceLog:
    """Collects trace information during a single tick."""

    entries: list[TraceEntry] = field(default_factory=list)
    active_goal_chain: str = ""
    action_name: str = ""
    blackboard_summary: str = ""
    nav_target: Optional[tuple[int, int]] = None
    steps_since_useful: int = 0  # Steps since last useful action (mine/deposit/align/scramble)

    def skip(self, goal_name: str, reason: str = "ok") -> None:
        """Record a satisfied (skipped) goal."""
        entry = TraceEntry(goal_name=goal_name, satisfied=True, detail=reason)
        self.entries.append(entry)

    def activate(self, goal_name: str, detail: str = "") -> None:
        """Record an activated (unsatisfied) goal."""
        entry = TraceEntry(goal_name=goal_name, satisfied=False, detail=detail)
        self.entries.append(entry)

    def format_line(
        self,
        step: int,
        agent_id: int,
        role: str,
        pos: tuple[int, int],
        hp: int,
        level: int,
    ) -> str:
        """Render this tick's trace as one line; detail grows with *level*."""
        idle = self.steps_since_useful
        # Flag agents that have gone a while without a useful action.
        idle_tag = f" IDLE={idle}" if idle >= 20 else ""
        prefix = f"[t={step} a={agent_id} {role} ({pos[0]},{pos[1]}) hp={hp}{idle_tag}]"

        if level == 1:
            # Terse: the winning goal chain and the chosen action only.
            return f"{prefix} {self.active_goal_chain} → {self.action_name}"

        if level == 2:
            # Mid detail: satisfied goals, target distance, blackboard, idle count.
            satisfied_goals = [e for e in self.entries if e.satisfied]
            skips = " ".join(f"skip:{e.goal_name}({e.detail})" for e in satisfied_goals)
            dist_part = ""
            if self.nav_target:
                tx, ty = self.nav_target
                dist_part = f" dist={abs(tx - pos[0]) + abs(ty - pos[1])}"
            bb_part = f" | bb={{{self.blackboard_summary}}}" if self.blackboard_summary else ""
            idle_part = f" idle={idle}" if idle > 0 else ""
            return f"{prefix} {skips} → {self.active_goal_chain}{dist_part} → {self.action_name}{bb_part}{idle_part}"

        # Level 3 — full detail: every evaluated goal, raw nav target, blackboard.
        labeled = []
        for e in self.entries:
            tag = "skip" if e.satisfied else "ACTIVE"
            labeled.append(f"{tag}:{e.goal_name}({e.detail})")
        all_entries = " ".join(labeled)
        target_part = f" nav_target={self.nav_target}" if self.nav_target else ""
        bb_part = f" bb={{{self.blackboard_summary}}}" if self.blackboard_summary else ""
        return f"{prefix} {all_entries}{target_part} → {self.action_name}{bb_part} idle={idle}"