cogames-agents 0.0.0.7__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. cogames_agents/__init__.py +0 -0
  2. cogames_agents/evals/__init__.py +5 -0
  3. cogames_agents/evals/planky_evals.py +415 -0
  4. cogames_agents/policy/__init__.py +0 -0
  5. cogames_agents/policy/evolution/__init__.py +0 -0
  6. cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
  7. cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
  8. cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
  9. cogames_agents/policy/nim_agents/__init__.py +20 -0
  10. cogames_agents/policy/nim_agents/agents.py +98 -0
  11. cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
  12. cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
  13. cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
  14. cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
  15. cogames_agents/policy/nim_agents/common.nim +1054 -0
  16. cogames_agents/policy/nim_agents/install.sh +1 -0
  17. cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
  18. cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
  19. cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
  20. cogames_agents/policy/nim_agents/nimby.lock +3 -0
  21. cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
  22. cogames_agents/policy/nim_agents/random_agents.nim +68 -0
  23. cogames_agents/policy/nim_agents/test_agents.py +53 -0
  24. cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
  25. cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
  26. cogames_agents/policy/scripted_agent/README.md +360 -0
  27. cogames_agents/policy/scripted_agent/__init__.py +0 -0
  28. cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
  29. cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
  30. cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
  31. cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
  32. cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
  33. cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
  34. cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
  35. cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
  36. cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
  37. cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
  38. cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
  39. cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
  40. cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
  41. cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
  42. cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
  43. cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
  44. cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
  45. cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
  46. cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
  47. cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
  48. cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
  49. cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
  50. cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
  51. cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
  52. cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
  53. cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
  54. cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
  55. cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
  56. cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
  57. cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
  58. cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
  59. cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
  60. cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
  61. cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
  62. cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
  63. cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
  64. cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
  65. cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
  66. cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
  67. cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
  68. cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
  69. cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
  70. cogames_agents/policy/scripted_agent/common/roles.py +34 -0
  71. cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
  72. cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
  73. cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
  74. cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
  75. cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
  76. cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
  77. cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
  78. cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
  79. cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
  80. cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
  81. cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
  82. cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
  83. cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
  84. cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
  85. cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
  86. cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
  87. cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
  88. cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
  89. cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
  90. cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
  91. cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
  92. cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
  93. cogames_agents/policy/scripted_agent/planky/README.md +214 -0
  94. cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
  95. cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
  96. cogames_agents/policy/scripted_agent/planky/context.py +68 -0
  97. cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
  98. cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
  99. cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
  100. cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
  101. cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
  102. cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
  103. cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
  104. cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
  105. cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
  106. cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
  107. cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
  108. cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
  109. cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
  110. cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
  111. cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
  112. cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
  113. cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
  114. cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
  115. cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
  116. cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
  117. cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
  118. cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
  119. cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
  120. cogames_agents/policy/scripted_agent/types.py +239 -0
  121. cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
  122. cogames_agents/policy/scripted_agent/utils.py +381 -0
  123. cogames_agents/policy/scripted_registry.py +80 -0
  124. cogames_agents/py.typed +0 -0
  125. cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
  126. cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
  127. cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
  128. cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,570 @@
1
+ """
2
+ Pinky Policy - main policy implementation.
3
+
4
+ AgentBrain coordinates per-agent state, services, and behavior.
5
+ PinkyPolicy is the multi-agent wrapper with URI-based vibe distribution.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Optional
11
+
12
+ import numpy as np
13
+
14
+ from mettagrid.mettagrid_c import dtype_actions
15
+ from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
16
+ from mettagrid.policy.policy_env_interface import PolicyEnvInterface
17
+ from mettagrid.simulator import Action, ObservationToken
18
+ from mettagrid.simulator.interface import AgentObservation
19
+
20
+ from .behaviors import (
21
+ AlignerBehavior,
22
+ MinerBehavior,
23
+ RoleBehavior,
24
+ ScoutBehavior,
25
+ ScramblerBehavior,
26
+ Services,
27
+ change_vibe_action,
28
+ )
29
+ from .services import MapTracker, Navigator, SafetyManager
30
+ from .state import AgentState
31
+ from .types import DEBUG, VIBE_TO_ROLE, DebugInfo, Role
32
+
33
+ # The "gear" vibe triggers changing to role vibe
34
+ GEAR_VIBE = "gear"
35
+
36
+
37
+ class PinkyAgentBrain(StatefulPolicyImpl[AgentState]):
38
+ """Per-agent coordinator that owns state and delegates to behavior/services."""
39
+
40
+ # Role behaviors mapping
41
+ ROLE_BEHAVIORS: dict[Role, type[RoleBehavior]] = {
42
+ Role.MINER: MinerBehavior,
43
+ Role.SCOUT: ScoutBehavior,
44
+ Role.ALIGNER: AlignerBehavior,
45
+ Role.SCRAMBLER: ScramblerBehavior,
46
+ }
47
+
48
def __init__(
    self,
    policy_env_info: PolicyEnvInterface,
    agent_id: int,
    initial_vibe: Optional[str] = None,
    debug: bool = False,
    lazy: bool = False,
    change_role: int = 0,
):
    """Set up per-agent configuration, services, and the starting behavior.

    Args:
        policy_env_info: Environment interface; handed to the navigator and
            map tracker and used as the source of action names.
        agent_id: Index of the agent this brain controls.
        initial_vibe: Assigned vibe; a falsy value falls back to "miner".
        debug: If True, structured debug lines are printed when they change.
        lazy: If True, skip step-1 auto-activation and wait for the gear vibe.
        change_role: If > 0, role changes are considered every N steps.
    """
    # Plain configuration
    self._policy_env_info = policy_env_info
    self._agent_id = agent_id
    self._initial_vibe = initial_vibe or "miner"
    self._debug = debug
    self._lazy = lazy
    self._change_role = change_role

    # Last structured debug line that was printed (used to suppress repeats)
    self._prev_debug_output: Optional[str] = None

    # Starting role comes from the vibe; unknown vibes map to miner.
    # The base role records what the agent started as.
    starting_role = VIBE_TO_ROLE.get(self._initial_vibe, Role.MINER)
    self._role = starting_role
    self._base_role = starting_role

    # Services
    self._navigator = Navigator(policy_env_info)
    self._map_tracker = MapTracker(policy_env_info)
    self._safety = SafetyManager()

    # Behavior instance for the starting role (miner if the role is unmapped)
    self._behavior: RoleBehavior = self.ROLE_BEHAVIORS.get(starting_role, MinerBehavior)()

    # Action-id -> action-name lookup table
    self._action_names = policy_env_info.action_names
84
+
85
def initial_agent_state(self) -> AgentState:
    """Build the starting AgentState from this brain's id, role, and vibe."""
    fresh_state = AgentState(
        agent_id=self._agent_id,
        role=self._role,
        vibe=self._initial_vibe,
    )
    return fresh_state
92
+
93
def step_with_state(self, obs: AgentObservation, state: AgentState) -> tuple[Action, AgentState]:
    """Process one observation and return the chosen action with the updated state.

    Vibe behavior:
    - "gear" vibe (or gear-assigned agent on step 1) -> change to a role vibe picked by agent_id
    - Step 1 with "default" vibe -> change to the assigned role vibe (unless lazy=True)
    - Role vibe (any key of VIBE_TO_ROLE) -> execute the role behavior
    - Any other vibe -> noop, recorded as mode "inactive" in the debug info

    Args:
        obs: The agent's observation for this step.
        state: The agent's mutable state; updated in place and returned.

    Returns:
        A (action, state) tuple.
    """
    state.step += 1

    # Store observation for behavior use
    state.last_obs = obs

    # Update last action executed from observation (for position tracking)
    state.nav.last_action_executed = self._get_last_action_from_obs(obs)

    # Update position based on last action
    self._navigator.update_position(state)

    # Reset per-step flags
    state.nav.using_object_this_step = False

    # Update map knowledge from observation (this also reads current vibe)
    self._map_tracker.update(state, obs)

    # Create services bundle (needed for vibe change action)
    services = Services(
        navigator=self._navigator,
        map_tracker=self._map_tracker,
        safety=self._safety,
        action_names=self._action_names,
    )

    # Activation must be handled BEFORE the "noop if not a role vibe" check,
    # and only for agents that were assigned something other than "default".
    # NOTE(review): both activation branches are assumed to be nested under this
    # guard, as the original comments describe - confirm against the upstream file.
    if self._initial_vibe != "default":
        # Gear vibe handling: pick a role based on agent_id
        if state.vibe == GEAR_VIBE or (self._initial_vibe == GEAR_VIBE and state.step == 1):
            # Distribution over 10 agents: 2 miners, 5 aligners, 3 scramblers
            role_order = (
                "aligner",
                "aligner",
                "scrambler",
                "miner",
                "aligner",
                "scrambler",
                "aligner",
                "scrambler",
                "aligner",
                "miner",
            )
            initial_role = role_order[state.agent_id % len(role_order)]

            action = change_vibe_action(initial_role, services)
            state.nav.last_action = action
            state.debug_info = DebugInfo(
                mode="activate",
                goal=f"gear_to_{initial_role}",
                target_object="vibe",
            )
            if self._debug:
                self._print_debug_if_changed(state, action)
            elif DEBUG and state.step <= 20:
                print(
                    f"[A{state.agent_id}] Step {state.step}: gear vibe, becoming {initial_role}, "
                    f"pos={state.pos}, hp={state.hp}"
                )
            return action, state

        # Step 1 with default vibe (non-lazy mode) -> change to role vibe
        if not self._lazy and state.step == 1 and state.vibe == "default":
            role_vibe = self._initial_vibe  # The assigned role (miner/scout/etc.)
            action = change_vibe_action(role_vibe, services)
            state.nav.last_action = action
            state.debug_info = DebugInfo(
                mode="activate",
                goal=f"change_vibe_to_{role_vibe}",
                target_object="vibe",
            )
            if self._debug:
                self._print_debug_if_changed(state, action)
            elif DEBUG and state.step <= 20:
                print(
                    f"[A{state.agent_id}] Step {state.step}: vibe={state.vibe}, changing to {role_vibe}, "
                    f"pos={state.pos}, hp={state.hp}"
                )
            return action, state

    # If the observed vibe maps to a different role, swap role and behavior.
    # After this point state.role always matches the vibe's role (when the vibe
    # is a role vibe), so no second check is needed before acting.
    new_role = VIBE_TO_ROLE.get(state.vibe)
    if new_role is not None and new_role != state.role:
        state.role = new_role
        behavior_class = self.ROLE_BEHAVIORS.get(new_role, MinerBehavior)
        self._behavior = behavior_class()
        if DEBUG:
            print(f"[A{state.agent_id}] Vibe changed to {state.vibe}, switching to role {new_role.value}")

    # Check for dynamic role changes based on communal resources
    if self._change_role > 0 and state.step % self._change_role == 0 and state.step > 0:
        role_change_action = self._check_role_change(state, services)
        if role_change_action:
            state.nav.last_action = role_change_action
            return role_change_action, state

    # Non-role vibes ("default", heart, etc.) -> noop.
    # The inactive debug info used to live in a trailing block that could never
    # be reached; it is recorded here so inactive steps are traced like the rest.
    if state.vibe not in VIBE_TO_ROLE:
        action = Action(name="noop")
        state.nav.last_action = action
        state.debug_info = DebugInfo(mode="inactive", goal=f"vibe={state.vibe}")
        if self._debug:
            self._print_debug_if_changed(state, action)
        elif DEBUG and state.step <= 20:
            print(
                f"[A{state.agent_id}] Step {state.step}: vibe={state.vibe} (inactive), "
                f"pos={state.pos}, hp={state.hp}, action=noop"
            )
        return action, state

    # Role vibe -> execute role behavior
    action = self._behavior.act(state, services)
    state.nav.last_action = action

    if self._debug:
        self._print_debug_if_changed(state, action)
    elif DEBUG and state.step <= 100:
        print(
            f"[A{state.agent_id}] Step {state.step}: role={state.role.value}, "
            f"pos={state.pos}, hp={state.hp}, cargo={state.total_cargo}, action={action.name}"
        )
    return action, state
248
+
249
+ def _get_last_action_from_obs(self, obs: AgentObservation) -> Optional[str]:
250
+ """Extract last executed action from observation.
251
+
252
+ Global observation tokens (like last_action) can be at any location,
253
+ so we search all tokens for the feature name.
254
+ """
255
+ for tok in obs.tokens:
256
+ if tok.feature.name == "last_action":
257
+ # Convert action ID to name
258
+ action_id = tok.value
259
+ if 0 <= action_id < len(self._action_names):
260
+ return self._action_names[action_id]
261
+ return None
262
+
263
+ def _print_debug_if_changed(self, state: AgentState, action: Action) -> None:
264
+ """Print debug info only if it changed from the previous step."""
265
+ debug_output = state.debug_info.format(state.role.value, action.name)
266
+ if debug_output != self._prev_debug_output:
267
+ # Format position as [col,row]
268
+ pos_str = f"[{state.pos[1]},{state.pos[0]}]"
269
+ dest = state.debug_info.target_pos
270
+ if dest:
271
+ dest_str = f"[{dest[1]},{dest[0]}]"
272
+ dist = abs(state.pos[0] - dest[0]) + abs(state.pos[1] - dest[1])
273
+ target_info = f"({state.debug_info.target_object},dist={dist})"
274
+ else:
275
+ dest_str = "[-,-]"
276
+ target_info = f"({state.debug_info.target_object or '-'})"
277
+ role = state.role.value
278
+ mode = state.debug_info.mode
279
+
280
+ # Build gear string - show which gear the agent has
281
+ gear_list = []
282
+ if state.miner_gear:
283
+ gear_list.append("miner")
284
+ if state.scout_gear:
285
+ gear_list.append("scout")
286
+ if state.aligner_gear:
287
+ gear_list.append("aligner")
288
+ if state.scrambler_gear:
289
+ gear_list.append("scrambler")
290
+ gear_str = ",".join(gear_list) if gear_list else "-"
291
+
292
+ print(
293
+ f"[pinky][Step {state.step}][A{state.agent_id}] [{role}] [g:{gear_str}] [h:{state.heart}] [{mode}] "
294
+ f"{pos_str}->{dest_str} {target_info} : {action.name}"
295
+ )
296
+ self._prev_debug_output = debug_output
297
+
298
def _check_role_change(self, state: AgentState, services: Services) -> Optional[Action]:
    """Decide whether the agent should switch roles based on communal resource levels.

    Role change rules (thresholds are the constants defined at the top of the body):
    - Miner: if every communal resource is above RESOURCE_HIGH_THRESHOLD (25),
      become an aligner (even agent_id) or a scrambler (odd agent_id).
    - Aligner/Scrambler: if any communal resource is below RESOURCE_LOW_THRESHOLD (3)
      and the step count is past ALIGNER_GRACE_PERIOD (100), become a miner.
    - Any other role: no change.

    When a change is chosen, state.debug_info is replaced with a "role_change" entry.

    Args:
        state: Agent state; supplies role, step, agent_id and collective_* resource levels.
        services: Service bundle passed through to change_vibe_action.

    Returns:
        Action to change vibe if a role change is needed, None otherwise.
    """
    # Threshold constants
    RESOURCE_HIGH_THRESHOLD = 25  # All resources must be above this for miner->other
    RESOURCE_LOW_THRESHOLD = 3  # Any resource below this triggers other->miner
    ALIGNER_GRACE_PERIOD = 100  # Protect aligners/scramblers early game

    # Get communal resource levels
    resources = (
        state.collective_carbon,
        state.collective_oxygen,
        state.collective_germanium,
        state.collective_silicon,
    )

    all_resources_high = all(r > RESOURCE_HIGH_THRESHOLD for r in resources)
    any_resource_low = any(r < RESOURCE_LOW_THRESHOLD for r in resources)

    current_role = state.role
    new_vibe: Optional[str] = None

    if current_role == Role.MINER:
        # Miner with abundant resources -> become aligner or scrambler
        if all_resources_high:
            # Alternate between aligner and scrambler based on agent_id
            new_vibe = "aligner" if state.agent_id % 2 == 0 else "scrambler"
            if DEBUG or self._debug:
                print(
                    f"[A{state.agent_id}] ROLE_CHANGE: miner -> {new_vibe} "
                    f"(all resources > {RESOURCE_HIGH_THRESHOLD}: "
                    f"C={state.collective_carbon}, O={state.collective_oxygen}, "
                    f"G={state.collective_germanium}, S={state.collective_silicon})"
                )

    elif current_role in (Role.ALIGNER, Role.SCRAMBLER):
        # Aligner/Scrambler with scarce resources -> become miner,
        # but only after the grace period so they can get established first
        if any_resource_low and state.step > ALIGNER_GRACE_PERIOD:
            new_vibe = "miner"
            if DEBUG or self._debug:
                print(
                    f"[A{state.agent_id}] ROLE_CHANGE: {current_role.value} -> miner "
                    f"(some resource < {RESOURCE_LOW_THRESHOLD}: "
                    f"C={state.collective_carbon}, O={state.collective_oxygen}, "
                    f"G={state.collective_germanium}, S={state.collective_silicon})"
                )

    if not new_vibe:
        return None

    state.debug_info = DebugInfo(
        mode="role_change",
        goal=f"become_{new_vibe}",
        target_object="vibe",
    )
    return change_vibe_action(new_vibe, services)
365
+
366
def _get_any_gear(self, state: AgentState, services: Services) -> Action:
    """Head for (and use) the gear station chosen for this agent by agent_id.

    The station is picked from a fixed 10-entry cycle indexed by
    agent_id: 4 miners, 2 aligners, 2 scramblers, 2 scouts.

    Resolution order:
    1. Station visible in the current observation -> step toward it.
    2. Station position known in the map -> use it if adjacent/on it, else navigate.
    3. Otherwise -> explore with a southward bias.

    state.debug_info is overwritten with a "get_gear" entry in every case.
    """
    from .behaviors.base import is_adjacent, manhattan_distance

    # Per-agent_id station assignment, cycling every 10 agents
    assignment_cycle = (
        "miner",
        "miner",
        "aligner",
        "scrambler",
        "miner",
        "miner",
        "aligner",
        "scrambler",
        "scout",
        "scout",
    )
    wanted_role = assignment_cycle[state.agent_id % len(assignment_cycle)]
    wanted_station = f"{wanted_role}_station"

    # 1) Is the station in view right now?
    if state.last_obs is not None:
        sighting = services.map_tracker.get_direction_to_nearest(
            state, state.last_obs, frozenset({wanted_station})
        )
        if sighting:
            heading, seen_at = sighting
            state.debug_info = DebugInfo(
                mode="get_gear",
                goal=f"visible_{wanted_role}",
                target_object=wanted_station,
                target_pos=seen_at,
            )
            return Action(name=f"move_{heading}")

    # 2) Is the station recorded in map knowledge?
    known_pos = state.map.stations.get(wanted_station)
    if known_pos is None:
        # 3) Unknown so far - explore; gear stations are typically south of spawn in the hub
        state.debug_info = DebugInfo(
            mode="get_gear",
            goal=f"find_{wanted_role}",
            target_object=wanted_station,
        )
        return services.navigator.explore(state, direction_bias="south")

    steps_away = manhattan_distance(state.pos, known_pos)

    # Adjacent to or standing on the station: try to use it
    if is_adjacent(state.pos, known_pos) or state.pos == known_pos:
        state.debug_info = DebugInfo(
            mode="get_gear",
            goal=f"use_{wanted_role}",
            target_object=wanted_station,
            target_pos=known_pos,
        )
        return services.navigator.use_object_at(state, known_pos)

    # Otherwise walk to a cell adjacent to the station
    state.debug_info = DebugInfo(
        mode="get_gear",
        goal=f"to_{wanted_role}(dist={steps_away})",
        target_object=wanted_station,
        target_pos=known_pos,
    )
    return services.navigator.move_to(state, known_pos, reach_adjacent=True)
443
+
444
+
445
+ class PinkyPolicy(MultiAgentPolicy):
446
+ """Multi-agent policy with URI-based vibe distribution.
447
+
448
+ URI parameters specify how many agents get each role:
449
+ - ?miner=1 → agent 0 is miner, rest are default (noop)
450
+ - ?miner=2&scout=1 → agents 0,1 are miners, agent 2 is scout, rest are default
451
+ - Agents beyond the specified count stay neutral (default vibe, noop)
452
+ - ?change_role=100 → agents consider role changes every 100 steps based on communal resources
453
+ """
454
+
455
+ short_names = ["pinky"]
456
+
457
def __init__(
    self,
    policy_env_info: PolicyEnvInterface,
    device: str = "cpu",
    # URI parameters for vibe counts (unspecified = 0, agents beyond specified stay default/noop)
    miner: int = 0,
    scout: int = 0,
    aligner: int = 0,
    scrambler: int = 0,
    gear: int = 0,  # Agents with "gear" vibe get a role vibe assigned by agent_id
    # Debug flag - print structured intent info each step
    debug: int = 0,
    # Lazy mode - don't auto-activate on step 1; wait for gear vibe
    lazy: int = 0,
    # Role change interval - if > 0, agents consider changing roles every N steps.
    # The resource thresholds live in PinkyAgentBrain._check_role_change.
    change_role: int = 0,
    # Accept any extra kwargs to be flexible
    **kwargs: object,
):
    """Configure the multi-agent policy from URI-style parameters.

    Args:
        policy_env_info: Environment interface (features, action names, agent count).
        device: Forwarded to MultiAgentPolicy.
        miner, scout, aligner, scrambler, gear: Number of agents given each
            vibe, assigned to agent ids in that order.
        debug: Truthy enables structured debug printing.
        lazy: Truthy disables step-1 auto-activation.
        change_role: If > 0, interval in steps between role-change checks.
        **kwargs: Extra parameters; accepted and ignored.
    """
    super().__init__(policy_env_info, device=device)
    self._feature_by_id = {feature.id: feature for feature in policy_env_info.obs_features}
    self._action_name_to_index = {name: idx for idx, name in enumerate(policy_env_info.action_names)}
    self._noop_action_value = dtype_actions.type(self._action_name_to_index.get("noop", 0))

    # Debug mode from URI param (?debug=1)
    self._debug = bool(debug)
    # Lazy mode from URI param (?lazy=1) - wait for gear vibe instead of auto-activating
    self._lazy = bool(lazy)
    # Role change interval from URI param (?change_role=100)
    self._change_role = change_role

    # Build vibe distribution from counts - agents beyond this list stay default.
    # Order matters: it decides which agent ids receive which vibe.
    self._vibe_distribution: list[str] = []
    vibe_counts = (
        ("miner", miner),
        ("scout", scout),
        ("aligner", aligner),
        ("scrambler", scrambler),
        ("gear", gear),
    )
    for vibe_name, count in vibe_counts:
        self._vibe_distribution.extend([vibe_name] * count)

    if DEBUG or self._debug:
        lazy_str = " (lazy mode - wait for gear)" if self._lazy else ""
        change_role_str = f" (role changes every {self._change_role} steps)" if self._change_role > 0 else ""
        print(
            f"[PINKY] Vibe distribution: {self._vibe_distribution}{lazy_str}{change_role_str} "
            f"(agents beyond this stay default)"
        )
        if self._debug:
            print("[PINKY] Debug mode enabled - will print: role:mode:goal:target:action")

    # Cache for agent policies
    self._agent_policies: dict[int, StatefulAgentPolicy[AgentState]] = {}
510
+
511
def agent_policy(self, agent_id: int) -> StatefulAgentPolicy[AgentState]:
    """Return the cached per-agent policy, creating it on first request."""
    if agent_id in self._agent_policies:
        return self._agent_policies[agent_id]

    # Vibe comes from the distribution; ids past its end get "default" (will noop)
    within_distribution = agent_id < len(self._vibe_distribution)
    initial_vibe = self._vibe_distribution[agent_id] if within_distribution else "default"

    if DEBUG or self._debug:
        print(f"[PINKY] Agent {agent_id} assigned vibe: {initial_vibe}")

    # Per-agent brain, then the stateful wrapper around it
    agent_brain = PinkyAgentBrain(
        policy_env_info=self._policy_env_info,
        agent_id=agent_id,
        initial_vibe=initial_vibe,
        debug=self._debug,
        lazy=self._lazy,
        change_role=self._change_role,
    )
    self._agent_policies[agent_id] = StatefulAgentPolicy(
        agent_brain,
        self._policy_env_info,
        agent_id=agent_id,
    )
    return self._agent_policies[agent_id]
541
+
542
def step_batch(self, raw_observations: np.ndarray, raw_actions: np.ndarray) -> None:
    """Fill raw_actions in place with one action index per agent.

    Every slot is first set to the noop action. Only agents that are both
    present in raw_observations / the environment and covered by the vibe
    distribution are stepped; the rest keep noop.

    Args:
        raw_observations: Per-agent raw observations, indexed by agent id on axis 0.
        raw_actions: Output array, written in place.
    """
    raw_actions[...] = self._noop_action_value
    num_agents = min(raw_observations.shape[0], self._policy_env_info.num_agents)
    active_agents = min(num_agents, len(self._vibe_distribution))
    for agent_id in range(active_agents):
        obs = self._raw_obs_to_agent_obs(agent_id, raw_observations[agent_id])
        action = self.agent_policy(agent_id).step(obs)
        action_index = self._action_name_to_index.get(action.name)
        if action_index is None:
            # Unknown action name: keep the pre-filled noop instead of silently
            # emitting whatever action happens to sit at index 0.
            continue
        raw_actions[agent_id] = dtype_actions.type(action_index)
551
+
552
def _raw_obs_to_agent_obs(self, agent_id: int, raw_obs: np.ndarray) -> AgentObservation:
    """Convert one agent's raw token rows into an AgentObservation.

    Each row is read as (packed location, feature id, value). Parsing stops
    at the first row whose feature id is 0xFF; rows whose feature id is not
    in the known feature table are skipped.
    """
    parsed: list[ObservationToken] = []
    for row in raw_obs:
        fid = int(row[1])
        if fid == 0xFF:
            # 0xFF feature id ends the token list
            break
        known_feature = self._feature_by_id.get(fid)
        if known_feature is None:
            continue
        packed_location = int(row[0])
        token_value = int(row[2])
        parsed.append(
            ObservationToken(
                feature=known_feature,
                value=token_value,
                raw_token=(packed_location, fid, token_value),
            )
        )
    return AgentObservation(agent_id=agent_id, tokens=parsed)
@@ -0,0 +1,7 @@
1
+ """Services for Pinky policy."""
2
+
3
+ from .map_tracker import MapTracker
4
+ from .navigator import Navigator
5
+ from .safety import SafetyManager
6
+
7
+ __all__ = ["Navigator", "MapTracker", "SafetyManager"]