cogames-agents 0.0.0.7__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. cogames_agents/__init__.py +0 -0
  2. cogames_agents/evals/__init__.py +5 -0
  3. cogames_agents/evals/planky_evals.py +415 -0
  4. cogames_agents/policy/__init__.py +0 -0
  5. cogames_agents/policy/evolution/__init__.py +0 -0
  6. cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
  7. cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
  8. cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
  9. cogames_agents/policy/nim_agents/__init__.py +20 -0
  10. cogames_agents/policy/nim_agents/agents.py +98 -0
  11. cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
  12. cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
  13. cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
  14. cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
  15. cogames_agents/policy/nim_agents/common.nim +1054 -0
  16. cogames_agents/policy/nim_agents/install.sh +1 -0
  17. cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
  18. cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
  19. cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
  20. cogames_agents/policy/nim_agents/nimby.lock +3 -0
  21. cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
  22. cogames_agents/policy/nim_agents/random_agents.nim +68 -0
  23. cogames_agents/policy/nim_agents/test_agents.py +53 -0
  24. cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
  25. cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
  26. cogames_agents/policy/scripted_agent/README.md +360 -0
  27. cogames_agents/policy/scripted_agent/__init__.py +0 -0
  28. cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
  29. cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
  30. cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
  31. cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
  32. cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
  33. cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
  34. cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
  35. cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
  36. cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
  37. cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
  38. cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
  39. cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
  40. cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
  41. cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
  42. cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
  43. cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
  44. cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
  45. cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
  46. cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
  47. cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
  48. cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
  49. cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
  50. cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
  51. cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
  52. cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
  53. cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
  54. cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
  55. cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
  56. cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
  57. cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
  58. cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
  59. cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
  60. cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
  61. cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
  62. cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
  63. cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
  64. cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
  65. cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
  66. cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
  67. cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
  68. cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
  69. cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
  70. cogames_agents/policy/scripted_agent/common/roles.py +34 -0
  71. cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
  72. cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
  73. cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
  74. cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
  75. cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
  76. cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
  77. cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
  78. cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
  79. cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
  80. cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
  81. cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
  82. cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
  83. cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
  84. cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
  85. cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
  86. cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
  87. cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
  88. cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
  89. cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
  90. cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
  91. cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
  92. cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
  93. cogames_agents/policy/scripted_agent/planky/README.md +214 -0
  94. cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
  95. cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
  96. cogames_agents/policy/scripted_agent/planky/context.py +68 -0
  97. cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
  98. cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
  99. cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
  100. cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
  101. cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
  102. cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
  103. cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
  104. cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
  105. cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
  106. cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
  107. cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
  108. cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
  109. cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
  110. cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
  111. cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
  112. cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
  113. cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
  114. cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
  115. cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
  116. cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
  117. cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
  118. cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
  119. cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
  120. cogames_agents/policy/scripted_agent/types.py +239 -0
  121. cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
  122. cogames_agents/policy/scripted_agent/utils.py +381 -0
  123. cogames_agents/policy/scripted_registry.py +80 -0
  124. cogames_agents/py.typed +0 -0
  125. cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
  126. cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
  127. cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
  128. cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,242 @@
1
+ import random
2
+
3
+ from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
4
+ from mettagrid.simulator import Action
5
+ from mettagrid.simulator.interface import AgentObservation
6
+
7
+ from .common.geometry import manhattan
8
+ from .types import BaselineHyperparameters, CellType, SimpleAgentState
9
+ from .utils import (
10
+ change_vibe_action,
11
+ is_station,
12
+ is_wall,
13
+ parse_observation,
14
+ read_inventory_from_obs,
15
+ update_agent_position,
16
+ use_object_at,
17
+ )
18
+
19
+
20
class DemoPolicyImpl(StatefulPolicyImpl[SimpleAgentState]):
    """Minimal scripted per-agent policy.

    Behavior loop: learn the heart recipe from a hub, gather the required
    resources from extractors, assemble hearts at the hub, and deliver them
    to a chest.  Movement is single-step greedy with a random fallback.
    """

    # Assumed occupancy-grid extent; the agent starts at the grid center.
    _MAP_SIZE = 50

    def __init__(self, policy_env_info, agent_id, hyperparams, *, heart_recipe=None):
        self._agent_id = agent_id
        self._hyperparams = hyperparams
        self._policy_env_info = policy_env_info
        self._action_names = policy_env_info.action_names
        # (row, col) deltas for each cardinal move action name.
        self._move_deltas = {"north": (-1, 0), "south": (1, 0), "east": (0, 1), "west": (0, -1)}

        # Half-extents of the egocentric observation window.
        self._obs_hr = policy_env_info.obs_height // 2
        self._obs_wr = policy_env_info.obs_width // 2

        if heart_recipe:
            # Energy is tracked separately and never gathered, so drop it.
            self._heart_recipe = {k: v for k, v in heart_recipe.items() if k != "energy"}
        else:
            self._heart_recipe = None

    def initial_agent_state(self):
        """Build the blank per-agent state, centered on an all-FREE grid."""
        center = self._MAP_SIZE // 2  # 25 for the default 50x50 grid
        return SimpleAgentState(
            agent_id=self._agent_id,
            map_height=self._MAP_SIZE,
            map_width=self._MAP_SIZE,
            occupancy=[[CellType.FREE.value] * self._MAP_SIZE for _ in range(self._MAP_SIZE)],
            row=center,
            col=center,
            heart_recipe=self._heart_recipe,
        )

    # ------------------------------------------------------------
    # Utility helpers (kept tiny)
    # ------------------------------------------------------------

    def _adjacent(self, s, pos):
        """True when *pos* is exactly one Manhattan step from the agent."""
        return manhattan((s.row, s.col), pos) == 1

    def _blocked_cells(self, s, parsed):
        """Set of adjacent cells the agent must not step onto.

        A cell is blocked when it holds a wall, an extractor, a hub/chest/
        junction station, or another agent.  Shared by ``_random_step`` and
        ``_step_towards`` (previously duplicated in both).

        NOTE(review): the agent clause compares ``agent_group`` against the
        agent's own *id* (not its group).  Preserved as-is; confirm against
        the observation schema.
        """
        return {
            (r, c)
            for (r, c), obj in parsed.nearby_objects.items()
            if self._adjacent(s, (r, c))
            and (
                is_wall(obj.name)
                or "extractor" in obj.name
                or is_station(obj.name, "hub")
                or is_station(obj.name, "chest")
                or is_station(obj.name, "junction")
                or (obj.name == "agent" and obj.agent_group != s.agent_id)
            )
        }

    def _random_step(self, s, parsed):
        """Move in a random unblocked direction; noop when boxed in."""
        dirs = list(self._move_deltas.keys())
        random.shuffle(dirs)
        blocked = self._blocked_cells(s, parsed)
        for d in dirs:
            dr, dc = self._move_deltas[d]
            nr, nc = s.row + dr, s.col + dc
            if (nr, nc) not in blocked:
                return Action(name=f"move_{d}")
        return Action(name="noop")

    def _step_towards(self, s, target, parsed):
        """Single-step greedy pursuit, else random.

        Candidate directions are ordered so the axis with the larger
        remaining distance is tried first.
        """
        r, c = s.row, s.col
        tr, tc = target
        cand = []
        if abs(tr - r) >= abs(tc - c):
            if tr < r:
                cand.append("north")
            elif tr > r:
                cand.append("south")
            if tc < c:
                cand.append("west")
            elif tc > c:
                cand.append("east")
        else:
            if tc < c:
                cand.append("west")
            elif tc > c:
                cand.append("east")
            if tr < r:
                cand.append("north")
            elif tr > r:
                cand.append("south")

        blocked = self._blocked_cells(s, parsed)
        for d in cand:
            dr, dc = self._move_deltas[d]
            nr, nc = r + dr, c + dc
            if (nr, nc) not in blocked:
                return Action(name=f"move_{d}")

        # No greedy direction is open: wander instead of stalling.
        return self._random_step(s, parsed)

    def _closest(self, s, parsed, pred):
        """Nearest visible object position satisfying *pred*, or None."""
        items = [pos for pos, obj in parsed.nearby_objects.items() if pred(obj)]
        return min(items, key=lambda p: manhattan((s.row, s.col), p)) if items else None

    def _rtype(self, name):
        """Resource type for an extractor object name, or None.

        Strips any ``clipped_`` prefix, then maps e.g. ``carbon_extractor``
        -> ``"carbon"``.  Returns None for non-extractor or unknown names.
        """
        name = name.lower().replace("clipped_", "")
        if "_extractor" not in name:
            return None
        name = name.replace("_extractor", "")
        return name if name in ("carbon", "oxygen", "germanium", "silicon") else None

    # ------------------------------------------------------------
    # Main step
    # ------------------------------------------------------------
    def step_with_state(self, obs: AgentObservation, s: SimpleAgentState):
        """Advance one tick and return an ``(Action, state)`` pair."""
        s.step_count += 1
        read_inventory_from_obs(s, obs, obs_hr=self._obs_hr, obs_wr=self._obs_wr)
        update_agent_position(s, move_deltas=self._move_deltas)

        parsed = parse_observation(
            s,
            obs,
            obs_hr=self._obs_hr,
            obs_wr=self._obs_wr,
            spatial_feature_names={"tag", "cooldown_remaining", "clipped", "remaining_uses"},
            agent_feature_key_by_name={"agent:group": "agent_group", "agent:frozen": "agent_frozen"},
            protocol_input_prefix="protocol_input:",
            protocol_output_prefix="protocol_output:",
            tag_names=self._policy_env_info.tag_id_to_name,
        )

        # Learn recipe if visible: any hub advertising a heart output tells
        # us the required inputs (minus energy).
        if s.heart_recipe is None:
            for _pos, obj in parsed.nearby_objects.items():
                if obj.name == "hub" and obj.protocol_outputs.get("heart", 0) > 0:
                    s.heart_recipe = {k: v for k, v in obj.protocol_inputs.items() if k != "energy"}

        # ---------------- PRE-PHASE: find hub to learn recipe ----------------
        if s.heart_recipe is None:
            # Switch glyph first so the hub protocol applies to us.
            if s.current_glyph != "heart_a":
                s.current_glyph = "heart_a"
                return change_vibe_action("heart_a", action_names=self._action_names), s

            hub = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "hub"))
            if hub:
                if self._adjacent(s, hub):
                    return use_object_at(s, hub), s
                return self._step_towards(s, hub, parsed), s

            return self._random_step(s, parsed), s

        # ---------------- MAIN PHASE ----------------

        # Deliver hearts to the nearest chest (with the default glyph).
        if s.hearts > 0:
            chest = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "chest"))
            if chest:
                if s.current_glyph != "default":
                    s.current_glyph = "default"
                    return change_vibe_action("default", action_names=self._action_names), s
                if self._adjacent(s, chest):
                    return use_object_at(s, chest), s
                return self._step_towards(s, chest, parsed), s

        # Assemble a heart at the hub once every recipe input is covered.
        if (
            s.carbon >= s.heart_recipe.get("carbon", 0)
            and s.oxygen >= s.heart_recipe.get("oxygen", 0)
            and s.germanium >= s.heart_recipe.get("germanium", 0)
            and s.silicon >= s.heart_recipe.get("silicon", 0)
        ):
            hub = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "hub"))
            if hub:
                if s.current_glyph != "heart_a":
                    s.current_glyph = "heart_a"
                    return change_vibe_action("heart_a", action_names=self._action_names), s
                if self._adjacent(s, hub):
                    return use_object_at(s, hub), s
                return self._step_towards(s, hub, parsed), s

        # Gather needed resources: pick the nearest extractor whose resource
        # we still lack.
        deficits = {
            r: s.heart_recipe.get(r, 0) - getattr(s, r, 0) for r in ("carbon", "oxygen", "germanium", "silicon")
        }
        needed = [
            (pos, obj, self._rtype(obj.name.lower()))
            for pos, obj in parsed.nearby_objects.items()
            if "extractor" in obj.name.lower()
        ]

        needed = [(pos, obj, r) for pos, obj, r in needed if r and deficits[r] > 0]

        if needed:
            pos, obj, r = min(needed, key=lambda x: manhattan((s.row, s.col), x[0]))
            if self._adjacent(s, pos):
                return use_object_at(s, pos), s
            return self._step_towards(s, pos, parsed), s

        # Otherwise wander
        return self._random_step(s, parsed), s
224
+
225
+
226
class DemoPolicy(MultiAgentPolicy):
    """Multi-agent wrapper that lazily builds one DemoPolicyImpl per agent."""

    short_names = ["tiny_baseline"]

    def __init__(self, policy_env_info, device: str = "cpu", hyperparams=None, *, heart_recipe=None):
        super().__init__(policy_env_info, device=device)
        self._hyperparams = hyperparams or BaselineHyperparameters()
        self._heart_recipe = heart_recipe
        self._agent_policies = {}

    def agent_policy(self, agent_id):
        """Return (creating on first use) the stateful policy for *agent_id*."""
        cached = self._agent_policies.get(agent_id)
        if cached is None:
            impl = DemoPolicyImpl(
                self._policy_env_info,
                agent_id,
                self._hyperparams,
                heart_recipe=self._heart_recipe,
            )
            cached = StatefulAgentPolicy(impl, self._policy_env_info, agent_id=agent_id)
            self._agent_policies[agent_id] = cached
        return cached
@@ -0,0 +1,126 @@
1
+ """
2
+ Pathfinding utilities for scripted agents.
3
+
4
+ This module contains a BFS shortest-path implementation and related utilities
5
+ for navigating the grid world.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections import deque
11
+ from typing import TYPE_CHECKING
12
+
13
+ if TYPE_CHECKING:
14
+ from cogames_agents.policy.scripted_agent.types import CellType, SimpleAgentState
15
+
16
+
17
def compute_goal_cells(
    state: SimpleAgentState, target: tuple[int, int], reach_adjacent: bool, cell_type: type[CellType]
) -> list[tuple[int, int]]:
    """Return the cells that count as "arrived" for a pathfinding request.

    When *reach_adjacent* is False the target itself is the only goal.
    Otherwise the four orthogonal neighbours of *target* are used: first the
    known-traversable ones, and if none are known yet, any in-bounds
    neighbour that is not a known obstacle (so the agent can still explore
    toward unknown tiles).
    """
    if not reach_adjacent:
        return [target]

    tr, tc = target
    neighbours = [(tr - 1, tc), (tr + 1, tc), (tr, tc - 1), (tr, tc + 1)]

    goals = [cell for cell in neighbours if is_traversable(state, cell[0], cell[1], cell_type)]
    if goals:
        return goals

    # No adjacent traversable tile is known yet: fall back to unknown (but
    # not known-obstacle) neighbours.
    return [
        (nr, nc)
        for nr, nc in neighbours
        if is_within_bounds(state, nr, nc) and state.occupancy[nr][nc] != cell_type.OBSTACLE.value
    ]
39
+
40
+
41
def shortest_path(
    state: SimpleAgentState,
    start: tuple[int, int],
    goals: list[tuple[int, int]],
    allow_goal_block: bool,
    cell_type: type[CellType],
) -> list[tuple[int, int]]:
    """Breadth-first search from *start* to the nearest cell in *goals*.

    *allow_goal_block* permits entering a goal cell even when it is not
    normally traversable (e.g. stepping onto a station tile).  Returns the
    path excluding *start*, or an empty list when no goal is reachable.
    """
    goal_set = set(goals)
    frontier: deque[tuple[int, int]] = deque([start])
    came_from: dict[tuple[int, int], tuple[int, int] | None] = {start: None}

    def walkable(r: int, c: int) -> bool:
        # Goal tiles may be enterable even when blocked.
        if allow_goal_block and (r, c) in goal_set:
            return True
        return is_traversable(state, r, c, cell_type)

    while frontier:
        cell = frontier.popleft()
        if cell in goal_set:
            return reconstruct_path(came_from, cell)

        for nxt in get_neighbors(state, cell):
            if nxt not in came_from and walkable(nxt[0], nxt[1]):
                came_from[nxt] = cell
                frontier.append(nxt)

    return []
71
+
72
+
73
def reconstruct_path(
    came_from: dict[tuple[int, int], tuple[int, int] | None],
    current: tuple[int, int],
) -> list[tuple[int, int]]:
    """Rebuild the start->goal path from a BFS predecessor map.

    Walks parent links back from *current* until the start node (whose
    predecessor is None), then returns the visited cells in forward order.
    The start cell itself is not included.
    """
    reversed_path: list[tuple[int, int]] = []
    node = current
    while (parent := came_from[node]) is not None:
        reversed_path.append(node)
        node = parent
    return reversed_path[::-1]
88
+
89
+
90
def get_neighbors(state: SimpleAgentState, pos: tuple[int, int]) -> list[tuple[int, int]]:
    """Return the 4-connected neighbours of *pos* that lie inside the map."""
    r, c = pos
    return [
        cell
        for cell in ((r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1))
        if is_within_bounds(state, cell[0], cell[1])
    ]
97
+
98
+
99
def is_within_bounds(state: SimpleAgentState, r: int, c: int) -> bool:
    """Return True when (r, c) lies inside the state's occupancy grid."""
    in_rows = 0 <= r < state.map_height
    in_cols = 0 <= c < state.map_width
    return in_rows and in_cols
104
+
105
+
106
def is_passable(state: SimpleAgentState, r: int, c: int, cell_type: type[CellType]) -> bool:
    """
    Check if a cell can be walked on.

    Thin alias for :func:`is_traversable`: the cell must be in bounds,
    known to be FREE, and not occupied by another agent.  The previous
    separate bounds check was redundant — is_traversable performs it too.
    """
    return is_traversable(state, r, c, cell_type)
113
+
114
+
115
def is_traversable(state: SimpleAgentState, r: int, c: int, cell_type: type[CellType]) -> bool:
    """Return True when (r, c) is in bounds, agent-free, and known FREE.

    Unknown cells are treated as non-traversable: we only walk on terrain
    that has actually been observed to be free.
    """
    if not is_within_bounds(state, r, c):
        return False
    # Never path through another agent's tile.
    if (r, c) in state.agent_occupancy:
        return False
    return state.occupancy[r][c] == cell_type.FREE.value
@@ -0,0 +1,317 @@
1
+ # Pinky Policy Design
2
+
3
+ Pinky is a scripted multi-agent policy for CogsGuard. Each agent is assigned a role and executes behavior-tree style
4
+ decision making.
5
+
6
+ ## Current Implementation
7
+
8
+ ### Roles
9
+
10
+ Roles are assigned at spawn via URI parameters (e.g., `pinky?miner=2&scout=1`).
11
+
12
+ | Role | Risk Tolerance | Gear Bonus | Primary Action |
13
+ | ------------- | -------------- | ------------------ | -------------------------------------------- |
14
+ | **MINER** | Conservative | +40 cargo capacity | Harvest resources, deposit at cogs buildings |
15
+ | **SCOUT** | Aggressive | +400 HP | Explore map frontiers |
16
+ | **ALIGNER** | Moderate | +20 influence | Convert neutral junctions to cogs |
17
+ | **SCRAMBLER** | Aggressive | +200 HP | Neutralize enemy (clips) junctions |
18
+
19
+ ### Modes (Current)
20
+
21
+ Modes are set via `debug_info.mode` for debugging output. They describe what the agent is currently doing:
22
+
23
+ **Universal modes** (all roles):
24
+
25
+ - `retreat` - HP critical, returning to safe zone
26
+ - `get_gear` - Moving to/using role station to acquire gear
27
+ - `explore` - Random or directed exploration
28
+
29
+ **Miner-specific:**
30
+
31
+ - `mine` - Moving toward extractors
32
+ - `deposit` - Returning cargo to cogs depot
33
+
34
+ **Scout-specific:**
35
+
36
+ - `explore` - Frontier-based exploration (BFS to unexplored cells)
37
+
38
+ **Aligner-specific:**
39
+
40
+ - `get_hearts` - Acquiring hearts from chest
41
+ - `align` - Converting neutral junction to cogs
42
+
43
+ **Scrambler-specific:**
44
+
45
+ - `get_hearts` - Acquiring hearts from chest
46
+ - `scramble` - Neutralizing enemy junction
47
+
48
+ **Policy-level:**
49
+
50
+ - `activate` - Changing vibe to assigned role (step 1)
51
+ - `inactive` - Agent has non-role vibe, nooping
52
+
53
+ ### State Structure (Current)
54
+
55
+ ```python
56
+ @dataclass
57
+ class AgentState:
58
+ agent_id: int
59
+ role: Role # MINER, SCOUT, ALIGNER, SCRAMBLER
60
+ vibe: str # Current vibe from observation
61
+ step: int # Step counter
62
+
63
+ # Position
64
+ row: int
65
+ col: int
66
+
67
+ # Inventory
68
+ energy: int
69
+ hp: int
70
+ carbon: int
+ oxygen: int
+ germanium: int
+ silicon: int
71
+ heart: int
72
+ influence: int
73
+
74
+ # Gear flags
75
+ miner_gear: bool
76
+ scout_gear: bool
77
+ aligner_gear: bool
78
+ scrambler_gear: bool
79
+
80
+ # Knowledge
81
+ map: MapKnowledge # Occupancy grid, structures, stations
82
+ nav: NavigationState # Path cache, exploration direction
83
+
84
+ # Debug
85
+ debug_info: DebugInfo # mode, goal, target_object, target_pos
86
+ ```
87
+
88
+ ### Decision Flow (Current)
89
+
90
+ Each behavior follows a priority-based decision tree:
91
+
92
+ ```
93
+ MinerBehavior.act():
94
+ 1. HP <= 15? → retreat
95
+ 2. No miner_gear? → get_gear (or explore for station)
96
+ 3. Cargo full? → deposit
97
+ 4. Otherwise → mine (move toward extractors)
98
+
99
+ ScoutBehavior.act():
100
+ 1. HP < 50? → retreat
101
+ 2. No scout_gear? → get_gear
102
+ 3. Otherwise → explore_frontier
103
+
104
+ AlignerBehavior.act():
105
+ 1. Should retreat? → retreat
106
+ 2. No aligner_gear? → get_gear
107
+ 3. No hearts? → get_hearts
108
+ 4. Otherwise → align_junction
109
+
110
+ ScramblerBehavior.act():
111
+ 1. HP < 30? → retreat
112
+ 2. No scrambler_gear? → get_gear
113
+ 3. No hearts? → get_hearts
114
+ 4. Otherwise → scramble_junction
115
+ ```
116
+
117
+ ### Limitations of Current Design
118
+
119
+ 1. **No explicit state machine** - Mode transitions are implicit in if/else priority chains
120
+ 2. **No goal/destination tracking** - Each step re-evaluates from scratch
121
+ 3. **No role selection** - Roles are fixed at spawn, agents can't adapt
122
+ 4. **No mood/urgency** - All decisions binary (do/don't)
123
+ 5. **Debug-only modes** - Modes exist for logging, not for control flow
124
+
125
+ ---
126
+
127
+ ## Proposed Design
128
+
129
+ ### Roles (Expanded)
130
+
131
+ | Role | Description |
132
+ | --------------- | ---------------------------------------- |
133
+ | `resting` | Inactive, waiting for assignment |
134
+ | `choosing_role` | Evaluating team composition to pick role |
135
+ | `miner` | Resource gathering specialist |
136
+ | `scout` | Map exploration specialist |
137
+ | `aligner` | Territory expansion specialist |
138
+ | `scrambler` | Enemy territory disruption specialist |
139
+
140
+ ### Modes (Explicit State Machine)
141
+
142
+ Modes should be **first-class state** that drives behavior, not just debug labels.
143
+
144
+ **Universal Modes** (available to all roles):
145
+
146
+ | Mode | Description | Exit Condition |
147
+ | ---------- | ------------------------- | --------------------------- |
148
+ | `idle` | No current task | Goal assigned |
149
+ | `get_gear` | Acquiring role equipment | Gear obtained |
150
+ | `retreat` | Returning to safety | HP restored above threshold |
151
+ | `explore` | Searching for something | Target found |
152
+ | `move_to` | Navigating to destination | Arrived at destination |
153
+
154
+ **Miner Modes:**
155
+
156
+ | Mode | Description | Exit Condition |
157
+ | --------- | -------------------- | -------------------------------- |
158
+ | `harvest` | Extracting resources | Cargo full or extractor depleted |
159
+ | `deposit` | Delivering cargo | Cargo empty |
160
+
161
+ **Scout Modes:**
162
+
163
+ | Mode | Description | Exit Condition |
164
+ | ------------------ | ----------------------- | ------------------ |
165
+ | `frontier_explore` | BFS to unexplored areas | Map fully explored |
166
+ | `report` | Returning with intel | At cogs building |
167
+
168
+ **Aligner Modes:**
169
+
170
+ | Mode | Description | Exit Condition |
171
+ | ---------------- | --------------------------- | ------------------ |
172
+ | `acquire_hearts` | Getting hearts from chest | Have hearts |
173
+ | `align_junction` | Converting neutral junction | Junction converted |
174
+
175
+ **Scrambler Modes:**
176
+
177
+ | Mode | Description | Exit Condition |
178
+ | ---------------- | --------------------------- | -------------------- |
179
+ | `acquire_hearts` | Getting hearts from chest | Have hearts |
180
+ | `raid_junction` | Neutralizing enemy junction | Junction neutralized |
181
+
182
+ ### Goals and Destinations
183
+
184
+ Explicit goal tracking separates **intent** from **execution**:
185
+
186
+ ```python
187
+ @dataclass
188
+ class AgentGoal:
189
+ """What the agent is trying to achieve."""
190
+
191
+ # High-level intent
192
+ goal: str # "get_gear", "harvest_carbon", "deposit_cargo"
193
+
194
+ # Target
195
+ destination: Optional[str] # "miner_station", "carbon_extractor", "hub"
196
+ destination_pos: Optional[tuple[int, int]]
197
+
198
+ # Progress
199
+ started_at_step: int
200
+ timeout_steps: int = 100 # Give up and re-evaluate
201
+
202
+ # Completion
203
+ success_condition: str # "has_miner_gear", "cargo_full", "cargo_empty"
204
+ ```
205
+
206
+ ### Mood / Urgency
207
+
208
+ Mood modifies behavior parameters:
209
+
210
+ | Mood | Trigger | Effect |
211
+ | ----------- | ----------------------- | ---------------------------------------- |
212
+ | `calm` | HP > 80%, safe zone | Normal risk tolerance |
213
+ | `cautious` | HP 50-80% or near enemy | Reduced exploration range |
214
+ | `urgent` | HP 20-50% | Prioritize retreat paths |
215
+ | `desperate` | HP < 20% | Shortest path to safety, ignore all else |
216
+
217
+ ### Proposed State Structure
218
+
219
+ ```python
220
+ @dataclass
221
+ class AgentState:
222
+ agent_id: int
223
+
224
+ # Identity
225
+ role: Role # resting, choosing_role, miner, scout, aligner, scrambler
226
+
227
+ # Behavioral state machine
228
+ mode: Mode # Current mode (idle, get_gear, retreat, harvest, etc.)
229
+ mood: Mood # calm, cautious, urgent, desperate
230
+
231
+ # Current goal
232
+ goal: Optional[AgentGoal] # What we're trying to achieve
233
+
234
+ # ... rest of inventory, map, nav state ...
235
+ ```
236
+
237
+ ### State Transition Diagram
238
+
239
+ ```
240
+ ┌─────────────────────────────────────────┐
241
+ │ │
242
+ ▼ │
243
+ ┌──────────┐ │
244
+ spawn───►│ resting │ │
245
+ └────┬─────┘ │
246
+ │ team needs role │
247
+ ▼ │
248
+ ┌──────────────┐ │
249
+ │choosing_role │ │
250
+ └──────┬───────┘ │
251
+ │ role selected │
252
+ ┌──────────┼──────────┬──────────┐ │
253
+ ▼ ▼ ▼ ▼ │
254
+ ┌───────┐ ┌───────┐ ┌─────────┐ ┌───────────┐ │
255
+ │ miner │ │ scout │ │ aligner │ │ scrambler │ │
256
+ └───┬───┘ └───┬───┘ └────┬────┘ └─────┬─────┘ │
257
+ │ │ │ │ │
258
+ └─────────┴──────────┴────────────┘ │
259
+ │ │
260
+ │ HP critical or role no longer needed
261
+ │ │
262
+ └───────────────────────────────────┘
263
+ ```
264
+
265
+ ### Role-Specific Mode Transitions
266
+
267
+ **Miner:**
268
+
269
+ ```
270
+ idle ──► get_gear ──► explore ──► harvest ──► deposit ──► harvest
271
+ │ │ │ │
272
+ └──────────────┴───────────┴───────────┴──► retreat ──► idle
273
+ (HP critical)
274
+ ```
275
+
276
+ **Aligner:**
277
+
278
+ ```
279
+ idle ──► get_gear ──► acquire_hearts ──► align_junction ──► acquire_hearts
280
+ │ │ │
281
+ └────────────────┴──────────────────┴──► retreat ──► idle
282
+ ```
283
+
284
+ ### Transition Triggers
285
+
286
+ | From | To | Trigger |
287
+ | ------------------ | ------------------ | ------------------------- |
288
+ | `idle` | `get_gear` | Role assigned, no gear |
289
+ | `get_gear` | role mode | Gear acquired |
290
+ | any | `retreat` | HP < threshold for mood |
291
+ | `retreat` | `idle` | HP restored, in safe zone |
292
+ | `harvest` | `deposit` | Cargo full |
293
+ | `deposit` | `harvest` | Cargo empty |
294
+ | `acquire_hearts` | `align`/`scramble` | Have hearts |
295
+ | `align`/`scramble` | `acquire_hearts` | Hearts depleted |
296
+ | any | `idle` | Goal timeout reached |
297
+
298
+ ---
299
+
300
+ ## Migration Path
301
+
302
+ 1. **Add `Mode` enum** with all modes (keep current logic)
303
+ 2. **Add `goal` field** to AgentState
304
+ 3. **Refactor behaviors** to set mode/goal explicitly
305
+ 4. **Add mood system** for risk tolerance modulation
306
+ 5. **Add role selection** for `choosing_role` state
307
+ 6. **Add `resting`** state for unassigned agents
308
+
309
+ ---
310
+
311
+ ## Debug Output Format
312
+
313
+ Current: `role:mode:goal:target:action`
314
+
315
+ Proposed: `role:mode:mood:goal→dest:action`
316
+
317
+ Example: `miner:harvest:calm:get_carbon→carbon_extractor(5,12):move_east`
@@ -0,0 +1,5 @@
1
+ """Pinky policy for CogsGuard game."""
2
+
3
+ from .policy import PinkyPolicy
4
+
5
+ __all__ = ["PinkyPolicy"]