cogames 0.3.49__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. cogames/cli/client.py +60 -6
  2. cogames/cli/docsync/__init__.py +0 -0
  3. cogames/cli/docsync/_nb_md_directive_processing.py +180 -0
  4. cogames/cli/docsync/_nb_md_sync.py +103 -0
  5. cogames/cli/docsync/_nb_py_sync.py +122 -0
  6. cogames/cli/docsync/_three_way_sync.py +115 -0
  7. cogames/cli/docsync/_utils.py +76 -0
  8. cogames/cli/docsync/docsync.py +156 -0
  9. cogames/cli/leaderboard.py +112 -28
  10. cogames/cli/mission.py +64 -53
  11. cogames/cli/policy.py +46 -10
  12. cogames/cli/submit.py +268 -67
  13. cogames/cogs_vs_clips/cog.py +79 -0
  14. cogames/cogs_vs_clips/cogs_vs_clips_mapgen.md +19 -16
  15. cogames/cogs_vs_clips/cogsguard_reward_variants.py +153 -0
  16. cogames/cogs_vs_clips/cogsguard_tutorial.py +56 -0
  17. cogames/cogs_vs_clips/evals/README.md +10 -16
  18. cogames/cogs_vs_clips/evals/cogsguard_evals.py +81 -0
  19. cogames/cogs_vs_clips/evals/diagnostic_evals.py +49 -444
  20. cogames/cogs_vs_clips/evals/difficulty_variants.py +13 -326
  21. cogames/cogs_vs_clips/evals/integrated_evals.py +5 -45
  22. cogames/cogs_vs_clips/evals/spanning_evals.py +9 -180
  23. cogames/cogs_vs_clips/mission.py +187 -146
  24. cogames/cogs_vs_clips/missions.py +46 -137
  25. cogames/cogs_vs_clips/procedural.py +8 -8
  26. cogames/cogs_vs_clips/sites.py +107 -3
  27. cogames/cogs_vs_clips/stations.py +198 -186
  28. cogames/cogs_vs_clips/tutorial_missions.py +1 -1
  29. cogames/cogs_vs_clips/variants.py +25 -476
  30. cogames/device.py +13 -1
  31. cogames/{policy/scripted_agent/README.md → docs/SCRIPTED_AGENT.md} +82 -58
  32. cogames/evaluate.py +18 -30
  33. cogames/main.py +1434 -243
  34. cogames/maps/canidate1_1000.map +1 -1
  35. cogames/maps/canidate1_1000_stations.map +2 -2
  36. cogames/maps/canidate1_500.map +1 -1
  37. cogames/maps/canidate1_500_stations.map +2 -2
  38. cogames/maps/canidate2_1000.map +1 -1
  39. cogames/maps/canidate2_1000_stations.map +2 -2
  40. cogames/maps/canidate2_500.map +1 -1
  41. cogames/maps/canidate2_500_stations.map +2 -2
  42. cogames/maps/canidate3_1000.map +1 -1
  43. cogames/maps/canidate3_1000_stations.map +2 -2
  44. cogames/maps/canidate3_500.map +1 -1
  45. cogames/maps/canidate3_500_stations.map +2 -2
  46. cogames/maps/canidate4_500.map +1 -1
  47. cogames/maps/canidate4_500_stations.map +2 -2
  48. cogames/maps/cave_base_50.map +2 -2
  49. cogames/maps/diagnostic_evals/diagnostic_agile.map +2 -2
  50. cogames/maps/diagnostic_evals/diagnostic_agile_hard.map +2 -2
  51. cogames/maps/diagnostic_evals/diagnostic_charge_up.map +2 -2
  52. cogames/maps/diagnostic_evals/diagnostic_charge_up_hard.map +2 -2
  53. cogames/maps/diagnostic_evals/diagnostic_chest_navigation1.map +2 -2
  54. cogames/maps/diagnostic_evals/diagnostic_chest_navigation1_hard.map +2 -2
  55. cogames/maps/diagnostic_evals/diagnostic_chest_navigation2.map +2 -2
  56. cogames/maps/diagnostic_evals/diagnostic_chest_navigation2_hard.map +2 -2
  57. cogames/maps/diagnostic_evals/diagnostic_chest_navigation3.map +2 -2
  58. cogames/maps/diagnostic_evals/diagnostic_chest_navigation3_hard.map +2 -2
  59. cogames/maps/diagnostic_evals/diagnostic_chest_near.map +2 -2
  60. cogames/maps/diagnostic_evals/diagnostic_chest_search.map +2 -2
  61. cogames/maps/diagnostic_evals/diagnostic_chest_search_hard.map +2 -2
  62. cogames/maps/diagnostic_evals/diagnostic_extract_lab.map +2 -2
  63. cogames/maps/diagnostic_evals/diagnostic_extract_lab_hard.map +2 -2
  64. cogames/maps/diagnostic_evals/diagnostic_memory.map +2 -2
  65. cogames/maps/diagnostic_evals/diagnostic_memory_hard.map +2 -2
  66. cogames/maps/diagnostic_evals/diagnostic_radial.map +2 -2
  67. cogames/maps/diagnostic_evals/diagnostic_radial_hard.map +2 -2
  68. cogames/maps/diagnostic_evals/diagnostic_resource_lab.map +2 -2
  69. cogames/maps/diagnostic_evals/diagnostic_unclip.map +2 -2
  70. cogames/maps/evals/eval_balanced_spread.map +9 -5
  71. cogames/maps/evals/eval_clip_oxygen.map +9 -5
  72. cogames/maps/evals/eval_collect_resources.map +9 -5
  73. cogames/maps/evals/eval_collect_resources_hard.map +9 -5
  74. cogames/maps/evals/eval_collect_resources_medium.map +9 -5
  75. cogames/maps/evals/eval_divide_and_conquer.map +9 -5
  76. cogames/maps/evals/eval_energy_starved.map +9 -5
  77. cogames/maps/evals/eval_multi_coordinated_collect_hard.map +9 -5
  78. cogames/maps/evals/eval_oxygen_bottleneck.map +9 -5
  79. cogames/maps/evals/eval_single_use_world.map +9 -5
  80. cogames/maps/evals/extractor_hub_100x100.map +9 -5
  81. cogames/maps/evals/extractor_hub_30x30.map +9 -5
  82. cogames/maps/evals/extractor_hub_50x50.map +9 -5
  83. cogames/maps/evals/extractor_hub_70x70.map +9 -5
  84. cogames/maps/evals/extractor_hub_80x80.map +9 -5
  85. cogames/maps/machina_100_stations.map +2 -2
  86. cogames/maps/machina_200_stations.map +2 -2
  87. cogames/maps/machina_200_stations_small.map +2 -2
  88. cogames/maps/machina_eval_exp01.map +2 -2
  89. cogames/maps/machina_eval_template_large.map +2 -2
  90. cogames/maps/machinatrainer4agents.map +2 -2
  91. cogames/maps/machinatrainer4agentsbase.map +2 -2
  92. cogames/maps/machinatrainerbig.map +2 -2
  93. cogames/maps/machinatrainersmall.map +2 -2
  94. cogames/maps/planky_evals/aligner_avoid_aoe.map +28 -0
  95. cogames/maps/planky_evals/aligner_full_cycle.map +28 -0
  96. cogames/maps/planky_evals/aligner_gear.map +24 -0
  97. cogames/maps/planky_evals/aligner_hearts.map +24 -0
  98. cogames/maps/planky_evals/aligner_junction.map +26 -0
  99. cogames/maps/planky_evals/exploration_distant.map +28 -0
  100. cogames/maps/planky_evals/maze.map +32 -0
  101. cogames/maps/planky_evals/miner_best_resource.map +26 -0
  102. cogames/maps/planky_evals/miner_deposit.map +24 -0
  103. cogames/maps/planky_evals/miner_extract.map +26 -0
  104. cogames/maps/planky_evals/miner_full_cycle.map +28 -0
  105. cogames/maps/planky_evals/miner_gear.map +24 -0
  106. cogames/maps/planky_evals/multi_role.map +28 -0
  107. cogames/maps/planky_evals/resource_chain.map +30 -0
  108. cogames/maps/planky_evals/scout_explore.map +32 -0
  109. cogames/maps/planky_evals/scout_gear.map +24 -0
  110. cogames/maps/planky_evals/scrambler_full_cycle.map +28 -0
  111. cogames/maps/planky_evals/scrambler_gear.map +24 -0
  112. cogames/maps/planky_evals/scrambler_target.map +26 -0
  113. cogames/maps/planky_evals/stuck_corridor.map +32 -0
  114. cogames/maps/planky_evals/survive_retreat.map +26 -0
  115. cogames/maps/training_facility_clipped.map +2 -2
  116. cogames/maps/training_facility_open_1.map +2 -2
  117. cogames/maps/training_facility_open_2.map +2 -2
  118. cogames/maps/training_facility_open_3.map +2 -2
  119. cogames/maps/training_facility_tight_4.map +2 -2
  120. cogames/maps/training_facility_tight_5.map +2 -2
  121. cogames/maps/vanilla_large.map +2 -2
  122. cogames/maps/vanilla_small.map +2 -2
  123. cogames/pickup.py +183 -0
  124. cogames/play.py +166 -33
  125. cogames/policy/chaos_monkey.py +54 -0
  126. cogames/policy/nim_agents/__init__.py +27 -10
  127. cogames/policy/nim_agents/agents.py +121 -60
  128. cogames/policy/nim_agents/thinky_eval.py +35 -222
  129. cogames/policy/pufferlib_policy.py +67 -32
  130. cogames/policy/starter_agent.py +184 -0
  131. cogames/policy/trainable_policy_template.py +4 -1
  132. cogames/train.py +51 -13
  133. cogames/verbose.py +2 -2
  134. cogames-0.3.64.dist-info/METADATA +1842 -0
  135. cogames-0.3.64.dist-info/RECORD +159 -0
  136. cogames-0.3.64.dist-info/licenses/LICENSE +21 -0
  137. cogames-0.3.64.dist-info/top_level.txt +2 -0
  138. metta_alo/__init__.py +0 -0
  139. metta_alo/job_specs.py +17 -0
  140. metta_alo/policy.py +16 -0
  141. metta_alo/pure_single_episode_runner.py +75 -0
  142. metta_alo/py.typed +0 -0
  143. metta_alo/rollout.py +322 -0
  144. metta_alo/scoring.py +168 -0
  145. cogames/maps/diagnostic_evals/diagnostic_assembler_near.map +0 -49
  146. cogames/maps/diagnostic_evals/diagnostic_assembler_search.map +0 -49
  147. cogames/maps/diagnostic_evals/diagnostic_assembler_search_hard.map +0 -89
  148. cogames/policy/nim_agents/common.nim +0 -887
  149. cogames/policy/nim_agents/install.sh +0 -1
  150. cogames/policy/nim_agents/ladybug_agent.nim +0 -984
  151. cogames/policy/nim_agents/nim_agents.nim +0 -55
  152. cogames/policy/nim_agents/nim_agents.nims +0 -14
  153. cogames/policy/nim_agents/nimby.lock +0 -3
  154. cogames/policy/nim_agents/racecar_agents.nim +0 -884
  155. cogames/policy/nim_agents/random_agents.nim +0 -68
  156. cogames/policy/nim_agents/test_agents.py +0 -53
  157. cogames/policy/nim_agents/thinky_agents.nim +0 -717
  158. cogames/policy/scripted_agent/baseline_agent.py +0 -1049
  159. cogames/policy/scripted_agent/demo_policy.py +0 -244
  160. cogames/policy/scripted_agent/pathfinding.py +0 -126
  161. cogames/policy/scripted_agent/starter_agent.py +0 -136
  162. cogames/policy/scripted_agent/types.py +0 -235
  163. cogames/policy/scripted_agent/unclipping_agent.py +0 -476
  164. cogames/policy/scripted_agent/utils.py +0 -385
  165. cogames-0.3.49.dist-info/METADATA +0 -406
  166. cogames-0.3.49.dist-info/RECORD +0 -136
  167. cogames-0.3.49.dist-info/top_level.txt +0 -1
  168. {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/WHEEL +0 -0
  169. {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/entry_points.txt +0 -0
cogames/policy/scripted_agent/demo_policy.py
@@ -1,244 +0,0 @@
-import random
-
-from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
-from mettagrid.simulator.interface import AgentObservation
-
-from .types import BaselineHyperparameters, CellType, SimpleAgentState
-from .utils import (
-    change_vibe_action,
-    is_station,
-    is_wall,
-    parse_observation,
-    read_inventory_from_obs,
-    update_agent_position,
-    use_object_at,
-)
-
-
-def manhattan(a, b):
-    return abs(a[0] - b[0]) + abs(a[1] - b[1])
-
-
-class DemoPolicyImpl(StatefulPolicyImpl[SimpleAgentState]):
-    def __init__(self, policy_env_info, agent_id, hyperparams, *, heart_recipe=None):
-        self._agent_id = agent_id
-        self._hyperparams = hyperparams
-        self._policy_env_info = policy_env_info
-        self._actions = policy_env_info.actions
-        self._move_deltas = {"north": (-1, 0), "south": (1, 0), "east": (0, 1), "west": (0, -1)}
-
-        self._obs_hr = policy_env_info.obs_height // 2
-        self._obs_wr = policy_env_info.obs_width // 2
-
-        if heart_recipe:
-            self._heart_recipe = {k: v for k, v in heart_recipe.items() if k != "energy"}
-        else:
-            self._heart_recipe = None
-
-    def initial_agent_state(self):
-        center = 25
-        return SimpleAgentState(
-            agent_id=self._agent_id,
-            map_height=50,
-            map_width=50,
-            occupancy=[[CellType.FREE.value] * 50 for _ in range(50)],
-            row=center,
-            col=center,
-            heart_recipe=self._heart_recipe,
-        )
-
-    # ------------------------------------------------------------
-    # Utility helpers (kept tiny)
-    # ------------------------------------------------------------
-
-    def _adjacent(self, s, pos):
-        return manhattan((s.row, s.col), pos) == 1
-
-    def _random_step(self, s, parsed):
-        dirs = list(self._move_deltas.keys())
-        random.shuffle(dirs)
-        blocked = {
-            (r, c)
-            for (r, c), obj in parsed.nearby_objects.items()
-            if self._adjacent(s, (r, c))
-            and (
-                is_wall(obj.name)
-                or "extractor" in obj.name
-                or is_station(obj.name, "assembler")
-                or is_station(obj.name, "chest")
-                or is_station(obj.name, "charger")
-                or (obj.name == "agent" and obj.agent_group != s.agent_id)
-            )
-        }
-        for d in dirs:
-            dr, dc = self._move_deltas[d]
-            nr, nc = s.row + dr, s.col + dc
-            if (nr, nc) not in blocked:
-                return self._actions.move.Move(d)
-        return self._actions.noop.Noop()
-
-    def _step_towards(self, s, target, parsed):
-        """Single-step greedy pursuit, else random."""
-        r, c = s.row, s.col
-        tr, tc = target
-        cand = []
-        if abs(tr - r) >= abs(tc - c):
-            if tr < r:
-                cand.append("north")
-            elif tr > r:
-                cand.append("south")
-            if tc < c:
-                cand.append("west")
-            elif tc > c:
-                cand.append("east")
-        else:
-            if tc < c:
-                cand.append("west")
-            elif tc > c:
-                cand.append("east")
-            if tr < r:
-                cand.append("north")
-            elif tr > r:
-                cand.append("south")
-
-        blocked = {
-            (rr, cc)
-            for (rr, cc), obj in parsed.nearby_objects.items()
-            if self._adjacent(s, (rr, cc))
-            and (
-                is_wall(obj.name)
-                or "extractor" in obj.name
-                or is_station(obj.name, "assembler")
-                or is_station(obj.name, "chest")
-                or is_station(obj.name, "charger")
-                or (obj.name == "agent" and obj.agent_group != s.agent_id)
-            )
-        }
-
-        for d in cand:
-            dr, dc = self._move_deltas[d]
-            nr, nc = r + dr, c + dc
-            if (nr, nc) not in blocked:
-                return self._actions.move.Move(d)
-
-        return self._random_step(s, parsed)
-
-    def _closest(self, s, parsed, pred):
-        items = [pos for pos, obj in parsed.nearby_objects.items() if pred(obj)]
-        return min(items, key=lambda p: manhattan((s.row, s.col), p)) if items else None
-
-    def _rtype(self, name):
-        name = name.lower().replace("clipped_", "")
-        if "_extractor" not in name:
-            return None
-        name = name.replace("_extractor", "")
-        return name if name in ("carbon", "oxygen", "germanium", "silicon") else None
-
-    # ------------------------------------------------------------
-    # Main step
-    # ------------------------------------------------------------
-    def step_with_state(self, obs: AgentObservation, s: SimpleAgentState):
-        s.step_count += 1
-        read_inventory_from_obs(s, obs, obs_hr=self._obs_hr, obs_wr=self._obs_wr)
-        update_agent_position(s, move_deltas=self._move_deltas)
-
-        parsed = parse_observation(
-            s,
-            obs,
-            obs_hr=self._obs_hr,
-            obs_wr=self._obs_wr,
-            spatial_feature_names={"tag", "cooldown_remaining", "clipped", "remaining_uses"},
-            agent_feature_key_by_name={"agent:group": "agent_group", "agent:frozen": "agent_frozen"},
-            protocol_input_prefix="protocol_input:",
-            protocol_output_prefix="protocol_output:",
-            tag_names=self._policy_env_info.tag_id_to_name,
-        )
-
-        # Learn recipe if visible
-        if s.heart_recipe is None:
-            for _pos, obj in parsed.nearby_objects.items():
-                if obj.name == "assembler" and obj.protocol_outputs.get("heart", 0) > 0:
-                    s.heart_recipe = {k: v for k, v in obj.protocol_inputs.items() if k != "energy"}
-
-        # ---------------- PRE-PHASE: find assembler to learn recipe ----------------
-        if s.heart_recipe is None:
-            if s.current_glyph != "heart_a":
-                s.current_glyph = "heart_a"
-                return change_vibe_action("heart_a", actions=self._actions), s
-
-            assembler = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "assembler"))
-            if assembler:
-                if self._adjacent(s, assembler):
-                    return use_object_at(s, assembler, actions=self._actions, move_deltas=self._move_deltas), s
-                return self._step_towards(s, assembler, parsed), s
-
-            return self._random_step(s, parsed), s
-
-        # ---------------- MAIN PHASE ----------------
-
-        # Deliver hearts
-        if s.hearts > 0:
-            chest = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "chest"))
-            if chest:
-                if s.current_glyph != "default":
-                    s.current_glyph = "default"
-                    return change_vibe_action("default", actions=self._actions), s
-                if self._adjacent(s, chest):
-                    return use_object_at(s, chest, actions=self._actions, move_deltas=self._move_deltas), s
-                return self._step_towards(s, chest, parsed), s
-
-        # Assemble
-        if (
-            s.carbon >= s.heart_recipe.get("carbon", 0)
-            and s.oxygen >= s.heart_recipe.get("oxygen", 0)
-            and s.germanium >= s.heart_recipe.get("germanium", 0)
-            and s.silicon >= s.heart_recipe.get("silicon", 0)
-        ):
-            assembler = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "assembler"))
-            if assembler:
-                if s.current_glyph != "heart_a":
-                    s.current_glyph = "heart_a"
-                    return change_vibe_action("heart_a", actions=self._actions), s
-                if self._adjacent(s, assembler):
-                    return use_object_at(s, assembler, actions=self._actions, move_deltas=self._move_deltas), s
-                return self._step_towards(s, assembler, parsed), s
-
-        # Gather needed resources
-        deficits = {
-            r: s.heart_recipe.get(r, 0) - getattr(s, r, 0) for r in ("carbon", "oxygen", "germanium", "silicon")
-        }
-        needed = [
-            (pos, obj, self._rtype(obj.name.lower()))
-            for pos, obj in parsed.nearby_objects.items()
-            if "extractor" in obj.name.lower()
-        ]
-
-        needed = [(pos, obj, r) for pos, obj, r in needed if r and deficits[r] > 0]
-
-        if needed:
-            pos, obj, r = min(needed, key=lambda x: manhattan((s.row, s.col), x[0]))
-            if self._adjacent(s, pos):
-                return use_object_at(s, pos, actions=self._actions, move_deltas=self._move_deltas), s
-            return self._step_towards(s, pos, parsed), s
-
-        # Otherwise wander
-        return self._random_step(s, parsed), s
-
-
-class DemoPolicy(MultiAgentPolicy):
-    short_names = ["tiny_baseline"]
-
-    def __init__(self, policy_env_info, device: str = "cpu", hyperparams=None, *, heart_recipe=None):
-        super().__init__(policy_env_info, device=device)
-        self._hyperparams = hyperparams or BaselineHyperparameters()
-        self._heart_recipe = heart_recipe
-        self._agent_policies = {}
-
-    def agent_policy(self, agent_id):
-        if agent_id not in self._agent_policies:
-            self._agent_policies[agent_id] = StatefulAgentPolicy(
-                DemoPolicyImpl(self._policy_env_info, agent_id, self._hyperparams, heart_recipe=self._heart_recipe),
-                self._policy_env_info,
-                agent_id=agent_id,
-            )
-        return self._agent_policies[agent_id]
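For orientation, here is a standalone distillation of the greedy ordering used by `_step_towards` above: the axis with the larger remaining distance is tried first, then the other axis, and only then does the policy fall back to a random step. `candidate_directions` is a hypothetical helper written for this note, not part of the package:

```python
def candidate_directions(pos, target):
    # Mirrors DemoPolicyImpl._step_towards: prefer the axis with the
    # larger remaining distance, then the other axis.
    (r, c), (tr, tc) = pos, target
    vertical = ["north"] if tr < r else (["south"] if tr > r else [])
    horizontal = ["west"] if tc < c else (["east"] if tc > c else [])
    if abs(tr - r) >= abs(tc - c):
        return vertical + horizontal
    return horizontal + vertical

# 3 rows south vs. 1 column east: the vertical axis wins the tie-break.
assert candidate_directions((0, 0), (3, 1)) == ["south", "east"]
```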
cogames/policy/scripted_agent/pathfinding.py
@@ -1,126 +0,0 @@
-"""
-Pathfinding utilities for scripted agents.
-
-This module contains a BFS shortest-path implementation and related utilities
-for navigating the grid world.
-"""
-
-from __future__ import annotations
-
-from collections import deque
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from cogames.policy.scripted_agent.types import CellType, SimpleAgentState
-
-
-def compute_goal_cells(
-    state: SimpleAgentState, target: tuple[int, int], reach_adjacent: bool, cell_type: type[CellType]
-) -> list[tuple[int, int]]:
-    """
-    Compute the set of goal cells for pathfinding.
-    """
-    if not reach_adjacent:
-        return [target]
-
-    goals: list[tuple[int, int]] = []
-    for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
-        nr, nc = target[0] + dr, target[1] + dc
-        if is_traversable(state, nr, nc, cell_type):
-            goals.append((nr, nc))
-
-    # If no adjacent traversable tiles are known yet, allow exploring toward unknown ones
-    if not goals:
-        for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
-            nr, nc = target[0] + dr, target[1] + dc
-            if is_within_bounds(state, nr, nc) and state.occupancy[nr][nc] != cell_type.OBSTACLE.value:
-                goals.append((nr, nc))
-    return goals
-
-
-def shortest_path(
-    state: SimpleAgentState,
-    start: tuple[int, int],
-    goals: list[tuple[int, int]],
-    allow_goal_block: bool,
-    cell_type: type[CellType],
-) -> list[tuple[int, int]]:
-    """
-    Find shortest path from start to any goal using BFS.
-    """
-    goal_set = set(goals)
-    queue: deque[tuple[int, int]] = deque([start])
-    came_from: dict[tuple[int, int], tuple[int, int] | None] = {start: None}
-
-    def walkable(r: int, c: int) -> bool:
-        if (r, c) in goal_set and allow_goal_block:
-            return True
-        return is_traversable(state, r, c, cell_type)
-
-    while queue:
-        current = queue.popleft()
-        if current in goal_set:
-            return reconstruct_path(came_from, current)
-
-        for nr, nc in get_neighbors(state, current):
-            if (nr, nc) not in came_from and walkable(nr, nc):
-                came_from[(nr, nc)] = current
-                queue.append((nr, nc))
-
-    return []
-
-
-def reconstruct_path(
-    came_from: dict[tuple[int, int], tuple[int, int] | None],
-    current: tuple[int, int],
-) -> list[tuple[int, int]]:
-    """
-    Reconstruct path from BFS came_from dict.
-    """
-    path: list[tuple[int, int]] = []
-    while came_from[current] is not None:
-        path.append(current)
-        prev = came_from[current]
-        assert prev is not None  # Loop condition ensures this
-        current = prev
-    path.reverse()
-    return path
-
-
-def get_neighbors(state: SimpleAgentState, pos: tuple[int, int]) -> list[tuple[int, int]]:
-    """
-    Get valid neighboring positions (4-connected grid).
-    """
-    r, c = pos
-    candidates = [(r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)]
-    return [(nr, nc) for nr, nc in candidates if is_within_bounds(state, nr, nc)]
-
-
-def is_within_bounds(state: SimpleAgentState, r: int, c: int) -> bool:
-    """
-    Check if position is within map bounds.
-    """
-    return 0 <= r < state.map_height and 0 <= c < state.map_width
-
-
-def is_passable(state: SimpleAgentState, r: int, c: int, cell_type: type[CellType]) -> bool:
-    """
-    Check if a cell is passable (not an obstacle).
-    """
-    if not is_within_bounds(state, r, c):
-        return False
-    return is_traversable(state, r, c, cell_type)
-
-
-def is_traversable(state: SimpleAgentState, r: int, c: int, cell_type: type[CellType]) -> bool:
-    """
-    Check if a cell is traversable (free and no agent there).
-    """
-    if not is_within_bounds(state, r, c):
-        return False
-    # Don't walk through other agents
-    if (r, c) in state.agent_occupancy:
-        return False
-    cell = state.occupancy[r][c]
-    # Only traverse cells we KNOW are free, not unknown cells
-    return cell == cell_type.FREE.value
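These helpers are self-contained enough to exercise directly once copied into scope. A minimal sketch follows, with stand-ins for the `SimpleAgentState` and `CellType` types that lived in the (also removed) `cogames/policy/scripted_agent/types.py`; `StubState` and the two-member `CellType` here are illustrative assumptions covering only the fields the BFS helpers actually touch, not the real definitions:

```python
from dataclasses import dataclass, field
from enum import Enum


class CellType(Enum):  # stand-in; the real enum also distinguished unknown cells
    FREE = 0
    OBSTACLE = 1


@dataclass
class StubState:  # only the attributes read by the helpers above
    map_height: int = 5
    map_width: int = 5
    occupancy: list = field(
        default_factory=lambda: [[CellType.FREE.value] * 5 for _ in range(5)]
    )
    agent_occupancy: set = field(default_factory=set)


state = StubState()
state.occupancy[1][1] = CellType.OBSTACLE.value  # place a single wall

# Stand next to the target rather than on it, then BFS from the origin.
goals = compute_goal_cells(state, target=(4, 4), reach_adjacent=True, cell_type=CellType)
path = shortest_path(state, start=(0, 0), goals=goals, allow_goal_block=False, cell_type=CellType)
# `path` lists the cells to step through in order; the start cell is excluded.
```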
cogames/policy/scripted_agent/starter_agent.py
@@ -1,136 +0,0 @@
-"""
-Sample Policy for the Cogames environment.
-
-Helps a Cog move carbon from an extractor to a chest.
-
-Note to users of this policy:
-We don't intend for scripted policies to be the final word on how policies are generated (e.g., we expect the
-environment to be complicated enough that trained agents will be necessary). So we expect that scripting policies
-is a good way to start, but don't want you to get stuck here. Feel free to prove us wrong!
-
-Note to cogames developers:
-This policy should be kept relatively minimalist, without dependencies on intricate algorithms.
-"""
-
-from __future__ import annotations
-
-import random
-from dataclasses import dataclass
-from typing import Tuple
-
-from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
-from mettagrid.policy.policy_env_interface import PolicyEnvInterface
-from mettagrid.simulator import Action
-from mettagrid.simulator.interface import AgentObservation
-
-
-@dataclass
-class StarterCogState:
-    target_location: tuple[int, int] | None = None
-    chest_location: tuple[int, int] | None = None
-    resource_to_collect: str = "carbon"
-    # Location of an extractor for the target resource
-    extractor_location: tuple[int, int] | None = None
-    # Current position relative to the starting position.
-    # We expect some moves to fail, so all positions should be treated somewhat loosely.
-    position: tuple[int, int] = (0, 0)
-    have_inventory: bool = False
-
-
-class StarterCogPolicyImpl(StatefulPolicyImpl[StarterCogState]):
-    def __init__(
-        self,
-        policy_env_info: PolicyEnvInterface,
-        agent_id: int,
-    ):
-        self._agent_id = agent_id
-        self._policy_env_info = policy_env_info
-
-        # Action lookup
-        self._actions = policy_env_info.actions
-
-    def _parse_observation(self, obs: AgentObservation, state: StarterCogState) -> StarterCogState:
-        """Parse the observation and update the state."""
-
-        extractor_tag_value = self._policy_env_info.tags.index(f"{state.resource_to_collect}_extractor")
-        chest_tag_value = self._policy_env_info.tags.index("chest")
-        state.have_inventory = False
-        for token in obs.tokens:
-            if token.feature.name == "last_action":
-                # Update our current (relative) position.
-                # TODO: This is wrong if we moved to interact with a station.
-                if token.value == self._policy_env_info.action_names.index("move_north"):
-                    state.position = (state.position[0] - 1, state.position[1])
-                elif token.value == self._policy_env_info.action_names.index("move_south"):
-                    state.position = (state.position[0] + 1, state.position[1])
-                elif token.value == self._policy_env_info.action_names.index("move_west"):
-                    state.position = (state.position[0], state.position[1] - 1)
-                elif token.value == self._policy_env_info.action_names.index("move_east"):
-                    state.position = (state.position[0], state.position[1] + 1)
-                break
-        for token in obs.tokens:
-            if token.location == (5, 5):
-                if token.feature.name == f"inv:{state.resource_to_collect}":
-                    state.have_inventory = True
-                continue
-            token_location = (state.position[0] + token.location[0] - 5, state.position[1] + token.location[1] - 5)
-            if token.feature.name == "tag":
-                if token.value == extractor_tag_value:
-                    state.extractor_location = token_location
-                elif token.value == chest_tag_value:
-                    state.chest_location = token_location
-        # It would probably be a good idea to keep track of obstacles
-        return state
-
-    def _go_to(self, state: StarterCogState, target: tuple[int, int]) -> Tuple[Action, StarterCogState]:
-        """Go to the target location."""
-        # Let's just go straight there! I hope we don't run into anything and get stuck.
-        possible_actions = []
-        if state.position[0] < target[0]:
-            possible_actions.append("move_south")
-        if state.position[0] > target[0]:
-            possible_actions.append("move_north")
-        if state.position[1] < target[1]:
-            possible_actions.append("move_east")
-        if state.position[1] > target[1]:
-            possible_actions.append("move_west")
-        action = Action(name=random.choice(possible_actions))
-        return action, state
-
-    def step_with_state(self, obs: AgentObservation, state: StarterCogState) -> Tuple[Action, StarterCogState]:
-        """Compute the action for this Cog."""
-        state = self._parse_observation(obs, state)
-        if state.have_inventory:
-            if state.chest_location is not None:
-                return self._go_to(state, state.chest_location)
-        else:
-            if state.extractor_location is not None:
-                return self._go_to(state, state.extractor_location)
-        direction = random.choice(["north", "south", "east", "west"])
-        return Action(name="move_" + direction), state
-
-    def initial_agent_state(self) -> StarterCogState:
-        """Get the initial state for a new agent."""
-        return StarterCogState()
-
-
-# ============================================================================
-# Policy Wrapper Classes
-# ============================================================================
-
-
-class StarterPolicy(MultiAgentPolicy):
-    # short_names = ["scripted_starter"]  # Uncomment to register a shorthand
-
-    def __init__(self, policy_env_info: PolicyEnvInterface, device: str = "cpu"):
-        super().__init__(policy_env_info, device=device)
-        self._agent_policies: dict[int, StatefulAgentPolicy[StarterCogState]] = {}
-
-    def agent_policy(self, agent_id: int) -> StatefulAgentPolicy[StarterCogState]:
-        if agent_id not in self._agent_policies:
-            self._agent_policies[agent_id] = StatefulAgentPolicy(
-                StarterCogPolicyImpl(self._policy_env_info, agent_id),
-                self._policy_env_info,
-                agent_id=agent_id,
-            )
-        return self._agent_policies[agent_id]
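Per the file list above, this removed scripted starter is superseded by the new `cogames/policy/starter_agent.py` (entry 130), and its README moves to `cogames/docs/SCRIPTED_AGENT.md` (entry 31). For reference, a hedged wiring sketch based only on the constructor and `agent_policy` signatures shown in this hunk; `env_info` is an assumed, pre-built `PolicyEnvInterface` for the mission, and how the returned wrapper is stepped is not shown here:

```python
# Sketch only, not an official entry point.
policy = StarterPolicy(env_info, device="cpu")

# One StatefulAgentPolicy (wrapping StarterCogPolicyImpl) per agent id,
# cached so repeated lookups reuse the same stateful wrapper.
agent0 = policy.agent_policy(0)
agent1 = policy.agent_policy(1)
assert policy.agent_policy(0) is agent0
```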