cogames 0.3.49__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cogames/cli/client.py +60 -6
- cogames/cli/docsync/__init__.py +0 -0
- cogames/cli/docsync/_nb_md_directive_processing.py +180 -0
- cogames/cli/docsync/_nb_md_sync.py +103 -0
- cogames/cli/docsync/_nb_py_sync.py +122 -0
- cogames/cli/docsync/_three_way_sync.py +115 -0
- cogames/cli/docsync/_utils.py +76 -0
- cogames/cli/docsync/docsync.py +156 -0
- cogames/cli/leaderboard.py +112 -28
- cogames/cli/mission.py +64 -53
- cogames/cli/policy.py +46 -10
- cogames/cli/submit.py +268 -67
- cogames/cogs_vs_clips/cog.py +79 -0
- cogames/cogs_vs_clips/cogs_vs_clips_mapgen.md +19 -16
- cogames/cogs_vs_clips/cogsguard_reward_variants.py +153 -0
- cogames/cogs_vs_clips/cogsguard_tutorial.py +56 -0
- cogames/cogs_vs_clips/evals/README.md +10 -16
- cogames/cogs_vs_clips/evals/cogsguard_evals.py +81 -0
- cogames/cogs_vs_clips/evals/diagnostic_evals.py +49 -444
- cogames/cogs_vs_clips/evals/difficulty_variants.py +13 -326
- cogames/cogs_vs_clips/evals/integrated_evals.py +5 -45
- cogames/cogs_vs_clips/evals/spanning_evals.py +9 -180
- cogames/cogs_vs_clips/mission.py +187 -146
- cogames/cogs_vs_clips/missions.py +46 -137
- cogames/cogs_vs_clips/procedural.py +8 -8
- cogames/cogs_vs_clips/sites.py +107 -3
- cogames/cogs_vs_clips/stations.py +198 -186
- cogames/cogs_vs_clips/tutorial_missions.py +1 -1
- cogames/cogs_vs_clips/variants.py +25 -476
- cogames/device.py +13 -1
- cogames/{policy/scripted_agent/README.md → docs/SCRIPTED_AGENT.md} +82 -58
- cogames/evaluate.py +18 -30
- cogames/main.py +1434 -243
- cogames/maps/canidate1_1000.map +1 -1
- cogames/maps/canidate1_1000_stations.map +2 -2
- cogames/maps/canidate1_500.map +1 -1
- cogames/maps/canidate1_500_stations.map +2 -2
- cogames/maps/canidate2_1000.map +1 -1
- cogames/maps/canidate2_1000_stations.map +2 -2
- cogames/maps/canidate2_500.map +1 -1
- cogames/maps/canidate2_500_stations.map +2 -2
- cogames/maps/canidate3_1000.map +1 -1
- cogames/maps/canidate3_1000_stations.map +2 -2
- cogames/maps/canidate3_500.map +1 -1
- cogames/maps/canidate3_500_stations.map +2 -2
- cogames/maps/canidate4_500.map +1 -1
- cogames/maps/canidate4_500_stations.map +2 -2
- cogames/maps/cave_base_50.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_agile.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_agile_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_charge_up.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_charge_up_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation1.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation1_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation2.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation2_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation3.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation3_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_near.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_search.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_search_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_extract_lab.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_extract_lab_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_memory.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_memory_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_radial.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_radial_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_resource_lab.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_unclip.map +2 -2
- cogames/maps/evals/eval_balanced_spread.map +9 -5
- cogames/maps/evals/eval_clip_oxygen.map +9 -5
- cogames/maps/evals/eval_collect_resources.map +9 -5
- cogames/maps/evals/eval_collect_resources_hard.map +9 -5
- cogames/maps/evals/eval_collect_resources_medium.map +9 -5
- cogames/maps/evals/eval_divide_and_conquer.map +9 -5
- cogames/maps/evals/eval_energy_starved.map +9 -5
- cogames/maps/evals/eval_multi_coordinated_collect_hard.map +9 -5
- cogames/maps/evals/eval_oxygen_bottleneck.map +9 -5
- cogames/maps/evals/eval_single_use_world.map +9 -5
- cogames/maps/evals/extractor_hub_100x100.map +9 -5
- cogames/maps/evals/extractor_hub_30x30.map +9 -5
- cogames/maps/evals/extractor_hub_50x50.map +9 -5
- cogames/maps/evals/extractor_hub_70x70.map +9 -5
- cogames/maps/evals/extractor_hub_80x80.map +9 -5
- cogames/maps/machina_100_stations.map +2 -2
- cogames/maps/machina_200_stations.map +2 -2
- cogames/maps/machina_200_stations_small.map +2 -2
- cogames/maps/machina_eval_exp01.map +2 -2
- cogames/maps/machina_eval_template_large.map +2 -2
- cogames/maps/machinatrainer4agents.map +2 -2
- cogames/maps/machinatrainer4agentsbase.map +2 -2
- cogames/maps/machinatrainerbig.map +2 -2
- cogames/maps/machinatrainersmall.map +2 -2
- cogames/maps/planky_evals/aligner_avoid_aoe.map +28 -0
- cogames/maps/planky_evals/aligner_full_cycle.map +28 -0
- cogames/maps/planky_evals/aligner_gear.map +24 -0
- cogames/maps/planky_evals/aligner_hearts.map +24 -0
- cogames/maps/planky_evals/aligner_junction.map +26 -0
- cogames/maps/planky_evals/exploration_distant.map +28 -0
- cogames/maps/planky_evals/maze.map +32 -0
- cogames/maps/planky_evals/miner_best_resource.map +26 -0
- cogames/maps/planky_evals/miner_deposit.map +24 -0
- cogames/maps/planky_evals/miner_extract.map +26 -0
- cogames/maps/planky_evals/miner_full_cycle.map +28 -0
- cogames/maps/planky_evals/miner_gear.map +24 -0
- cogames/maps/planky_evals/multi_role.map +28 -0
- cogames/maps/planky_evals/resource_chain.map +30 -0
- cogames/maps/planky_evals/scout_explore.map +32 -0
- cogames/maps/planky_evals/scout_gear.map +24 -0
- cogames/maps/planky_evals/scrambler_full_cycle.map +28 -0
- cogames/maps/planky_evals/scrambler_gear.map +24 -0
- cogames/maps/planky_evals/scrambler_target.map +26 -0
- cogames/maps/planky_evals/stuck_corridor.map +32 -0
- cogames/maps/planky_evals/survive_retreat.map +26 -0
- cogames/maps/training_facility_clipped.map +2 -2
- cogames/maps/training_facility_open_1.map +2 -2
- cogames/maps/training_facility_open_2.map +2 -2
- cogames/maps/training_facility_open_3.map +2 -2
- cogames/maps/training_facility_tight_4.map +2 -2
- cogames/maps/training_facility_tight_5.map +2 -2
- cogames/maps/vanilla_large.map +2 -2
- cogames/maps/vanilla_small.map +2 -2
- cogames/pickup.py +183 -0
- cogames/play.py +166 -33
- cogames/policy/chaos_monkey.py +54 -0
- cogames/policy/nim_agents/__init__.py +27 -10
- cogames/policy/nim_agents/agents.py +121 -60
- cogames/policy/nim_agents/thinky_eval.py +35 -222
- cogames/policy/pufferlib_policy.py +67 -32
- cogames/policy/starter_agent.py +184 -0
- cogames/policy/trainable_policy_template.py +4 -1
- cogames/train.py +51 -13
- cogames/verbose.py +2 -2
- cogames-0.3.64.dist-info/METADATA +1842 -0
- cogames-0.3.64.dist-info/RECORD +159 -0
- cogames-0.3.64.dist-info/licenses/LICENSE +21 -0
- cogames-0.3.64.dist-info/top_level.txt +2 -0
- metta_alo/__init__.py +0 -0
- metta_alo/job_specs.py +17 -0
- metta_alo/policy.py +16 -0
- metta_alo/pure_single_episode_runner.py +75 -0
- metta_alo/py.typed +0 -0
- metta_alo/rollout.py +322 -0
- metta_alo/scoring.py +168 -0
- cogames/maps/diagnostic_evals/diagnostic_assembler_near.map +0 -49
- cogames/maps/diagnostic_evals/diagnostic_assembler_search.map +0 -49
- cogames/maps/diagnostic_evals/diagnostic_assembler_search_hard.map +0 -89
- cogames/policy/nim_agents/common.nim +0 -887
- cogames/policy/nim_agents/install.sh +0 -1
- cogames/policy/nim_agents/ladybug_agent.nim +0 -984
- cogames/policy/nim_agents/nim_agents.nim +0 -55
- cogames/policy/nim_agents/nim_agents.nims +0 -14
- cogames/policy/nim_agents/nimby.lock +0 -3
- cogames/policy/nim_agents/racecar_agents.nim +0 -884
- cogames/policy/nim_agents/random_agents.nim +0 -68
- cogames/policy/nim_agents/test_agents.py +0 -53
- cogames/policy/nim_agents/thinky_agents.nim +0 -717
- cogames/policy/scripted_agent/baseline_agent.py +0 -1049
- cogames/policy/scripted_agent/demo_policy.py +0 -244
- cogames/policy/scripted_agent/pathfinding.py +0 -126
- cogames/policy/scripted_agent/starter_agent.py +0 -136
- cogames/policy/scripted_agent/types.py +0 -235
- cogames/policy/scripted_agent/unclipping_agent.py +0 -476
- cogames/policy/scripted_agent/utils.py +0 -385
- cogames-0.3.49.dist-info/METADATA +0 -406
- cogames-0.3.49.dist-info/RECORD +0 -136
- cogames-0.3.49.dist-info/top_level.txt +0 -1
- {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/WHEEL +0 -0
- {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/entry_points.txt +0 -0

cogames/policy/scripted_agent/demo_policy.py (removed)
@@ -1,244 +0,0 @@
-import random
-
-from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
-from mettagrid.simulator.interface import AgentObservation
-
-from .types import BaselineHyperparameters, CellType, SimpleAgentState
-from .utils import (
-    change_vibe_action,
-    is_station,
-    is_wall,
-    parse_observation,
-    read_inventory_from_obs,
-    update_agent_position,
-    use_object_at,
-)
-
-
-def manhattan(a, b):
-    return abs(a[0] - b[0]) + abs(a[1] - b[1])
-
-
-class DemoPolicyImpl(StatefulPolicyImpl[SimpleAgentState]):
-    def __init__(self, policy_env_info, agent_id, hyperparams, *, heart_recipe=None):
-        self._agent_id = agent_id
-        self._hyperparams = hyperparams
-        self._policy_env_info = policy_env_info
-        self._actions = policy_env_info.actions
-        self._move_deltas = {"north": (-1, 0), "south": (1, 0), "east": (0, 1), "west": (0, -1)}
-
-        self._obs_hr = policy_env_info.obs_height // 2
-        self._obs_wr = policy_env_info.obs_width // 2
-
-        if heart_recipe:
-            self._heart_recipe = {k: v for k, v in heart_recipe.items() if k != "energy"}
-        else:
-            self._heart_recipe = None
-
-    def initial_agent_state(self):
-        center = 25
-        return SimpleAgentState(
-            agent_id=self._agent_id,
-            map_height=50,
-            map_width=50,
-            occupancy=[[CellType.FREE.value] * 50 for _ in range(50)],
-            row=center,
-            col=center,
-            heart_recipe=self._heart_recipe,
-        )
-
-    # ------------------------------------------------------------
-    # Utility helpers (kept tiny)
-    # ------------------------------------------------------------
-
-    def _adjacent(self, s, pos):
-        return manhattan((s.row, s.col), pos) == 1
-
-    def _random_step(self, s, parsed):
-        dirs = list(self._move_deltas.keys())
-        random.shuffle(dirs)
-        blocked = {
-            (r, c)
-            for (r, c), obj in parsed.nearby_objects.items()
-            if self._adjacent(s, (r, c))
-            and (
-                is_wall(obj.name)
-                or "extractor" in obj.name
-                or is_station(obj.name, "assembler")
-                or is_station(obj.name, "chest")
-                or is_station(obj.name, "charger")
-                or (obj.name == "agent" and obj.agent_group != s.agent_id)
-            )
-        }
-        for d in dirs:
-            dr, dc = self._move_deltas[d]
-            nr, nc = s.row + dr, s.col + dc
-            if (nr, nc) not in blocked:
-                return self._actions.move.Move(d)
-        return self._actions.noop.Noop()
-
-    def _step_towards(self, s, target, parsed):
-        """Single-step greedy pursuit, else random."""
-        r, c = s.row, s.col
-        tr, tc = target
-        cand = []
-        if abs(tr - r) >= abs(tc - c):
-            if tr < r:
-                cand.append("north")
-            elif tr > r:
-                cand.append("south")
-            if tc < c:
-                cand.append("west")
-            elif tc > c:
-                cand.append("east")
-        else:
-            if tc < c:
-                cand.append("west")
-            elif tc > c:
-                cand.append("east")
-            if tr < r:
-                cand.append("north")
-            elif tr > r:
-                cand.append("south")
-
-        blocked = {
-            (rr, cc)
-            for (rr, cc), obj in parsed.nearby_objects.items()
-            if self._adjacent(s, (rr, cc))
-            and (
-                is_wall(obj.name)
-                or "extractor" in obj.name
-                or is_station(obj.name, "assembler")
-                or is_station(obj.name, "chest")
-                or is_station(obj.name, "charger")
-                or (obj.name == "agent" and obj.agent_group != s.agent_id)
-            )
-        }
-
-        for d in cand:
-            dr, dc = self._move_deltas[d]
-            nr, nc = r + dr, c + dc
-            if (nr, nc) not in blocked:
-                return self._actions.move.Move(d)
-
-        return self._random_step(s, parsed)
-
-    def _closest(self, s, parsed, pred):
-        items = [pos for pos, obj in parsed.nearby_objects.items() if pred(obj)]
-        return min(items, key=lambda p: manhattan((s.row, s.col), p)) if items else None
-
-    def _rtype(self, name):
-        name = name.lower().replace("clipped_", "")
-        if "_extractor" not in name:
-            return None
-        name = name.replace("_extractor", "")
-        return name if name in ("carbon", "oxygen", "germanium", "silicon") else None
-
-    # ------------------------------------------------------------
-    # Main step
-    # ------------------------------------------------------------
-    def step_with_state(self, obs: AgentObservation, s: SimpleAgentState):
-        s.step_count += 1
-        read_inventory_from_obs(s, obs, obs_hr=self._obs_hr, obs_wr=self._obs_wr)
-        update_agent_position(s, move_deltas=self._move_deltas)
-
-        parsed = parse_observation(
-            s,
-            obs,
-            obs_hr=self._obs_hr,
-            obs_wr=self._obs_wr,
-            spatial_feature_names={"tag", "cooldown_remaining", "clipped", "remaining_uses"},
-            agent_feature_key_by_name={"agent:group": "agent_group", "agent:frozen": "agent_frozen"},
-            protocol_input_prefix="protocol_input:",
-            protocol_output_prefix="protocol_output:",
-            tag_names=self._policy_env_info.tag_id_to_name,
-        )
-
-        # Learn recipe if visible
-        if s.heart_recipe is None:
-            for _pos, obj in parsed.nearby_objects.items():
-                if obj.name == "assembler" and obj.protocol_outputs.get("heart", 0) > 0:
-                    s.heart_recipe = {k: v for k, v in obj.protocol_inputs.items() if k != "energy"}
-
-        # ---------------- PRE-PHASE: find assembler to learn recipe ----------------
-        if s.heart_recipe is None:
-            if s.current_glyph != "heart_a":
-                s.current_glyph = "heart_a"
-                return change_vibe_action("heart_a", actions=self._actions), s
-
-            assembler = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "assembler"))
-            if assembler:
-                if self._adjacent(s, assembler):
-                    return use_object_at(s, assembler, actions=self._actions, move_deltas=self._move_deltas), s
-                return self._step_towards(s, assembler, parsed), s
-
-            return self._random_step(s, parsed), s
-
-        # ---------------- MAIN PHASE ----------------
-
-        # Deliver hearts
-        if s.hearts > 0:
-            chest = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "chest"))
-            if chest:
-                if s.current_glyph != "default":
-                    s.current_glyph = "default"
-                    return change_vibe_action("default", actions=self._actions), s
-                if self._adjacent(s, chest):
-                    return use_object_at(s, chest, actions=self._actions, move_deltas=self._move_deltas), s
-                return self._step_towards(s, chest, parsed), s
-
-        # Assemble
-        if (
-            s.carbon >= s.heart_recipe.get("carbon", 0)
-            and s.oxygen >= s.heart_recipe.get("oxygen", 0)
-            and s.germanium >= s.heart_recipe.get("germanium", 0)
-            and s.silicon >= s.heart_recipe.get("silicon", 0)
-        ):
-            assembler = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "assembler"))
-            if assembler:
-                if s.current_glyph != "heart_a":
-                    s.current_glyph = "heart_a"
-                    return change_vibe_action("heart_a", actions=self._actions), s
-                if self._adjacent(s, assembler):
-                    return use_object_at(s, assembler, actions=self._actions, move_deltas=self._move_deltas), s
-                return self._step_towards(s, assembler, parsed), s
-
-        # Gather needed resources
-        deficits = {
-            r: s.heart_recipe.get(r, 0) - getattr(s, r, 0) for r in ("carbon", "oxygen", "germanium", "silicon")
-        }
-        needed = [
-            (pos, obj, self._rtype(obj.name.lower()))
-            for pos, obj in parsed.nearby_objects.items()
-            if "extractor" in obj.name.lower()
-        ]
-
-        needed = [(pos, obj, r) for pos, obj, r in needed if r and deficits[r] > 0]
-
-        if needed:
-            pos, obj, r = min(needed, key=lambda x: manhattan((s.row, s.col), x[0]))
-            if self._adjacent(s, pos):
-                return use_object_at(s, pos, actions=self._actions, move_deltas=self._move_deltas), s
-            return self._step_towards(s, pos, parsed), s
-
-        # Otherwise wander
-        return self._random_step(s, parsed), s
-
-
-class DemoPolicy(MultiAgentPolicy):
-    short_names = ["tiny_baseline"]
-
-    def __init__(self, policy_env_info, device: str = "cpu", hyperparams=None, *, heart_recipe=None):
-        super().__init__(policy_env_info, device=device)
-        self._hyperparams = hyperparams or BaselineHyperparameters()
-        self._heart_recipe = heart_recipe
-        self._agent_policies = {}
-
-    def agent_policy(self, agent_id):
-        if agent_id not in self._agent_policies:
-            self._agent_policies[agent_id] = StatefulAgentPolicy(
-                DemoPolicyImpl(self._policy_env_info, agent_id, self._hyperparams, heart_recipe=self._heart_recipe),
-                self._policy_env_info,
-                agent_id=agent_id,
-            )
-        return self._agent_policies[agent_id]
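The removed demo policy above works through a fixed priority list each step: learn the heart recipe, deliver any hearts it holds, assemble when the recipe is satisfied, gather whichever resources are still short, and otherwise wander. A minimal plain-Python sketch of that priority-rule pattern follows; the Agent container and rule names are illustrative stand-ins for the deleted per-agent state and are not part of either package version.

from dataclasses import dataclass, field


@dataclass
class Agent:
    # Illustrative stand-in for the demo policy's per-agent state.
    heart_recipe: dict | None = None
    hearts: int = 0
    inventory: dict = field(default_factory=dict)


def choose_phase(agent: Agent) -> str:
    """Walk a fixed priority list and return the first phase whose condition holds."""
    rules = [
        ("learn_recipe", lambda a: a.heart_recipe is None),
        ("deliver_hearts", lambda a: a.hearts > 0),
        ("assemble", lambda a: a.heart_recipe is not None
         and all(a.inventory.get(k, 0) >= v for k, v in a.heart_recipe.items())),
        ("gather", lambda a: a.heart_recipe is not None
         and any(a.inventory.get(k, 0) < v for k, v in a.heart_recipe.items())),
    ]
    for name, applies in rules:
        if applies(agent):
            return name
    return "wander"


print(choose_phase(Agent()))                                       # learn_recipe
print(choose_phase(Agent(heart_recipe={"carbon": 2}, hearts=1)))   # deliver_hearts
print(choose_phase(Agent(heart_recipe={"carbon": 2},
                         inventory={"carbon": 2})))                # assemble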

cogames/policy/scripted_agent/pathfinding.py (removed)
@@ -1,126 +0,0 @@
-"""
-Pathfinding utilities for scripted agents.
-
-This module contains A* pathfinding implementation and related utilities
-for navigating the grid world.
-"""
-
-from __future__ import annotations
-
-from collections import deque
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from cogames.policy.scripted_agent.types import CellType, SimpleAgentState
-
-
-def compute_goal_cells(
-    state: SimpleAgentState, target: tuple[int, int], reach_adjacent: bool, cell_type: type[CellType]
-) -> list[tuple[int, int]]:
-    """
-    Compute the set of goal cells for pathfinding.
-    """
-    if not reach_adjacent:
-        return [target]
-
-    goals: list[tuple[int, int]] = []
-    for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
-        nr, nc = target[0] + dr, target[1] + dc
-        if is_traversable(state, nr, nc, cell_type):
-            goals.append((nr, nc))
-
-    # If no adjacent traversable tiles are known yet, allow exploring toward unknown ones
-    if not goals:
-        for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
-            nr, nc = target[0] + dr, target[1] + dc
-            if is_within_bounds(state, nr, nc) and state.occupancy[nr][nc] != cell_type.OBSTACLE.value:
-                goals.append((nr, nc))
-    return goals
-
-
-def shortest_path(
-    state: SimpleAgentState,
-    start: tuple[int, int],
-    goals: list[tuple[int, int]],
-    allow_goal_block: bool,
-    cell_type: type[CellType],
-) -> list[tuple[int, int]]:
-    """
-    Find shortest path from start to any goal using BFS.
-    """
-    goal_set = set(goals)
-    queue: deque[tuple[int, int]] = deque([start])
-    came_from: dict[tuple[int, int], tuple[int, int] | None] = {start: None}
-
-    def walkable(r: int, c: int) -> bool:
-        if (r, c) in goal_set and allow_goal_block:
-            return True
-        return is_traversable(state, r, c, cell_type)
-
-    while queue:
-        current = queue.popleft()
-        if current in goal_set:
-            return reconstruct_path(came_from, current)
-
-        for nr, nc in get_neighbors(state, current):
-            if (nr, nc) not in came_from and walkable(nr, nc):
-                came_from[(nr, nc)] = current
-                queue.append((nr, nc))
-
-    return []
-
-
-def reconstruct_path(
-    came_from: dict[tuple[int, int], tuple[int, int] | None],
-    current: tuple[int, int],
-) -> list[tuple[int, int]]:
-    """
-    Reconstruct path from BFS came_from dict.
-    """
-    path: list[tuple[int, int]] = []
-    while came_from[current] is not None:
-        path.append(current)
-        prev = came_from[current]
-        assert prev is not None  # Loop condition ensures this
-        current = prev
-    path.reverse()
-    return path
-
-
-def get_neighbors(state: SimpleAgentState, pos: tuple[int, int]) -> list[tuple[int, int]]:
-    """
-    Get valid neighboring positions (4-connected grid).
-    """
-    r, c = pos
-    candidates = [(r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)]
-    return [(nr, nc) for nr, nc in candidates if is_within_bounds(state, nr, nc)]
-
-
-def is_within_bounds(state: SimpleAgentState, r: int, c: int) -> bool:
-    """
-    Check if position is within map bounds.
-    """
-    return 0 <= r < state.map_height and 0 <= c < state.map_width
-
-
-def is_passable(state: SimpleAgentState, r: int, c: int, cell_type: type[CellType]) -> bool:
-    """
-    Check if a cell is passable (not an obstacle).
-    """
-    if not is_within_bounds(state, r, c):
-        return False
-    return is_traversable(state, r, c, cell_type)
-
-
-def is_traversable(state: SimpleAgentState, r: int, c: int, cell_type: type[CellType]) -> bool:
-    """
-    Check if a cell is traversable (free and no agent there).
-    """
-    if not is_within_bounds(state, r, c):
-        return False
-    # Don't walk through other agents
-    if (r, c) in state.agent_occupancy:
-        return False
-    cell = state.occupancy[r][c]
-    # Only traverse cells we KNOW are free, not unknown cells
-    return cell == cell_type.FREE.value
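The removed pathfinding module above does breadth-first search over cells the agent knows to be free, then walks the came_from map backwards to rebuild the path. A minimal self-contained sketch of the same approach is shown below; the Cell enum and GridState dataclass are hypothetical stand-ins for the deleted CellType and SimpleAgentState, not part of either package version.

from collections import deque
from dataclasses import dataclass, field
from enum import Enum


class Cell(Enum):  # hypothetical stand-in for the deleted CellType
    FREE = 0
    OBSTACLE = 1


@dataclass
class GridState:  # hypothetical stand-in for the deleted SimpleAgentState
    occupancy: list[list[int]]
    agent_occupancy: set[tuple[int, int]] = field(default_factory=set)

    @property
    def map_height(self) -> int:
        return len(self.occupancy)

    @property
    def map_width(self) -> int:
        return len(self.occupancy[0])


def bfs_path(state: GridState, start: tuple[int, int], goal: tuple[int, int]) -> list[tuple[int, int]]:
    """Breadth-first search over known-free cells, mirroring the deleted shortest_path helper."""
    came_from: dict[tuple[int, int], tuple[int, int] | None] = {start: None}
    queue = deque([start])
    while queue:
        r, c = queue.popleft()
        if (r, c) == goal:
            # Walk back through predecessors to rebuild the path, excluding the start cell.
            path, cur = [], (r, c)
            while came_from[cur] is not None:
                path.append(cur)
                cur = came_from[cur]
            return list(reversed(path))
        for nr, nc in ((r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)):
            if (
                0 <= nr < state.map_height
                and 0 <= nc < state.map_width
                and (nr, nc) not in state.agent_occupancy
                and state.occupancy[nr][nc] == Cell.FREE.value
                and (nr, nc) not in came_from
            ):
                came_from[(nr, nc)] = (r, c)
                queue.append((nr, nc))
    return []  # no path through known-free cells


# Example: a 3x3 grid with an obstacle in the middle.
grid = GridState(occupancy=[[0, 0, 0], [0, 1, 0], [0, 0, 0]])
print(bfs_path(grid, (0, 0), (2, 2)))  # [(1, 0), (2, 0), (2, 1), (2, 2)]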

cogames/policy/scripted_agent/starter_agent.py (removed)
@@ -1,136 +0,0 @@
-"""
-Sample Policy for the Cogames environment.
-
-Helps a Cog move carbon from an extractor to a chest.
-
-Note to users of this policy:
-We don't intend for scripted policies to be the final word on how policies are generated (e.g., we expect the
-environment to be complicated enough that trained agents will be necessary). So we expect that scripting policies
-is a good way to start, but don't want you to get stuck here. Feel free to prove us wrong!
-
-Note to cogames developers:
-This policy should be kept relatively minimalist, without dependencies on intricate algorithms.
-"""
-
-from __future__ import annotations
-
-import random
-from dataclasses import dataclass
-from typing import Tuple
-
-from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
-from mettagrid.policy.policy_env_interface import PolicyEnvInterface
-from mettagrid.simulator import Action
-from mettagrid.simulator.interface import AgentObservation
-
-
-@dataclass
-class StarterCogState:
-    target_location: tuple[int, int] | None = None
-    chest_location: tuple[int, int] | None = None
-    resource_to_collect: str = "carbon"
-    # Location of an extractor for the target resource
-    extractor_location: tuple[int, int] | None = None
-    # Current position relative to the starting position.
-    # We expect some moves to fail, so all positions should be treated somewhat loosely.
-    position: tuple[int, int] = (0, 0)
-    have_inventory: bool = False
-
-
-class StarterCogPolicyImpl(StatefulPolicyImpl[StarterCogState]):
-    def __init__(
-        self,
-        policy_env_info: PolicyEnvInterface,
-        agent_id: int,
-    ):
-        self._agent_id = agent_id
-        self._policy_env_info = policy_env_info
-
-        # Action lookup
-        self._actions = policy_env_info.actions
-
-    def _parse_observation(self, obs: AgentObservation, state: StarterCogState) -> StarterCogState:
-        """Parse the observation and update the state."""
-
-        extractor_tag_value = self._policy_env_info.tags.index(f"{state.resource_to_collect}_extractor")
-        chest_tag_value = self._policy_env_info.tags.index("chest")
-        state.have_inventory = False
-        for token in obs.tokens:
-            if token.feature.name == "last_action":
-                # Update our current (relative) position.
-                # TODO: This is wrong if we moved to interact with a station.
-                if token.value == self._policy_env_info.action_names.index("move_north"):
-                    state.position = (state.position[0] - 1, state.position[1])
-                elif token.value == self._policy_env_info.action_names.index("move_south"):
-                    state.position = (state.position[0] + 1, state.position[1])
-                elif token.value == self._policy_env_info.action_names.index("move_west"):
-                    state.position = (state.position[0], state.position[1] - 1)
-                elif token.value == self._policy_env_info.action_names.index("move_east"):
-                    state.position = (state.position[0], state.position[1] + 1)
-                break
-        for token in obs.tokens:
-            if token.location == (5, 5):
-                if token.feature.name == f"inv:{state.resource_to_collect}":
-                    state.have_inventory = True
-                continue
-            token_location = (state.position[0] + token.location[0] - 5, state.position[1] + token.location[1] - 5)
-            if token.feature.name == "tag":
-                if token.value == extractor_tag_value:
-                    state.extractor_location = token_location
-                elif token.value == chest_tag_value:
-                    state.chest_location = token_location
-        # It would probably be a good idea to keep track of obstacles
-        return state
-
-    def _go_to(self, state: StarterCogState, target: tuple[int, int]) -> Tuple[Action, StarterCogState]:
-        """Go to the target location."""
-        # Let's just go straight there! I hope we don't run into anything and get stuck.
-        possible_actions = []
-        if state.position[0] < target[0]:
-            possible_actions.append("move_south")
-        if state.position[0] > target[0]:
-            possible_actions.append("move_north")
-        if state.position[1] < target[1]:
-            possible_actions.append("move_east")
-        if state.position[1] > target[1]:
-            possible_actions.append("move_west")
-        action = Action(name=random.choice(possible_actions))
-        return action, state
-
-    def step_with_state(self, obs: AgentObservation, state: StarterCogState) -> Tuple[Action, StarterCogState]:
-        """Compute the action for this Cog."""
-        state = self._parse_observation(obs, state)
-        if state.have_inventory:
-            if state.chest_location is not None:
-                return self._go_to(state, state.chest_location)
-        else:
-            if state.extractor_location is not None:
-                return self._go_to(state, state.extractor_location)
-        direction = random.choice(["north", "south", "east", "west"])
-        return Action(name="move_" + direction), state
-
-    def initial_agent_state(self) -> StarterCogState:
-        """Get the initial state for a new agent."""
-        return StarterCogState()
-
-
-# ============================================================================
-# Policy Wrapper Classes
-# ============================================================================
-
-
-class StarterPolicy(MultiAgentPolicy):
-    # short_names = ["scripted_starter"]  # Uncomment to register a shorthand
-
-    def __init__(self, policy_env_info: PolicyEnvInterface, device: str = "cpu"):
-        super().__init__(policy_env_info, device=device)
-        self._agent_policies: dict[int, StatefulAgentPolicy[StarterCogState]] = {}
-
-    def agent_policy(self, agent_id: int) -> StatefulAgentPolicy[StarterCogState]:
-        if agent_id not in self._agent_policies:
-            self._agent_policies[agent_id] = StatefulAgentPolicy(
-                StarterCogPolicyImpl(self._policy_env_info, agent_id),
-                self._policy_env_info,
-                agent_id=agent_id,
-            )
-        return self._agent_policies[agent_id]