cogames-agents 0.0.0.7__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cogames_agents/__init__.py +0 -0
- cogames_agents/evals/__init__.py +5 -0
- cogames_agents/evals/planky_evals.py +415 -0
- cogames_agents/policy/__init__.py +0 -0
- cogames_agents/policy/evolution/__init__.py +0 -0
- cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
- cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
- cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
- cogames_agents/policy/nim_agents/__init__.py +20 -0
- cogames_agents/policy/nim_agents/agents.py +98 -0
- cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
- cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
- cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
- cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
- cogames_agents/policy/nim_agents/common.nim +1054 -0
- cogames_agents/policy/nim_agents/install.sh +1 -0
- cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
- cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
- cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
- cogames_agents/policy/nim_agents/nimby.lock +3 -0
- cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
- cogames_agents/policy/nim_agents/random_agents.nim +68 -0
- cogames_agents/policy/nim_agents/test_agents.py +53 -0
- cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
- cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
- cogames_agents/policy/scripted_agent/README.md +360 -0
- cogames_agents/policy/scripted_agent/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
- cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
- cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
- cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
- cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
- cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
- cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
- cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
- cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
- cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
- cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
- cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
- cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
- cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
- cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
- cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
- cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
- cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
- cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
- cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
- cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
- cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
- cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
- cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
- cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
- cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
- cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
- cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
- cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
- cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
- cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
- cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
- cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
- cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
- cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
- cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
- cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
- cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
- cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
- cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
- cogames_agents/policy/scripted_agent/common/roles.py +34 -0
- cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
- cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
- cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
- cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
- cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
- cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
- cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
- cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
- cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
- cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
- cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
- cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
- cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
- cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
- cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
- cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
- cogames_agents/policy/scripted_agent/planky/README.md +214 -0
- cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
- cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/planky/context.py +68 -0
- cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
- cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
- cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
- cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
- cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
- cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
- cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
- cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
- cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
- cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
- cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
- cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
- cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
- cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
- cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
- cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
- cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
- cogames_agents/policy/scripted_agent/types.py +239 -0
- cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
- cogames_agents/policy/scripted_agent/utils.py +381 -0
- cogames_agents/policy/scripted_registry.py +80 -0
- cogames_agents/py.typed +0 -0
- cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
- cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
- cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
- cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# much simpler evaluator for thinky agents.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from typing import Dict, List, Tuple
|
|
7
|
+
|
|
8
|
+
import cogames_agents.policy.nim_agents.agents as na
|
|
9
|
+
from cogames.cli.utils import suppress_noisy_logs
|
|
10
|
+
from cogames.cogs_vs_clips.evals.diagnostic_evals import DIAGNOSTIC_EVALS
|
|
11
|
+
from cogames.cogs_vs_clips.mission import Mission, NumCogsVariant
|
|
12
|
+
from mettagrid.policy.loader import initialize_or_load_policy
|
|
13
|
+
from mettagrid.policy.policy import PolicySpec
|
|
14
|
+
from mettagrid.policy.policy_env_interface import PolicyEnvInterface
|
|
15
|
+
from mettagrid.simulator.rollout import Rollout
|
|
16
|
+
|
|
17
|
+
# Agent to evaluate: dotted class path passed to PolicySpec(class_path=...) in run_eval.
AGENT_PATH = "cogames_agents.policy.nim_agents.agents.ThinkyAgentsMultiPolicy"

# Defaults (keep simple)
NUM_COGS = 4  # cog count used by most multi-agent evals below
MAX_STEPS = 10000  # written to env_cfg.game.max_steps for every eval
SEED = 42  # base seed; "flakey" evals are retried with SEED + attempt index

# Add/modify your evals here over time.
# Each entry is (mission name, tag, number of cogs).
# The tag is printed in the first column of the report. Only "flakey" changes
# behavior: main() retries those evals (up to 10 seeds) until one scores > 0.
# The other tags ("", "buggy", "slow", "n/a") are informational labels.
EVALS: List[Tuple[str, str, int]] = [
    # Regular evals
    (
        "energy_starved",
        "buggy",
        NUM_COGS,
    ),  # E is very hard, max E is 256, but agents think it's 100.
    ("oxygen_bottleneck", "", NUM_COGS),
    ("collect_resources_classic", "", NUM_COGS),
    ("collect_resources_spread", "", NUM_COGS),
    ("collect_far", "", NUM_COGS),
    ("divide_and_conquer", "", NUM_COGS),
    ("go_together", "", NUM_COGS),
    ("single_use_swarm", "flakey", NUM_COGS),
    # Diagnostic evals (single cog)
    ("diagnostic_chest_navigation1", "", 1),
    ("diagnostic_chest_navigation2", "", 1),
    ("diagnostic_chest_navigation3", "", 1),
    ("diagnostic_chest_deposit_near", "", 1),
    ("diagnostic_chest_deposit_search", "", 1),
    ("diagnostic_charge_up", "buggy", 1),  # The cog needs to sacrifice itself to make a heart.
    ("diagnostic_memory", "", 1),
    ("diagnostic_assemble_seeded_near", "", 1),
    ("diagnostic_assemble_seeded_search", "", 1),
    ("diagnostic_extract_missing_carbon", "", 1),
    ("diagnostic_extract_missing_oxygen", "", 1),
    ("diagnostic_extract_missing_germanium", "", 1),
    ("diagnostic_extract_missing_silicon", "", 1),
    ("diagnostic_agile", "", 1),
    ("diagnostic_radial", "", 1),
    # Hello World evals
    ("distant_resources", "buggy", NUM_COGS),  # Not enough time for such distances.
    ("quadrant_buildings", "buggy", NUM_COGS),  # Not enough junction for such distances.
    ("vibe_check", "", NUM_COGS),
    ("oxygen_bottleneck_easy", "", NUM_COGS),
    ("oxygen_bottleneck_standard", "", NUM_COGS),
    ("oxygen_bottleneck_hard", "buggy", NUM_COGS),  # Not enough junction for such distances.
    ("energy_starved_easy", "", NUM_COGS),
    ("energy_starved_standard", "buggy", NUM_COGS),  # E drain too high.
    ("energy_starved_hard", "buggy", NUM_COGS),  # E drain too high.
    ("unclipping_easy", "n/a", NUM_COGS),
    ("unclipping_standard", "n/a", NUM_COGS),
    ("unclipping_hard", "n/a", NUM_COGS),
    ("distant_resources_easy", "", NUM_COGS),
    ("distant_resources_standard", "flakey", NUM_COGS),  # Not enough time for such distances.
    ("distant_resources_hard", "buggy", NUM_COGS),  # Not enough time for such distances.
    ("quadrant_buildings_easy", "", NUM_COGS),
    ("quadrant_buildings_standard", "buggy", NUM_COGS),  # Not enough junction for such distances.
    ("quadrant_buildings_hard", "buggy", NUM_COGS),  # Not enough junction for such distances.
    ("single_use_swarm_easy", "buggy", NUM_COGS),
    ("single_use_swarm_standard", "buggy", NUM_COGS),  # Not enough time for such distances.
    ("single_use_swarm_hard", "buggy", NUM_COGS),  # E drain too high.
    ("vibe_check_easy", "buggy", NUM_COGS),  # No/invalid recipes available.
    ("vibe_check_standard", "", NUM_COGS),
    ("vibe_check_hard", "flakey", NUM_COGS),  # Not enough time for such distances.
    # Hearts evals
    ("easy_large_hearts", "slow", NUM_COGS),
    ("easy_medium_hearts", "", NUM_COGS),
    ("easy_small_hearts", "flakey", NUM_COGS),
    # Missions from missions.py
    ("harvest", "", NUM_COGS),
    ("repair", "", 2),  # repair uses 2 cogs
    ("hello_world_unclip", "", NUM_COGS),
]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _load_all_missions() -> Dict[str, Mission]:
|
|
93
|
+
# Minimal loader: merge all known mission sets
|
|
94
|
+
from importlib import import_module
|
|
95
|
+
|
|
96
|
+
missions: List[Mission] = []
|
|
97
|
+
for mod_name in (
|
|
98
|
+
"cogames.cogs_vs_clips.evals.integrated_evals",
|
|
99
|
+
"cogames.cogs_vs_clips.evals.spanning_evals",
|
|
100
|
+
"cogames.cogs_vs_clips.missions",
|
|
101
|
+
):
|
|
102
|
+
try:
|
|
103
|
+
mod = import_module(mod_name)
|
|
104
|
+
# missions.py uses MISSIONS, others use EVAL_MISSIONS
|
|
105
|
+
eval_list = getattr(mod, "MISSIONS", getattr(mod, "EVAL_MISSIONS", []))
|
|
106
|
+
missions.extend(eval_list)
|
|
107
|
+
except Exception:
|
|
108
|
+
pass
|
|
109
|
+
|
|
110
|
+
# Diagnostic evals are a list of classes; instantiate them
|
|
111
|
+
try:
|
|
112
|
+
missions.extend([cls() for cls in DIAGNOSTIC_EVALS]) # type: ignore[misc]
|
|
113
|
+
except Exception:
|
|
114
|
+
pass
|
|
115
|
+
|
|
116
|
+
# Build name -> mission instance map
|
|
117
|
+
mission_map: Dict[str, Mission] = {}
|
|
118
|
+
for m in missions:
|
|
119
|
+
# Items in EVAL_MISSIONS may be classes or instances; normalize to instances
|
|
120
|
+
try:
|
|
121
|
+
mission: Mission = m() if isinstance(m, type) else m # type: ignore[call-arg,assignment]
|
|
122
|
+
except Exception:
|
|
123
|
+
continue
|
|
124
|
+
mission_map[mission.name] = mission
|
|
125
|
+
return mission_map
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _ensure_vibe_supports_gear(env_cfg) -> None:
|
|
129
|
+
# Keep minimal and silent if anything fails
|
|
130
|
+
try:
|
|
131
|
+
hub = env_cfg.game.objects.get("hub")
|
|
132
|
+
uses_gear = False
|
|
133
|
+
if hub is not None and hasattr(hub, "protocols"):
|
|
134
|
+
for proto in hub.protocols:
|
|
135
|
+
if any(v == "gear" for v in getattr(proto, "vibes", [])):
|
|
136
|
+
uses_gear = True
|
|
137
|
+
break
|
|
138
|
+
if uses_gear:
|
|
139
|
+
change_vibe = env_cfg.game.actions.change_vibe
|
|
140
|
+
has_gear = any(v.name == "gear" for v in change_vibe.vibes)
|
|
141
|
+
if not has_gear:
|
|
142
|
+
from mettagrid.config.vibes import VIBE_BY_NAME
|
|
143
|
+
|
|
144
|
+
change_vibe.vibes = list(change_vibe.vibes) + [VIBE_BY_NAME["gear"]]
|
|
145
|
+
except Exception:
|
|
146
|
+
pass
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def run_eval(experiment_name: str, tag: str, mission_map: Dict[str, Mission], num_cogs: int, seed: int) -> float:
    """Run one mission with the configured agent and print a one-line result.

    Args:
        experiment_name: Mission name to look up in ``mission_map``.
        tag: Label printed in the first column of the result line.
        mission_map: Mapping from mission name to mission instance.
        num_cogs: Number of cogs; applied through ``NumCogsVariant`` and used
            to create one agent policy per cog.
        seed: Seed passed to the rollout.

    Returns:
        Total episode reward divided by ``max(1, num_cogs)``. Returns ``0.0``
        when the mission is unknown or when anything raises during setup or
        rollout; this function never propagates an ``Exception``.
    """
    start = time.perf_counter()
    try:
        if experiment_name not in mission_map:
            print(f"{tag:<6} {experiment_name:<40} {'MISSION NOT FOUND':>6}")
            return 0.0

        base_mission = mission_map[experiment_name]
        mission = base_mission.with_variants([NumCogsVariant(num_cogs=num_cogs)])

        env_cfg = mission.make_env()
        _ensure_vibe_supports_gear(env_cfg)
        env_cfg.game.max_steps = MAX_STEPS

        # Create policy and rollout
        pei = PolicyEnvInterface.from_mg_cfg(env_cfg)
        policy = initialize_or_load_policy(
            pei,
            PolicySpec(class_path=AGENT_PATH, data_path=None),
        )
        agent_policies = [policy.agent_policy(i) for i in range(num_cogs)]

        rollout = Rollout(
            env_cfg,
            agent_policies,
            render_mode="none",
            seed=seed,
        )
        rollout.run_until_done()

        # NOTE(review): reads the rollout's private ``_sim`` attribute; switch
        # to a public accessor if Rollout exposes one.
        total_reward = float(sum(rollout._sim.episode_rewards))
        hearts_per_agent = total_reward / max(1, num_cogs)
        elapsed = time.perf_counter() - start

        # One simple line per eval
        hpa = f"{hearts_per_agent:.2f}"
        tm = f"{elapsed:.2f}"
        print(f"{tag:<6} {experiment_name:<40} {hpa:>6}h {tm:>6}s")
        return hearts_per_agent
    except Exception as e:
        # Exceptions raised without a message stringify to "", which would
        # leave the result line blank; fall back to the exception class name.
        error_message = str(e) or type(e).__name__
        print(f"{tag:<6} {experiment_name:<40} {error_message}")
        return 0.0
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def main() -> None:
    """Run every eval in ``EVALS`` and print a per-eval and a total line.

    Evals tagged "flakey" are retried with seeds ``SEED``, ``SEED + 1``, ...
    (at most 10 attempts) and stop at the first attempt that scores above
    zero. Every other eval runs once with ``SEED``. An eval counts as
    successful when its hearts-per-agent score is greater than zero.
    """
    suppress_noisy_logs()
    na.start_measure()
    mission_map = _load_all_missions()
    print(f"Loaded {len(mission_map)} missions")
    print("tag .. map name ............................... harts/A .. time")
    start = time.perf_counter()
    total_hpa = 0.0
    successful_evals = 0
    num_evals = 0
    for experiment_name, tag, num_cogs in EVALS:
        num_evals += 1
        # Only flakey evals get retries; attempt 0 always uses SEED itself.
        attempts = 10 if tag == "flakey" else 1
        for attempt in range(attempts):
            hpa = run_eval(experiment_name, tag, mission_map, num_cogs, SEED + attempt)
            if hpa > 0:
                successful_evals += 1
                total_hpa += hpa
                break
    # Guard against an empty EVALS list, which would otherwise divide by zero.
    success_rate = successful_evals / num_evals if num_evals else 0.0
    elapsed = time.perf_counter() - start
    total_evals = f"{num_evals} evals {success_rate * 100:.1f}% successful"
    total_text = f"{total_hpa:.2f}"
    elapsed_text = f"{elapsed:.2f}"
    summary_tag = "total"
    print(f"{summary_tag:<6} {total_evals:<40} {total_text:>6}h {elapsed_text:>6}s")
    na.end_measure()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
# Scripted Agent Policies (cogames-agents)
|
|
2
|
+
|
|
3
|
+
This file mirrors the scripted-agent reference used by the `cogames` CLI docs, kept here so the package ships the full
|
|
4
|
+
details alongside the implementations.
|
|
5
|
+
|
|
6
|
+
Teaching-friendly scripted agents for CoGames evaluation and ablation studies, plus a tiny demo policy and the CogsGuard
|
|
7
|
+
team-play scripted policy.
|
|
8
|
+
|
|
9
|
+
## Overview
|
|
10
|
+
|
|
11
|
+
This package provides the CogsGuard team-play policy, two progressively capable scripted agents, and one tiny demo
|
|
12
|
+
policy:
|
|
13
|
+
|
|
14
|
+
1. **CogsGuard** - Vibe-based multi-role policy for the CogsGuard arena
|
|
15
|
+
2. **BaselineAgent** - Core functionality: exploration, resource gathering, heart assembly (single/multi-agent)
|
|
16
|
+
3. **UnclippingAgent** - Extends BaselineAgent with extractor unclipping capability
4. **TinyBaseline** - Tiny demo policy for quick experiments (short name: `tiny_baseline`)
|
|
17
|
+
|
|
18
|
+
## Architecture
|
|
19
|
+
|
|
20
|
+
### File Structure
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
scripted_agent/
|
|
24
|
+
├── cogsguard/ # CogsGuard scripted policy (vibe-based roles)
|
|
25
|
+
├── baseline_agent.py # Base agent + BaselinePolicy wrapper
|
|
26
|
+
├── unclipping_agent.py # Unclipping extension + UnclippingPolicy wrapper
|
|
27
|
+
├── demo_policy.py # Tiny demo policy (short name: tiny_baseline)
|
|
28
|
+
├── pathfinding.py # Pathfinding utilities (shared)
|
|
29
|
+
└── README.md # This documentation
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Each agent file contains:
|
|
33
|
+
|
|
34
|
+
- Agent class with core logic and state management
|
|
35
|
+
- Policy wrapper classes at the bottom for CLI integration
|
|
36
|
+
|
|
37
|
+
### Design Philosophy
|
|
38
|
+
|
|
39
|
+
These agents are designed for **ablation studies** and **baseline evaluation**:
|
|
40
|
+
|
|
41
|
+
- Simple, readable implementations
|
|
42
|
+
- Clear separation of capabilities
|
|
43
|
+
- Minimal dependencies
|
|
44
|
+
|
|
45
|
+
## Agents
|
|
46
|
+
|
|
47
|
+
### 1. CogsGuard Scripted Agent
|
|
48
|
+
|
|
49
|
+
CogsGuard is the team-play focus for scripted policies. Agents are controlled by **vibes** that map to roles and gear
|
|
50
|
+
acquisition.
|
|
51
|
+
|
|
52
|
+
**Vibes**:
|
|
53
|
+
|
|
54
|
+
| Vibe | Behavior |
|
|
55
|
+
| ----------- | ---------------------------------------- |
|
|
56
|
+
| `default` | Idle (noop) |
|
|
57
|
+
| `heart` | Idle (noop) |
|
|
58
|
+
| `gear` | Smart role selection |
|
|
59
|
+
| `miner` | Gather and deposit resources |
|
|
60
|
+
| `scout` | Explore and discover structures |
|
|
61
|
+
| `aligner` | Align neutral supply depots to cogs |
|
|
62
|
+
| `scrambler` | Scramble clips-aligned depots to neutral |
|
|
63
|
+
|
|
64
|
+
**Gear costs** (paid from cogs commons):
|
|
65
|
+
|
|
66
|
+
| Gear | Cost | Bonus |
|
|
67
|
+
| --------- | ------------------------------------------ | -------------------- |
|
|
68
|
+
| Miner | 3 carbon, 1 oxygen, 1 germanium, 1 silicon | +40 cargo |
|
|
69
|
+
| Scout | 1 carbon, 1 oxygen, 1 germanium, 3 silicon | +100 energy, +400 HP |
|
|
70
|
+
| Aligner | 3 carbon, 1 oxygen, 1 germanium, 1 silicon | +20 influence |
|
|
71
|
+
| Scrambler | 1 carbon, 3 oxygen, 1 germanium, 1 silicon | +200 HP |
|
|
72
|
+
|
|
73
|
+
**Supply depots** start clips-aligned. Scramblers neutralize them; aligners convert neutral depots to cogs for AOE
|
|
74
|
+
energy regen.
|
|
75
|
+
|
|
76
|
+
**Usage**:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Default role distribution (1 scrambler, 4 miners, rest smart-gear)
|
|
80
|
+
./tools/run.py recipes.experiment.cogsguard.play policy_uri=metta://policy/role
|
|
81
|
+
|
|
82
|
+
# Custom role counts
|
|
83
|
+
./tools/run.py recipes.experiment.cogsguard.play \
|
|
84
|
+
policy_uri="metta://policy/role?miner=3&scout=2&aligner=2&scrambler=3"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**Full documentation**: `cogsguard/README.md`
|
|
88
|
+
|
|
89
|
+
### 2. BaselineAgent
|
|
90
|
+
|
|
91
|
+
**Purpose**: Minimal working agent for single/multi-agent missions
|
|
92
|
+
|
|
93
|
+
**Capabilities**:
|
|
94
|
+
|
|
95
|
+
- ✅ Visual discovery (explores to find stations and extractors)
|
|
96
|
+
- ✅ Resource gathering (navigates to extractors, handles cooldowns)
|
|
97
|
+
- ✅ Heart assembly (deposits resources at hub)
|
|
98
|
+
- ✅ Heart delivery (brings hearts to chest)
|
|
99
|
+
- ✅ Energy management (recharges when low)
|
|
100
|
+
- ✅ Extractor tracking (remembers positions, cooldowns, remaining uses)
|
|
101
|
+
- ✅ Agent occupancy avoidance (multi-agent collision avoidance via pathfinding)
|
|
102
|
+
|
|
103
|
+
**Limitations**:
|
|
104
|
+
|
|
105
|
+
- ❌ No unclipping support (can't handle clipped extractors)
|
|
106
|
+
- ⚠️ Multi-agent coordination is basic (agents avoid each other but don't explicitly coordinate)
|
|
107
|
+
|
|
108
|
+
**Usage**:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from cogames_agents.policy.scripted_agent.baseline_agent import BaselinePolicy
|
|
112
|
+
from mettagrid import MettaGridEnv
|
|
113
|
+
|
|
114
|
+
env = MettaGridEnv(env_config)
|
|
115
|
+
policy = BaselinePolicy(env)
|
|
116
|
+
|
|
117
|
+
obs, info = env.reset()
|
|
118
|
+
policy.reset(obs, info)
|
|
119
|
+
|
|
120
|
+
agent = policy.agent_policy(0)
|
|
121
|
+
action = agent.step(obs[0])
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**CLI**:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
# Single agent
|
|
128
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 1
|
|
129
|
+
|
|
130
|
+
# Multi-agent
|
|
131
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 4
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### 3. UnclippingAgent
|
|
135
|
+
|
|
136
|
+
**Purpose**: Handle missions with clipped extractors
|
|
137
|
+
|
|
138
|
+
**Extends BaselineAgent with**:
|
|
139
|
+
|
|
140
|
+
- ✅ Clipped extractor detection
|
|
141
|
+
- ✅ Unclip item crafting
|
|
142
|
+
- ✅ Extractor restoration
|
|
143
|
+
- ✅ Resource deficit management (ensures enough resources for both unclipping and hearts)
|
|
144
|
+
|
|
145
|
+
**Unclip Item Mapping**:

| Clipped Resource | Unclip Item | Crafted From | Glyph |
| ---------------- | ----------- | ------------ | ----- |
| Oxygen           | decoder     | carbon       | gear  |
| Carbon           | modulator   | oxygen       | gear  |
| Germanium        | resonator   | silicon      | gear  |
| Silicon          | scrambler   | germanium    | gear  |
|
|
148
|
+
|
|
149
|
+
**Workflow**:
|
|
150
|
+
|
|
151
|
+
1. Detects clipped extractor blocking progress
|
|
152
|
+
2. Gathers craft resource (e.g., carbon for decoder)
|
|
153
|
+
3. Changes glyph to "gear"
|
|
154
|
+
4. Crafts unclip item at hub
|
|
155
|
+
5. Navigates to clipped extractor
|
|
156
|
+
6. Uses item to unclip
|
|
157
|
+
7. Resumes normal gathering
|
|
158
|
+
|
|
159
|
+
**Usage**:
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
from cogames_agents.policy.scripted_agent.unclipping_agent import UnclippingPolicy
|
|
163
|
+
|
|
164
|
+
policy = UnclippingPolicy(env)
|
|
165
|
+
# ... same as BaselinePolicy
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### 4. TinyBaseline (demo policy)
|
|
169
|
+
|
|
170
|
+
**Purpose**: Minimal, readable demo policy used for quick experiments.
|
|
171
|
+
|
|
172
|
+
**Short name**: `tiny_baseline` (defined in `demo_policy.py`).
|
|
173
|
+
|
|
174
|
+
## StarterAgent
|
|
175
|
+
|
|
176
|
+
**Purpose**: Intro-friendly agent that mirrors the high-level flow described in docs.
|
|
177
|
+
|
|
178
|
+
**Decision tree**:
|
|
179
|
+
|
|
180
|
+
1. Low energy → go recharge
|
|
181
|
+
2. Carrying a heart → deliver it
|
|
182
|
+
3. Have all recipe inputs → assemble
|
|
183
|
+
4. Otherwise → gather missing resources in a fixed order (carbon, oxygen, germanium, silicon)
|
|
184
|
+
|
|
185
|
+
**Why it exists**: Shows the simplest possible if/else controller that still completes missions, ideal for external
|
|
186
|
+
readers who want a tiny, readable starting point before diving into the full Baseline/Unclipping logic.
|
|
187
|
+
|
|
188
|
+
**Location**: The starter policy lives in the core `cogames` package at `cogames.policy.starter_agent` so it is always
|
|
189
|
+
available without installing `cogames-agents`.
|
|
190
|
+
|
|
191
|
+
## Shared Components
|
|
192
|
+
|
|
193
|
+
### Phase System
|
|
194
|
+
|
|
195
|
+
All agents use a phase-based state machine:
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
class Phase(Enum):
|
|
199
|
+
GATHER = "gather" # Collecting resources
|
|
200
|
+
ASSEMBLE = "assemble" # Crafting heart at hub
|
|
201
|
+
DELIVER = "deliver" # Bringing heart to chest
|
|
202
|
+
RECHARGE = "recharge" # Restoring energy
|
|
203
|
+
CRAFT_UNCLIP = "craft_unclip" # UnclippingAgent only
|
|
204
|
+
UNCLIP = "unclip" # UnclippingAgent only
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Navigation
|
|
208
|
+
|
|
209
|
+
Shared `pathfinding.py` module provides:
|
|
210
|
+
|
|
211
|
+
- **BFS pathfinding** with occupancy grid
|
|
212
|
+
- **Greedy fallback** when path blocked
|
|
213
|
+
- **Adjacent positioning** for station interactions
|
|
214
|
+
- **Agent occupancy avoidance** for multi-agent scenarios
|
|
215
|
+
|
|
216
|
+
### Observation Parsing
|
|
217
|
+
|
|
218
|
+
Agents parse egocentric observations (11×11 grid) to detect:
|
|
219
|
+
|
|
220
|
+
- Stations (hub, chest, junction, extractors)
|
|
221
|
+
- Other agents
|
|
222
|
+
- Walls and obstacles
|
|
223
|
+
- Agent state (resources, energy, inventory)
|
|
224
|
+
|
|
225
|
+
### Extractor Tracking
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
@dataclass
|
|
229
|
+
class ExtractorInfo:
|
|
230
|
+
position: tuple[int, int]
|
|
231
|
+
resource_type: str # "carbon", "oxygen", "germanium", "silicon"
|
|
232
|
+
remaining_uses: int
|
|
233
|
+
clipped: bool # For UnclippingAgent
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## Testing
|
|
237
|
+
|
|
238
|
+
### Quick Tests
|
|
239
|
+
|
|
240
|
+
#### BaselineAgent (Diagnostic Missions)
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
# Basic diagnostic (single agent)
|
|
244
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 1 --steps 1000
|
|
245
|
+
|
|
246
|
+
# Chest navigation
|
|
247
|
+
uv run cogames play --mission evals.diagnostic_chest_navigation1 -p baseline --cogs 1 --steps 1000
|
|
248
|
+
|
|
249
|
+
# Resource extraction
|
|
250
|
+
uv run cogames play --mission evals.diagnostic_extract_missing_oxygen -p baseline --cogs 1 --steps 1000
|
|
251
|
+
|
|
252
|
+
# Hard version
|
|
253
|
+
uv run cogames play --mission evals.diagnostic_radial_hard -p baseline --cogs 1 --steps 2000
|
|
254
|
+
|
|
255
|
+
# Multi-agent (2, 4 agents)
|
|
256
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 2 --steps 1500
|
|
257
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 4 --steps 2000
|
|
258
|
+
|
|
259
|
+
# Assembly test
|
|
260
|
+
uv run cogames play --mission evals.diagnostic_assemble_seeded_search -p baseline --cogs 1 --steps 1000
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### Comprehensive Evaluation
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
# Run full evaluation suite
|
|
267
|
+
uv run python packages/cogames/scripts/run_evaluation.py --policy ladybug
|
|
268
|
+
|
|
269
|
+
# Evaluate specific agent
|
|
270
|
+
uv run python packages/cogames/scripts/run_evaluation.py --policy baseline
|
|
271
|
+
uv run python packages/cogames/scripts/run_evaluation.py --policy ladybug
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## Evaluation Results
|
|
275
|
+
|
|
276
|
+
**Summary**:
|
|
277
|
+
|
|
278
|
+
- **BaselineAgent**: Works best for non-clipped missions with straightforward resource gathering
|
|
279
|
+
- **UnclippingAgent**: Best overall performance, handles clipping scenarios well
|
|
280
|
+
|
|
281
|
+
## Extending
|
|
282
|
+
|
|
283
|
+
### Adding New Agent Capabilities
|
|
284
|
+
|
|
285
|
+
To create a new agent variant:
|
|
286
|
+
|
|
287
|
+
1. **Create new file** (e.g., `my_agent.py`)
|
|
288
|
+
2. **Extend base class**:
|
|
289
|
+
|
|
290
|
+
```python
|
|
291
|
+
from .baseline_agent import BaselineAgent, SimpleAgentState
|
|
292
|
+
|
|
293
|
+
class MyAgent(BaselineAgent):
|
|
294
|
+
def _update_phase(self, s: SimpleAgentState) -> None:
|
|
295
|
+
# Add custom phase logic
|
|
296
|
+
super()._update_phase(s)
|
|
297
|
+
|
|
298
|
+
def _execute_phase(self, s: SimpleAgentState) -> int:
|
|
299
|
+
# Add custom phase execution
|
|
300
|
+
return super()._execute_phase(s)
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
3. **Add policy wrapper** at bottom of file:
|
|
304
|
+
|
|
305
|
+
```python
|
|
306
|
+
class MyAgentPolicy:
|
|
307
|
+
"""Per-agent policy wrapper."""
|
|
308
|
+
def __init__(self, impl: MyAgent, agent_id: int):
|
|
309
|
+
self._impl = impl
|
|
310
|
+
self._agent_id = agent_id
|
|
311
|
+
|
|
312
|
+
def step(self, obs) -> int:
|
|
313
|
+
return self._impl.step(self._agent_id, obs)
|
|
314
|
+
|
|
315
|
+
class MyPolicy:
|
|
316
|
+
"""Policy wrapper for MyAgent."""
|
|
317
|
+
def __init__(self, simulation=None):
|
|
318
|
+
self._simulation = simulation
|
|
319
|
+
self._impl = None
|
|
320
|
+
self._agent_policies = {}
|
|
321
|
+
|
|
322
|
+
def reset(self, obs, info):
|
|
323
|
+
# Initialize impl from simulation
|
|
324
|
+
pass
|
|
325
|
+
|
|
326
|
+
def agent_policy(self, agent_id: int):
|
|
327
|
+
# Return per-agent policy
|
|
328
|
+
pass
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
4. **Register in `__init__.py`**:
|
|
332
|
+
|
|
333
|
+
```python
|
|
334
|
+
from cogames_agents.policy.scripted_agent.my_agent import MyPolicy
|
|
335
|
+
|
|
336
|
+
__all__ = [..., "MyPolicy"]
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
### Resource Management
|
|
340
|
+
|
|
341
|
+
Agents track deficits and gather in priority order:
|
|
342
|
+
|
|
343
|
+
1. Germanium (5 needed, highest priority)
|
|
344
|
+
2. Silicon (50 needed)
|
|
345
|
+
3. Carbon (20 needed)
|
|
346
|
+
4. Oxygen (20 needed)
|
|
347
|
+
|
|
348
|
+
UnclippingAgent adds special logic:
|
|
349
|
+
|
|
350
|
+
- Ensures enough craft resource for both unclipping AND hearts
|
|
351
|
+
- Prevents resource deficits when crafting decoders
|
|
352
|
+
|
|
353
|
+
## Future Work
|
|
354
|
+
|
|
355
|
+
- [ ] Dynamic heart recipe detection
|
|
356
|
+
- [ ] Charger clipping strategies
|
|
357
|
+
- [ ] Clip spread handling
|
|
358
|
+
- [ ] Learned extractor efficiency
|
|
359
|
+
- [ ] Advanced multi-agent coordination (task assignment, resource reservation)
|
|
360
|
+
- [ ] Frontier-based exploration improvements
|
|
File without changes
|