cogames 0.3.49__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cogames/cli/client.py +60 -6
- cogames/cli/docsync/__init__.py +0 -0
- cogames/cli/docsync/_nb_md_directive_processing.py +180 -0
- cogames/cli/docsync/_nb_md_sync.py +103 -0
- cogames/cli/docsync/_nb_py_sync.py +122 -0
- cogames/cli/docsync/_three_way_sync.py +115 -0
- cogames/cli/docsync/_utils.py +76 -0
- cogames/cli/docsync/docsync.py +156 -0
- cogames/cli/leaderboard.py +112 -28
- cogames/cli/mission.py +64 -53
- cogames/cli/policy.py +46 -10
- cogames/cli/submit.py +268 -67
- cogames/cogs_vs_clips/cog.py +79 -0
- cogames/cogs_vs_clips/cogs_vs_clips_mapgen.md +19 -16
- cogames/cogs_vs_clips/cogsguard_reward_variants.py +153 -0
- cogames/cogs_vs_clips/cogsguard_tutorial.py +56 -0
- cogames/cogs_vs_clips/evals/README.md +10 -16
- cogames/cogs_vs_clips/evals/cogsguard_evals.py +81 -0
- cogames/cogs_vs_clips/evals/diagnostic_evals.py +49 -444
- cogames/cogs_vs_clips/evals/difficulty_variants.py +13 -326
- cogames/cogs_vs_clips/evals/integrated_evals.py +5 -45
- cogames/cogs_vs_clips/evals/spanning_evals.py +9 -180
- cogames/cogs_vs_clips/mission.py +187 -146
- cogames/cogs_vs_clips/missions.py +46 -137
- cogames/cogs_vs_clips/procedural.py +8 -8
- cogames/cogs_vs_clips/sites.py +107 -3
- cogames/cogs_vs_clips/stations.py +198 -186
- cogames/cogs_vs_clips/tutorial_missions.py +1 -1
- cogames/cogs_vs_clips/variants.py +25 -476
- cogames/device.py +13 -1
- cogames/{policy/scripted_agent/README.md → docs/SCRIPTED_AGENT.md} +82 -58
- cogames/evaluate.py +18 -30
- cogames/main.py +1434 -243
- cogames/maps/canidate1_1000.map +1 -1
- cogames/maps/canidate1_1000_stations.map +2 -2
- cogames/maps/canidate1_500.map +1 -1
- cogames/maps/canidate1_500_stations.map +2 -2
- cogames/maps/canidate2_1000.map +1 -1
- cogames/maps/canidate2_1000_stations.map +2 -2
- cogames/maps/canidate2_500.map +1 -1
- cogames/maps/canidate2_500_stations.map +2 -2
- cogames/maps/canidate3_1000.map +1 -1
- cogames/maps/canidate3_1000_stations.map +2 -2
- cogames/maps/canidate3_500.map +1 -1
- cogames/maps/canidate3_500_stations.map +2 -2
- cogames/maps/canidate4_500.map +1 -1
- cogames/maps/canidate4_500_stations.map +2 -2
- cogames/maps/cave_base_50.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_agile.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_agile_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_charge_up.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_charge_up_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation1.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation1_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation2.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation2_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation3.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation3_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_near.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_search.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_chest_search_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_extract_lab.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_extract_lab_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_memory.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_memory_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_radial.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_radial_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_resource_lab.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_unclip.map +2 -2
- cogames/maps/evals/eval_balanced_spread.map +9 -5
- cogames/maps/evals/eval_clip_oxygen.map +9 -5
- cogames/maps/evals/eval_collect_resources.map +9 -5
- cogames/maps/evals/eval_collect_resources_hard.map +9 -5
- cogames/maps/evals/eval_collect_resources_medium.map +9 -5
- cogames/maps/evals/eval_divide_and_conquer.map +9 -5
- cogames/maps/evals/eval_energy_starved.map +9 -5
- cogames/maps/evals/eval_multi_coordinated_collect_hard.map +9 -5
- cogames/maps/evals/eval_oxygen_bottleneck.map +9 -5
- cogames/maps/evals/eval_single_use_world.map +9 -5
- cogames/maps/evals/extractor_hub_100x100.map +9 -5
- cogames/maps/evals/extractor_hub_30x30.map +9 -5
- cogames/maps/evals/extractor_hub_50x50.map +9 -5
- cogames/maps/evals/extractor_hub_70x70.map +9 -5
- cogames/maps/evals/extractor_hub_80x80.map +9 -5
- cogames/maps/machina_100_stations.map +2 -2
- cogames/maps/machina_200_stations.map +2 -2
- cogames/maps/machina_200_stations_small.map +2 -2
- cogames/maps/machina_eval_exp01.map +2 -2
- cogames/maps/machina_eval_template_large.map +2 -2
- cogames/maps/machinatrainer4agents.map +2 -2
- cogames/maps/machinatrainer4agentsbase.map +2 -2
- cogames/maps/machinatrainerbig.map +2 -2
- cogames/maps/machinatrainersmall.map +2 -2
- cogames/maps/planky_evals/aligner_avoid_aoe.map +28 -0
- cogames/maps/planky_evals/aligner_full_cycle.map +28 -0
- cogames/maps/planky_evals/aligner_gear.map +24 -0
- cogames/maps/planky_evals/aligner_hearts.map +24 -0
- cogames/maps/planky_evals/aligner_junction.map +26 -0
- cogames/maps/planky_evals/exploration_distant.map +28 -0
- cogames/maps/planky_evals/maze.map +32 -0
- cogames/maps/planky_evals/miner_best_resource.map +26 -0
- cogames/maps/planky_evals/miner_deposit.map +24 -0
- cogames/maps/planky_evals/miner_extract.map +26 -0
- cogames/maps/planky_evals/miner_full_cycle.map +28 -0
- cogames/maps/planky_evals/miner_gear.map +24 -0
- cogames/maps/planky_evals/multi_role.map +28 -0
- cogames/maps/planky_evals/resource_chain.map +30 -0
- cogames/maps/planky_evals/scout_explore.map +32 -0
- cogames/maps/planky_evals/scout_gear.map +24 -0
- cogames/maps/planky_evals/scrambler_full_cycle.map +28 -0
- cogames/maps/planky_evals/scrambler_gear.map +24 -0
- cogames/maps/planky_evals/scrambler_target.map +26 -0
- cogames/maps/planky_evals/stuck_corridor.map +32 -0
- cogames/maps/planky_evals/survive_retreat.map +26 -0
- cogames/maps/training_facility_clipped.map +2 -2
- cogames/maps/training_facility_open_1.map +2 -2
- cogames/maps/training_facility_open_2.map +2 -2
- cogames/maps/training_facility_open_3.map +2 -2
- cogames/maps/training_facility_tight_4.map +2 -2
- cogames/maps/training_facility_tight_5.map +2 -2
- cogames/maps/vanilla_large.map +2 -2
- cogames/maps/vanilla_small.map +2 -2
- cogames/pickup.py +183 -0
- cogames/play.py +166 -33
- cogames/policy/chaos_monkey.py +54 -0
- cogames/policy/nim_agents/__init__.py +27 -10
- cogames/policy/nim_agents/agents.py +121 -60
- cogames/policy/nim_agents/thinky_eval.py +35 -222
- cogames/policy/pufferlib_policy.py +67 -32
- cogames/policy/starter_agent.py +184 -0
- cogames/policy/trainable_policy_template.py +4 -1
- cogames/train.py +51 -13
- cogames/verbose.py +2 -2
- cogames-0.3.64.dist-info/METADATA +1842 -0
- cogames-0.3.64.dist-info/RECORD +159 -0
- cogames-0.3.64.dist-info/licenses/LICENSE +21 -0
- cogames-0.3.64.dist-info/top_level.txt +2 -0
- metta_alo/__init__.py +0 -0
- metta_alo/job_specs.py +17 -0
- metta_alo/policy.py +16 -0
- metta_alo/pure_single_episode_runner.py +75 -0
- metta_alo/py.typed +0 -0
- metta_alo/rollout.py +322 -0
- metta_alo/scoring.py +168 -0
- cogames/maps/diagnostic_evals/diagnostic_assembler_near.map +0 -49
- cogames/maps/diagnostic_evals/diagnostic_assembler_search.map +0 -49
- cogames/maps/diagnostic_evals/diagnostic_assembler_search_hard.map +0 -89
- cogames/policy/nim_agents/common.nim +0 -887
- cogames/policy/nim_agents/install.sh +0 -1
- cogames/policy/nim_agents/ladybug_agent.nim +0 -984
- cogames/policy/nim_agents/nim_agents.nim +0 -55
- cogames/policy/nim_agents/nim_agents.nims +0 -14
- cogames/policy/nim_agents/nimby.lock +0 -3
- cogames/policy/nim_agents/racecar_agents.nim +0 -884
- cogames/policy/nim_agents/random_agents.nim +0 -68
- cogames/policy/nim_agents/test_agents.py +0 -53
- cogames/policy/nim_agents/thinky_agents.nim +0 -717
- cogames/policy/scripted_agent/baseline_agent.py +0 -1049
- cogames/policy/scripted_agent/demo_policy.py +0 -244
- cogames/policy/scripted_agent/pathfinding.py +0 -126
- cogames/policy/scripted_agent/starter_agent.py +0 -136
- cogames/policy/scripted_agent/types.py +0 -235
- cogames/policy/scripted_agent/unclipping_agent.py +0 -476
- cogames/policy/scripted_agent/utils.py +0 -385
- cogames-0.3.49.dist-info/METADATA +0 -406
- cogames-0.3.49.dist-info/RECORD +0 -136
- cogames-0.3.49.dist-info/top_level.txt +0 -1
- {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/WHEEL +0 -0
- {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/entry_points.txt +0 -0
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
# Scripted Agent Policies
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Teaching-friendly scripted agents for CoGames evaluation and ablation studies, plus a tiny demo policy and the CogsGuard
|
|
4
|
+
team-play scripted policy.
|
|
4
5
|
|
|
5
6
|
## Overview
|
|
6
7
|
|
|
7
|
-
This package provides three progressively capable scripted agents:
|
|
8
|
+
This package provides the CogsGuard team-play policy, two progressively capable scripted agents, and one tiny demo
|
|
9
|
+
policy:
|
|
8
10
|
|
|
9
|
-
1. **
|
|
10
|
-
2. **
|
|
11
|
-
|
|
11
|
+
1. **CogsGuard** - Vibe-based multi-role policy for the CogsGuard arena
|
|
12
|
+
2. **BaselineAgent** - Core functionality for legacy heart-production game: exploration, resource gathering, heart
|
|
13
|
+
assembly
|
|
14
|
+
3. **UnclippingAgent** - Extends BaselineAgent with extractor unclipping capability (legacy game)
|
|
12
15
|
|
|
13
16
|
## Architecture
|
|
14
17
|
|
|
@@ -16,11 +19,11 @@ This package provides three progressively capable scripted agents:
|
|
|
16
19
|
|
|
17
20
|
```
|
|
18
21
|
scripted_agent/
|
|
22
|
+
├── cogsguard/ # CogsGuard scripted policy (vibe-based roles)
|
|
19
23
|
├── baseline_agent.py # Base agent + BaselinePolicy wrapper
|
|
20
24
|
├── unclipping_agent.py # Unclipping extension + UnclippingPolicy wrapper
|
|
21
|
-
├──
|
|
22
|
-
|
|
23
|
-
└── README.md # This file
|
|
25
|
+
├── demo_policy.py # Tiny demo policy (short name: tiny_baseline)
|
|
26
|
+
└── pathfinding.py # Pathfinding utilities (shared)
|
|
24
27
|
```
|
|
25
28
|
|
|
26
29
|
Each agent file contains:
|
|
@@ -38,7 +41,49 @@ These agents are designed for **ablation studies** and **baseline evaluation**:
|
|
|
38
41
|
|
|
39
42
|
## Agents
|
|
40
43
|
|
|
41
|
-
### 1.
|
|
44
|
+
### 1. CogsGuard Scripted Agent
|
|
45
|
+
|
|
46
|
+
CogsGuard is the team-play focus for scripted policies. Agents are controlled by **vibes** that map to roles and gear
|
|
47
|
+
acquisition.
|
|
48
|
+
|
|
49
|
+
**Vibes**:
|
|
50
|
+
|
|
51
|
+
| Vibe | Behavior |
|
|
52
|
+
| ----------- | ---------------------------------------- |
|
|
53
|
+
| `default` | Idle (noop) |
|
|
54
|
+
| `heart` | Idle (noop) |
|
|
55
|
+
| `gear` | Smart role selection |
|
|
56
|
+
| `miner` | Gather and deposit resources |
|
|
57
|
+
| `scout` | Explore and discover structures |
|
|
58
|
+
| `aligner` | Align neutral supply depots to cogs |
|
|
59
|
+
| `scrambler` | Scramble clips-aligned depots to neutral |
|
|
60
|
+
|
|
61
|
+
**Gear costs** (paid from cogs commons):
|
|
62
|
+
|
|
63
|
+
| Gear | Cost | Bonus |
|
|
64
|
+
| --------- | ------------------------------------------ | -------------------- |
|
|
65
|
+
| Miner | 3 carbon, 1 oxygen, 1 germanium, 1 silicon | +40 cargo |
|
|
66
|
+
| Scout | 1 carbon, 1 oxygen, 1 germanium, 3 silicon | +100 energy, +400 HP |
|
|
67
|
+
| Aligner | 3 carbon, 1 oxygen, 1 germanium, 1 silicon | +20 influence |
|
|
68
|
+
| Scrambler | 1 carbon, 3 oxygen, 1 germanium, 1 silicon | +200 HP |
|
|
69
|
+
|
|
70
|
+
**Supply depots** start clips-aligned. Scramblers neutralize them; aligners convert neutral depots to cogs for AOE
|
|
71
|
+
energy regen.
|
|
72
|
+
|
|
73
|
+
**Usage**:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Default role distribution (1 scrambler, 4 miners, rest smart-gear)
|
|
77
|
+
./tools/run.py recipes.experiment.cogsguard.play policy_uri=metta://policy/role
|
|
78
|
+
|
|
79
|
+
# Custom role counts
|
|
80
|
+
./tools/run.py recipes.experiment.cogsguard.play \
|
|
81
|
+
policy_uri="metta://policy/role?miner=3&scout=2&aligner=2&scrambler=3"
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Full documentation**: `packages/cogames-agents/src/cogames_agents/policy/scripted_agent/cogsguard/README.md`
|
|
85
|
+
|
|
86
|
+
### 2. BaselineAgent
|
|
42
87
|
|
|
43
88
|
**Purpose**: Minimal working agent for single/multi-agent missions
|
|
44
89
|
|
|
@@ -46,7 +91,7 @@ These agents are designed for **ablation studies** and **baseline evaluation**:
|
|
|
46
91
|
|
|
47
92
|
- ✅ Visual discovery (explores to find stations and extractors)
|
|
48
93
|
- ✅ Resource gathering (navigates to extractors, handles cooldowns)
|
|
49
|
-
- ✅ Heart assembly (deposits resources at
|
|
94
|
+
- ✅ Heart assembly (deposits resources at hub)
|
|
50
95
|
- ✅ Heart delivery (brings hearts to chest)
|
|
51
96
|
- ✅ Energy management (recharges when low)
|
|
52
97
|
- ✅ Extractor tracking (remembers positions, cooldowns, remaining uses)
|
|
@@ -60,7 +105,7 @@ These agents are designed for **ablation studies** and **baseline evaluation**:
|
|
|
60
105
|
**Usage**:
|
|
61
106
|
|
|
62
107
|
```python
|
|
63
|
-
from
|
|
108
|
+
from cogames_agents.policy.scripted_agent.baseline_agent import BaselinePolicy
|
|
64
109
|
from mettagrid import MettaGridEnv
|
|
65
110
|
|
|
66
111
|
env = MettaGridEnv(env_config)
|
|
@@ -77,13 +122,13 @@ action = agent.step(obs[0])
|
|
|
77
122
|
|
|
78
123
|
```bash
|
|
79
124
|
# Single agent
|
|
80
|
-
uv run cogames play --mission evals.diagnostic_radial -p
|
|
125
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 1
|
|
81
126
|
|
|
82
127
|
# Multi-agent
|
|
83
|
-
uv run cogames play --mission evals.diagnostic_radial -p
|
|
128
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 4
|
|
84
129
|
```
|
|
85
130
|
|
|
86
|
-
###
|
|
131
|
+
### 3. UnclippingAgent
|
|
87
132
|
|
|
88
133
|
**Purpose**: Handle missions with clipped extractors
|
|
89
134
|
|
|
@@ -103,7 +148,7 @@ oxygen | gear | | Germanium | resonator | silicon | gear | | Silicon | scrambler
|
|
|
103
148
|
1. Detects clipped extractor blocking progress
|
|
104
149
|
2. Gathers craft resource (e.g., carbon for decoder)
|
|
105
150
|
3. Changes glyph to "gear"
|
|
106
|
-
4. Crafts unclip item at
|
|
151
|
+
4. Crafts unclip item at hub
|
|
107
152
|
5. Navigates to clipped extractor
|
|
108
153
|
6. Uses item to unclip
|
|
109
154
|
7. Resumes normal gathering
|
|
@@ -111,23 +156,19 @@ oxygen | gear | | Germanium | resonator | silicon | gear | | Silicon | scrambler
|
|
|
111
156
|
**Usage**:
|
|
112
157
|
|
|
113
158
|
```python
|
|
114
|
-
from
|
|
159
|
+
from cogames_agents.policy.scripted_agent.unclipping_agent import UnclippingPolicy
|
|
115
160
|
|
|
116
161
|
policy = UnclippingPolicy(env)
|
|
117
162
|
# ... same as BaselinePolicy
|
|
118
163
|
```
|
|
119
164
|
|
|
120
|
-
|
|
165
|
+
### 4. TinyBaseline (demo policy)
|
|
121
166
|
|
|
122
|
-
|
|
123
|
-
# Test with unclipping diagnostic (single agent)
|
|
124
|
-
uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclipping --cogs 1
|
|
167
|
+
**Purpose**: Minimal, readable demo policy used for quick experiments.
|
|
125
168
|
|
|
126
|
-
|
|
127
|
-
uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclipping --cogs 2
|
|
128
|
-
```
|
|
169
|
+
**Short name**: `tiny_baseline` (defined in `demo_policy.py`).
|
|
129
170
|
|
|
130
|
-
|
|
171
|
+
## StarterAgent
|
|
131
172
|
|
|
132
173
|
**Purpose**: Intro-friendly agent that mirrors the high-level flow described in docs.
|
|
133
174
|
|
|
@@ -141,6 +182,9 @@ uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclippi
|
|
|
141
182
|
**Why it exists**: Shows the simplest possible if/else controller that still completes missions, ideal for external
|
|
142
183
|
readers who want a tiny, readable starting point before diving into the full Baseline/Unclipping logic.
|
|
143
184
|
|
|
185
|
+
**Location**: The starter policy lives in the core `cogames` package at `cogames.policy.starter_agent` so it is always
|
|
186
|
+
available without installing `cogames-agents`.
|
|
187
|
+
|
|
144
188
|
## Shared Components
|
|
145
189
|
|
|
146
190
|
### Phase System
|
|
@@ -150,7 +194,7 @@ All agents use a phase-based state machine:
|
|
|
150
194
|
```python
|
|
151
195
|
class Phase(Enum):
|
|
152
196
|
GATHER = "gather" # Collecting resources
|
|
153
|
-
ASSEMBLE = "assemble" # Crafting heart at
|
|
197
|
+
ASSEMBLE = "assemble" # Crafting heart at hub
|
|
154
198
|
DELIVER = "deliver" # Bringing heart to chest
|
|
155
199
|
RECHARGE = "recharge" # Restoring energy
|
|
156
200
|
CRAFT_UNCLIP = "craft_unclip" # UnclippingAgent only
|
|
@@ -170,7 +214,7 @@ Shared `pathfinding.py` module provides:
|
|
|
170
214
|
|
|
171
215
|
Agents parse egocentric observations (11×11 grid) to detect:
|
|
172
216
|
|
|
173
|
-
- Stations (
|
|
217
|
+
- Stations (hub, chest, junction, extractors)
|
|
174
218
|
- Other agents
|
|
175
219
|
- Walls and obstacles
|
|
176
220
|
- Agent state (resources, energy, inventory)
|
|
@@ -194,40 +238,23 @@ class ExtractorInfo:
|
|
|
194
238
|
|
|
195
239
|
```bash
|
|
196
240
|
# Basic diagnostic (single agent)
|
|
197
|
-
uv run cogames play --mission evals.diagnostic_radial -p
|
|
241
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 1 --steps 1000
|
|
198
242
|
|
|
199
243
|
# Chest navigation
|
|
200
|
-
uv run cogames play --mission evals.diagnostic_chest_navigation1 -p
|
|
244
|
+
uv run cogames play --mission evals.diagnostic_chest_navigation1 -p baseline --cogs 1 --steps 1000
|
|
201
245
|
|
|
202
246
|
# Resource extraction
|
|
203
|
-
uv run cogames play --mission evals.diagnostic_extract_missing_oxygen -p
|
|
247
|
+
uv run cogames play --mission evals.diagnostic_extract_missing_oxygen -p baseline --cogs 1 --steps 1000
|
|
204
248
|
|
|
205
249
|
# Hard version
|
|
206
|
-
uv run cogames play --mission evals.diagnostic_radial_hard -p
|
|
250
|
+
uv run cogames play --mission evals.diagnostic_radial_hard -p baseline --cogs 1 --steps 2000
|
|
207
251
|
|
|
208
252
|
# Multi-agent (2, 4 agents)
|
|
209
|
-
uv run cogames play --mission evals.diagnostic_radial -p
|
|
210
|
-
uv run cogames play --mission evals.diagnostic_radial -p
|
|
253
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 2 --steps 1500
|
|
254
|
+
uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 4 --steps 2000
|
|
211
255
|
|
|
212
256
|
# Assembly test
|
|
213
|
-
uv run cogames play --mission evals.
|
|
214
|
-
```
|
|
215
|
-
|
|
216
|
-
#### UnclippingAgent (Unclipping Diagnostics)
|
|
217
|
-
|
|
218
|
-
```bash
|
|
219
|
-
# Unclipping craft diagnostic
|
|
220
|
-
uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclipping --cogs 1 --steps 2000
|
|
221
|
-
|
|
222
|
-
# Unclipping with pre-seeded inventory
|
|
223
|
-
uv run cogames play --mission evals.diagnostic_unclip_preseed -p scripted_unclipping --cogs 1 --steps 2000
|
|
224
|
-
|
|
225
|
-
# Multi-agent unclipping
|
|
226
|
-
uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclipping --cogs 2 --steps 2000
|
|
227
|
-
|
|
228
|
-
# Note: For testing clipping variants on procedural maps, use training_facility or hello_world sites
|
|
229
|
-
# Example with variants:
|
|
230
|
-
uv run cogames play --mission training_facility.harvest --variant clip_hub_stations --cogs 1 --steps 2000
|
|
257
|
+
uv run cogames play --mission evals.diagnostic_assemble_seeded_search -p baseline --cogs 1 --steps 1000
|
|
231
258
|
```
|
|
232
259
|
|
|
233
260
|
### Comprehensive Evaluation
|
|
@@ -237,19 +264,16 @@ uv run cogames play --mission training_facility.harvest --variant clip_hub_stati
|
|
|
237
264
|
uv run python packages/cogames/scripts/run_evaluation.py
|
|
238
265
|
|
|
239
266
|
# Evaluate specific agent
|
|
240
|
-
uv run python packages/cogames/scripts/run_evaluation.py --
|
|
241
|
-
uv run python packages/cogames/scripts/run_evaluation.py --
|
|
267
|
+
uv run python packages/cogames/scripts/run_evaluation.py --policy baseline
|
|
268
|
+
uv run python packages/cogames/scripts/run_evaluation.py --policy ladybug
|
|
242
269
|
```
|
|
243
270
|
|
|
244
271
|
## Evaluation Results
|
|
245
272
|
|
|
246
|
-
See `experiments/SCRIPTED_AGENT_EVALUATION.md` for comprehensive evaluation results across all missions and difficulty
|
|
247
|
-
variants.
|
|
248
|
-
|
|
249
273
|
**Summary**:
|
|
250
274
|
|
|
251
|
-
- **BaselineAgent**:
|
|
252
|
-
- **UnclippingAgent**:
|
|
275
|
+
- **BaselineAgent**: Works best for non-clipped missions with straightforward resource gathering
|
|
276
|
+
- **UnclippingAgent**: Best overall performance, handles clipping scenarios well
|
|
253
277
|
|
|
254
278
|
## Extending
|
|
255
279
|
|
|
@@ -304,7 +328,7 @@ class MyPolicy:
|
|
|
304
328
|
4. **Register in `__init__.py`**:
|
|
305
329
|
|
|
306
330
|
```python
|
|
307
|
-
from
|
|
331
|
+
from cogames_agents.policy.scripted_agent.my_agent import MyPolicy
|
|
308
332
|
|
|
309
333
|
__all__ = [..., "MyPolicy"]
|
|
310
334
|
```
|
cogames/evaluate.py
CHANGED
|
@@ -13,11 +13,11 @@ from pydantic import BaseModel, ConfigDict
|
|
|
13
13
|
from rich.console import Console
|
|
14
14
|
from rich.table import Table
|
|
15
15
|
|
|
16
|
+
from metta_alo.rollout import run_multi_episode_rollout
|
|
17
|
+
from metta_alo.scoring import allocate_counts, validate_proportions
|
|
16
18
|
from mettagrid import MettaGridConfig
|
|
17
|
-
from mettagrid.policy.
|
|
18
|
-
from mettagrid.
|
|
19
|
-
from mettagrid.policy.policy_env_interface import PolicyEnvInterface
|
|
20
|
-
from mettagrid.simulator.multi_episode.rollout import MultiEpisodeRolloutResult, multi_episode_rollout
|
|
19
|
+
from mettagrid.policy.policy import PolicySpec
|
|
20
|
+
from mettagrid.simulator.multi_episode.rollout import MultiEpisodeRolloutResult
|
|
21
21
|
from mettagrid.simulator.multi_episode.summary import MultiEpisodeRolloutSummary, build_multi_episode_rollout_summaries
|
|
22
22
|
|
|
23
23
|
MissionResultsSummary: TypeAlias = list[MultiEpisodeRolloutSummary]
|
|
@@ -53,6 +53,7 @@ def evaluate(
|
|
|
53
53
|
raise ValueError("At least one mission must be provided for evaluation.")
|
|
54
54
|
if not policy_specs:
|
|
55
55
|
raise ValueError("At least one policy specification must be provided for evaluation.")
|
|
56
|
+
validate_proportions(proportions, len(policy_specs))
|
|
56
57
|
|
|
57
58
|
mission_names = [mission_name for mission_name, _ in missions]
|
|
58
59
|
if len(missions) == 1:
|
|
@@ -68,40 +69,27 @@ def evaluate(
|
|
|
68
69
|
mission_results: list[MultiEpisodeRolloutResult] = []
|
|
69
70
|
all_replay_paths: list[str] = []
|
|
70
71
|
for mission_name, env_cfg in missions:
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
initialize_or_load_policy(env_interface, spec) for spec in policy_specs
|
|
74
|
-
]
|
|
72
|
+
counts = allocate_counts(env_cfg.game.num_agents, proportions)
|
|
73
|
+
assignments = np.repeat(np.arange(len(counts), dtype=int), counts)
|
|
75
74
|
|
|
76
75
|
progress_label = f"Simulating ({mission_name})"
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def _progress_callback(_: int, progress_iter=iterator) -> None:
|
|
82
|
-
try:
|
|
83
|
-
next(progress_iter)
|
|
84
|
-
except StopIteration:
|
|
85
|
-
pass
|
|
86
|
-
|
|
87
|
-
rollout_payload = multi_episode_rollout(
|
|
76
|
+
with typer.progressbar(length=episodes, label=progress_label) as progress:
|
|
77
|
+
rollout, replay_paths = run_multi_episode_rollout(
|
|
78
|
+
policy_specs=policy_specs,
|
|
79
|
+
assignments=assignments,
|
|
88
80
|
env_cfg=env_cfg,
|
|
89
|
-
policies=policy_instances,
|
|
90
|
-
proportions=proportions,
|
|
91
81
|
episodes=episodes,
|
|
92
|
-
max_action_time_ms=action_timeout_ms,
|
|
93
82
|
seed=seed,
|
|
94
|
-
|
|
95
|
-
|
|
83
|
+
max_action_time_ms=action_timeout_ms,
|
|
84
|
+
replay_dir=save_replay,
|
|
85
|
+
create_replay_dir=save_replay is not None,
|
|
86
|
+
on_progress=lambda _episode_idx, _result: progress.update(1),
|
|
96
87
|
)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
if episode.replay_path:
|
|
101
|
-
all_replay_paths.append(episode.replay_path)
|
|
88
|
+
|
|
89
|
+
mission_results.append(rollout)
|
|
90
|
+
all_replay_paths.extend(replay_paths)
|
|
102
91
|
|
|
103
92
|
summaries = build_multi_episode_rollout_summaries(mission_results, num_policies=len(policy_specs))
|
|
104
|
-
mission_names = [mission_name for mission_name, _ in missions]
|
|
105
93
|
_output_results(console, policy_specs, mission_names, summaries, output_format)
|
|
106
94
|
|
|
107
95
|
# Print replay commands if replays were saved
|