cogames 0.3.49__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. cogames/cli/client.py +60 -6
  2. cogames/cli/docsync/__init__.py +0 -0
  3. cogames/cli/docsync/_nb_md_directive_processing.py +180 -0
  4. cogames/cli/docsync/_nb_md_sync.py +103 -0
  5. cogames/cli/docsync/_nb_py_sync.py +122 -0
  6. cogames/cli/docsync/_three_way_sync.py +115 -0
  7. cogames/cli/docsync/_utils.py +76 -0
  8. cogames/cli/docsync/docsync.py +156 -0
  9. cogames/cli/leaderboard.py +112 -28
  10. cogames/cli/mission.py +64 -53
  11. cogames/cli/policy.py +46 -10
  12. cogames/cli/submit.py +268 -67
  13. cogames/cogs_vs_clips/cog.py +79 -0
  14. cogames/cogs_vs_clips/cogs_vs_clips_mapgen.md +19 -16
  15. cogames/cogs_vs_clips/cogsguard_reward_variants.py +153 -0
  16. cogames/cogs_vs_clips/cogsguard_tutorial.py +56 -0
  17. cogames/cogs_vs_clips/evals/README.md +10 -16
  18. cogames/cogs_vs_clips/evals/cogsguard_evals.py +81 -0
  19. cogames/cogs_vs_clips/evals/diagnostic_evals.py +49 -444
  20. cogames/cogs_vs_clips/evals/difficulty_variants.py +13 -326
  21. cogames/cogs_vs_clips/evals/integrated_evals.py +5 -45
  22. cogames/cogs_vs_clips/evals/spanning_evals.py +9 -180
  23. cogames/cogs_vs_clips/mission.py +187 -146
  24. cogames/cogs_vs_clips/missions.py +46 -137
  25. cogames/cogs_vs_clips/procedural.py +8 -8
  26. cogames/cogs_vs_clips/sites.py +107 -3
  27. cogames/cogs_vs_clips/stations.py +198 -186
  28. cogames/cogs_vs_clips/tutorial_missions.py +1 -1
  29. cogames/cogs_vs_clips/variants.py +25 -476
  30. cogames/device.py +13 -1
  31. cogames/{policy/scripted_agent/README.md → docs/SCRIPTED_AGENT.md} +82 -58
  32. cogames/evaluate.py +18 -30
  33. cogames/main.py +1434 -243
  34. cogames/maps/canidate1_1000.map +1 -1
  35. cogames/maps/canidate1_1000_stations.map +2 -2
  36. cogames/maps/canidate1_500.map +1 -1
  37. cogames/maps/canidate1_500_stations.map +2 -2
  38. cogames/maps/canidate2_1000.map +1 -1
  39. cogames/maps/canidate2_1000_stations.map +2 -2
  40. cogames/maps/canidate2_500.map +1 -1
  41. cogames/maps/canidate2_500_stations.map +2 -2
  42. cogames/maps/canidate3_1000.map +1 -1
  43. cogames/maps/canidate3_1000_stations.map +2 -2
  44. cogames/maps/canidate3_500.map +1 -1
  45. cogames/maps/canidate3_500_stations.map +2 -2
  46. cogames/maps/canidate4_500.map +1 -1
  47. cogames/maps/canidate4_500_stations.map +2 -2
  48. cogames/maps/cave_base_50.map +2 -2
  49. cogames/maps/diagnostic_evals/diagnostic_agile.map +2 -2
  50. cogames/maps/diagnostic_evals/diagnostic_agile_hard.map +2 -2
  51. cogames/maps/diagnostic_evals/diagnostic_charge_up.map +2 -2
  52. cogames/maps/diagnostic_evals/diagnostic_charge_up_hard.map +2 -2
  53. cogames/maps/diagnostic_evals/diagnostic_chest_navigation1.map +2 -2
  54. cogames/maps/diagnostic_evals/diagnostic_chest_navigation1_hard.map +2 -2
  55. cogames/maps/diagnostic_evals/diagnostic_chest_navigation2.map +2 -2
  56. cogames/maps/diagnostic_evals/diagnostic_chest_navigation2_hard.map +2 -2
  57. cogames/maps/diagnostic_evals/diagnostic_chest_navigation3.map +2 -2
  58. cogames/maps/diagnostic_evals/diagnostic_chest_navigation3_hard.map +2 -2
  59. cogames/maps/diagnostic_evals/diagnostic_chest_near.map +2 -2
  60. cogames/maps/diagnostic_evals/diagnostic_chest_search.map +2 -2
  61. cogames/maps/diagnostic_evals/diagnostic_chest_search_hard.map +2 -2
  62. cogames/maps/diagnostic_evals/diagnostic_extract_lab.map +2 -2
  63. cogames/maps/diagnostic_evals/diagnostic_extract_lab_hard.map +2 -2
  64. cogames/maps/diagnostic_evals/diagnostic_memory.map +2 -2
  65. cogames/maps/diagnostic_evals/diagnostic_memory_hard.map +2 -2
  66. cogames/maps/diagnostic_evals/diagnostic_radial.map +2 -2
  67. cogames/maps/diagnostic_evals/diagnostic_radial_hard.map +2 -2
  68. cogames/maps/diagnostic_evals/diagnostic_resource_lab.map +2 -2
  69. cogames/maps/diagnostic_evals/diagnostic_unclip.map +2 -2
  70. cogames/maps/evals/eval_balanced_spread.map +9 -5
  71. cogames/maps/evals/eval_clip_oxygen.map +9 -5
  72. cogames/maps/evals/eval_collect_resources.map +9 -5
  73. cogames/maps/evals/eval_collect_resources_hard.map +9 -5
  74. cogames/maps/evals/eval_collect_resources_medium.map +9 -5
  75. cogames/maps/evals/eval_divide_and_conquer.map +9 -5
  76. cogames/maps/evals/eval_energy_starved.map +9 -5
  77. cogames/maps/evals/eval_multi_coordinated_collect_hard.map +9 -5
  78. cogames/maps/evals/eval_oxygen_bottleneck.map +9 -5
  79. cogames/maps/evals/eval_single_use_world.map +9 -5
  80. cogames/maps/evals/extractor_hub_100x100.map +9 -5
  81. cogames/maps/evals/extractor_hub_30x30.map +9 -5
  82. cogames/maps/evals/extractor_hub_50x50.map +9 -5
  83. cogames/maps/evals/extractor_hub_70x70.map +9 -5
  84. cogames/maps/evals/extractor_hub_80x80.map +9 -5
  85. cogames/maps/machina_100_stations.map +2 -2
  86. cogames/maps/machina_200_stations.map +2 -2
  87. cogames/maps/machina_200_stations_small.map +2 -2
  88. cogames/maps/machina_eval_exp01.map +2 -2
  89. cogames/maps/machina_eval_template_large.map +2 -2
  90. cogames/maps/machinatrainer4agents.map +2 -2
  91. cogames/maps/machinatrainer4agentsbase.map +2 -2
  92. cogames/maps/machinatrainerbig.map +2 -2
  93. cogames/maps/machinatrainersmall.map +2 -2
  94. cogames/maps/planky_evals/aligner_avoid_aoe.map +28 -0
  95. cogames/maps/planky_evals/aligner_full_cycle.map +28 -0
  96. cogames/maps/planky_evals/aligner_gear.map +24 -0
  97. cogames/maps/planky_evals/aligner_hearts.map +24 -0
  98. cogames/maps/planky_evals/aligner_junction.map +26 -0
  99. cogames/maps/planky_evals/exploration_distant.map +28 -0
  100. cogames/maps/planky_evals/maze.map +32 -0
  101. cogames/maps/planky_evals/miner_best_resource.map +26 -0
  102. cogames/maps/planky_evals/miner_deposit.map +24 -0
  103. cogames/maps/planky_evals/miner_extract.map +26 -0
  104. cogames/maps/planky_evals/miner_full_cycle.map +28 -0
  105. cogames/maps/planky_evals/miner_gear.map +24 -0
  106. cogames/maps/planky_evals/multi_role.map +28 -0
  107. cogames/maps/planky_evals/resource_chain.map +30 -0
  108. cogames/maps/planky_evals/scout_explore.map +32 -0
  109. cogames/maps/planky_evals/scout_gear.map +24 -0
  110. cogames/maps/planky_evals/scrambler_full_cycle.map +28 -0
  111. cogames/maps/planky_evals/scrambler_gear.map +24 -0
  112. cogames/maps/planky_evals/scrambler_target.map +26 -0
  113. cogames/maps/planky_evals/stuck_corridor.map +32 -0
  114. cogames/maps/planky_evals/survive_retreat.map +26 -0
  115. cogames/maps/training_facility_clipped.map +2 -2
  116. cogames/maps/training_facility_open_1.map +2 -2
  117. cogames/maps/training_facility_open_2.map +2 -2
  118. cogames/maps/training_facility_open_3.map +2 -2
  119. cogames/maps/training_facility_tight_4.map +2 -2
  120. cogames/maps/training_facility_tight_5.map +2 -2
  121. cogames/maps/vanilla_large.map +2 -2
  122. cogames/maps/vanilla_small.map +2 -2
  123. cogames/pickup.py +183 -0
  124. cogames/play.py +166 -33
  125. cogames/policy/chaos_monkey.py +54 -0
  126. cogames/policy/nim_agents/__init__.py +27 -10
  127. cogames/policy/nim_agents/agents.py +121 -60
  128. cogames/policy/nim_agents/thinky_eval.py +35 -222
  129. cogames/policy/pufferlib_policy.py +67 -32
  130. cogames/policy/starter_agent.py +184 -0
  131. cogames/policy/trainable_policy_template.py +4 -1
  132. cogames/train.py +51 -13
  133. cogames/verbose.py +2 -2
  134. cogames-0.3.64.dist-info/METADATA +1842 -0
  135. cogames-0.3.64.dist-info/RECORD +159 -0
  136. cogames-0.3.64.dist-info/licenses/LICENSE +21 -0
  137. cogames-0.3.64.dist-info/top_level.txt +2 -0
  138. metta_alo/__init__.py +0 -0
  139. metta_alo/job_specs.py +17 -0
  140. metta_alo/policy.py +16 -0
  141. metta_alo/pure_single_episode_runner.py +75 -0
  142. metta_alo/py.typed +0 -0
  143. metta_alo/rollout.py +322 -0
  144. metta_alo/scoring.py +168 -0
  145. cogames/maps/diagnostic_evals/diagnostic_assembler_near.map +0 -49
  146. cogames/maps/diagnostic_evals/diagnostic_assembler_search.map +0 -49
  147. cogames/maps/diagnostic_evals/diagnostic_assembler_search_hard.map +0 -89
  148. cogames/policy/nim_agents/common.nim +0 -887
  149. cogames/policy/nim_agents/install.sh +0 -1
  150. cogames/policy/nim_agents/ladybug_agent.nim +0 -984
  151. cogames/policy/nim_agents/nim_agents.nim +0 -55
  152. cogames/policy/nim_agents/nim_agents.nims +0 -14
  153. cogames/policy/nim_agents/nimby.lock +0 -3
  154. cogames/policy/nim_agents/racecar_agents.nim +0 -884
  155. cogames/policy/nim_agents/random_agents.nim +0 -68
  156. cogames/policy/nim_agents/test_agents.py +0 -53
  157. cogames/policy/nim_agents/thinky_agents.nim +0 -717
  158. cogames/policy/scripted_agent/baseline_agent.py +0 -1049
  159. cogames/policy/scripted_agent/demo_policy.py +0 -244
  160. cogames/policy/scripted_agent/pathfinding.py +0 -126
  161. cogames/policy/scripted_agent/starter_agent.py +0 -136
  162. cogames/policy/scripted_agent/types.py +0 -235
  163. cogames/policy/scripted_agent/unclipping_agent.py +0 -476
  164. cogames/policy/scripted_agent/utils.py +0 -385
  165. cogames-0.3.49.dist-info/METADATA +0 -406
  166. cogames-0.3.49.dist-info/RECORD +0 -136
  167. cogames-0.3.49.dist-info/top_level.txt +0 -1
  168. {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/WHEEL +0 -0
  169. {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/entry_points.txt +0 -0
@@ -1,14 +1,17 @@
1
1
  # Scripted Agent Policies
2
2
 
3
- Three teaching-friendly scripted agent implementations for CoGames evaluation and ablation studies.
3
+ Teaching-friendly scripted agents for CoGames evaluation and ablation studies, plus a tiny demo policy and the CogsGuard
4
+ team-play scripted policy.
4
5
 
5
6
  ## Overview
6
7
 
7
- This package provides three progressively capable scripted agents:
8
+ This package provides the CogsGuard team-play policy, two progressively capable scripted agents, and one tiny demo
9
+ policy:
8
10
 
9
- 1. **BaselineAgent** - Core functionality: exploration, resource gathering, heart assembly (single/multi-agent)
10
- 2. **UnclippingAgent** - Extends BaselineAgent with extractor unclipping capability
11
- 3. **StarterAgent** - Lightweight, tutorial-friendly flow for README and quick demos
11
+ 1. **CogsGuard** - Vibe-based multi-role policy for the CogsGuard arena
12
+ 2. **BaselineAgent** - Core functionality for legacy heart-production game: exploration, resource gathering, heart
13
+ assembly
14
+ 3. **UnclippingAgent** - Extends BaselineAgent with extractor unclipping capability (legacy game)
12
15
 
13
16
  ## Architecture
14
17
 
@@ -16,11 +19,11 @@ This package provides three progressively capable scripted agents:
16
19
 
17
20
  ```
18
21
  scripted_agent/
22
+ ├── cogsguard/ # CogsGuard scripted policy (vibe-based roles)
19
23
  ├── baseline_agent.py # Base agent + BaselinePolicy wrapper
20
24
  ├── unclipping_agent.py # Unclipping extension + UnclippingPolicy wrapper
21
- ├── starter_agent.py # Minimal if/else agent for docs and demos
22
- ├── pathfinding.py # Pathfinding utilities (shared)
23
- └── README.md # This file
25
+ ├── demo_policy.py # Tiny demo policy (short name: tiny_baseline)
26
+ └── pathfinding.py # Pathfinding utilities (shared)
24
27
  ```
25
28
 
26
29
  Each agent file contains:
@@ -38,7 +41,49 @@ These agents are designed for **ablation studies** and **baseline evaluation**:
38
41
 
39
42
  ## Agents
40
43
 
41
- ### 1. BaselineAgent
44
+ ### 1. CogsGuard Scripted Agent
45
+
46
+ CogsGuard is the team-play focus for scripted policies. Agents are controlled by **vibes** that map to roles and gear
47
+ acquisition.
48
+
49
+ **Vibes**:
50
+
51
+ | Vibe | Behavior |
52
+ | ----------- | ---------------------------------------- |
53
+ | `default` | Idle (noop) |
54
+ | `heart` | Idle (noop) |
55
+ | `gear` | Smart role selection |
56
+ | `miner` | Gather and deposit resources |
57
+ | `scout` | Explore and discover structures |
58
+ | `aligner` | Align neutral supply depots to cogs |
59
+ | `scrambler` | Scramble clips-aligned depots to neutral |
60
+
61
+ **Gear costs** (paid from cogs commons):
62
+
63
+ | Gear | Cost | Bonus |
64
+ | --------- | ------------------------------------------ | -------------------- |
65
+ | Miner | 3 carbon, 1 oxygen, 1 germanium, 1 silicon | +40 cargo |
66
+ | Scout | 1 carbon, 1 oxygen, 1 germanium, 3 silicon | +100 energy, +400 HP |
67
+ | Aligner | 3 carbon, 1 oxygen, 1 germanium, 1 silicon | +20 influence |
68
+ | Scrambler | 1 carbon, 3 oxygen, 1 germanium, 1 silicon | +200 HP |
69
+
70
+ **Supply depots** start clips-aligned. Scramblers neutralize them; aligners convert neutral depots to cogs for AOE
71
+ energy regen.
72
+
73
+ **Usage**:
74
+
75
+ ```bash
76
+ # Default role distribution (1 scrambler, 4 miners, rest smart-gear)
77
+ ./tools/run.py recipes.experiment.cogsguard.play policy_uri=metta://policy/role
78
+
79
+ # Custom role counts
80
+ ./tools/run.py recipes.experiment.cogsguard.play \
81
+ policy_uri="metta://policy/role?miner=3&scout=2&aligner=2&scrambler=3"
82
+ ```
83
+
84
+ **Full documentation**: `packages/cogames-agents/src/cogames_agents/policy/scripted_agent/cogsguard/README.md`
85
+
86
+ ### 2. BaselineAgent
42
87
 
43
88
  **Purpose**: Minimal working agent for single/multi-agent missions
44
89
 
@@ -46,7 +91,7 @@ These agents are designed for **ablation studies** and **baseline evaluation**:
46
91
 
47
92
  - ✅ Visual discovery (explores to find stations and extractors)
48
93
  - ✅ Resource gathering (navigates to extractors, handles cooldowns)
49
- - ✅ Heart assembly (deposits resources at assembler)
94
+ - ✅ Heart assembly (deposits resources at hub)
50
95
  - ✅ Heart delivery (brings hearts to chest)
51
96
  - ✅ Energy management (recharges when low)
52
97
  - ✅ Extractor tracking (remembers positions, cooldowns, remaining uses)
@@ -60,7 +105,7 @@ These agents are designed for **ablation studies** and **baseline evaluation**:
60
105
  **Usage**:
61
106
 
62
107
  ```python
63
- from cogames.policy.scripted_agent.baseline_agent import BaselinePolicy
108
+ from cogames_agents.policy.scripted_agent.baseline_agent import BaselinePolicy
64
109
  from mettagrid import MettaGridEnv
65
110
 
66
111
  env = MettaGridEnv(env_config)
@@ -77,13 +122,13 @@ action = agent.step(obs[0])
77
122
 
78
123
  ```bash
79
124
  # Single agent
80
- uv run cogames play --mission evals.diagnostic_radial -p scripted_baseline --cogs 1
125
+ uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 1
81
126
 
82
127
  # Multi-agent
83
- uv run cogames play --mission evals.diagnostic_radial -p scripted_baseline --cogs 4
128
+ uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 4
84
129
  ```
85
130
 
86
- ### 2. UnclippingAgent
131
+ ### 3. UnclippingAgent
87
132
 
88
133
  **Purpose**: Handle missions with clipped extractors
89
134
 
@@ -103,7 +148,7 @@ oxygen | gear | | Germanium | resonator | silicon | gear | | Silicon | scrambler
103
148
  1. Detects clipped extractor blocking progress
104
149
  2. Gathers craft resource (e.g., carbon for decoder)
105
150
  3. Changes glyph to "gear"
106
- 4. Crafts unclip item at assembler
151
+ 4. Crafts unclip item at hub
107
152
  5. Navigates to clipped extractor
108
153
  6. Uses item to unclip
109
154
  7. Resumes normal gathering
@@ -111,23 +156,19 @@ oxygen | gear | | Germanium | resonator | silicon | gear | | Silicon | scrambler
111
156
  **Usage**:
112
157
 
113
158
  ```python
114
- from cogames.policy.scripted_agent.unclipping_agent import UnclippingPolicy
159
+ from cogames_agents.policy.scripted_agent.unclipping_agent import UnclippingPolicy
115
160
 
116
161
  policy = UnclippingPolicy(env)
117
162
  # ... same as BaselinePolicy
118
163
  ```
119
164
 
120
- **CLI**:
165
+ ### 4. TinyBaseline (demo policy)
121
166
 
122
- ```bash
123
- # Test with unclipping diagnostic (single agent)
124
- uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclipping --cogs 1
167
+ **Purpose**: Minimal, readable demo policy used for quick experiments.
125
168
 
126
- # Test with unclipping diagnostic (multi-agent)
127
- uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclipping --cogs 2
128
- ```
169
+ **Short name**: `tiny_baseline` (defined in `demo_policy.py`).
129
170
 
130
- ### 3. StarterAgent
171
+ ## StarterAgent
131
172
 
132
173
  **Purpose**: Intro-friendly agent that mirrors the high-level flow described in docs.
133
174
 
@@ -141,6 +182,9 @@ uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclippi
141
182
  **Why it exists**: Shows the simplest possible if/else controller that still completes missions, ideal for external
142
183
  readers who want a tiny, readable starting point before diving into the full Baseline/Unclipping logic.
143
184
 
185
+ **Location**: The starter policy lives in the core `cogames` package at `cogames.policy.starter_agent` so it is always
186
+ available without installing `cogames-agents`.
187
+
144
188
  ## Shared Components
145
189
 
146
190
  ### Phase System
@@ -150,7 +194,7 @@ All agents use a phase-based state machine:
150
194
  ```python
151
195
  class Phase(Enum):
152
196
  GATHER = "gather" # Collecting resources
153
- ASSEMBLE = "assemble" # Crafting heart at assembler
197
+ ASSEMBLE = "assemble" # Crafting heart at hub
154
198
  DELIVER = "deliver" # Bringing heart to chest
155
199
  RECHARGE = "recharge" # Restoring energy
156
200
  CRAFT_UNCLIP = "craft_unclip" # UnclippingAgent only
@@ -170,7 +214,7 @@ Shared `pathfinding.py` module provides:
170
214
 
171
215
  Agents parse egocentric observations (11×11 grid) to detect:
172
216
 
173
- - Stations (assembler, chest, charger, extractors)
217
+ - Stations (hub, chest, junction, extractors)
174
218
  - Other agents
175
219
  - Walls and obstacles
176
220
  - Agent state (resources, energy, inventory)
@@ -194,40 +238,23 @@ class ExtractorInfo:
194
238
 
195
239
  ```bash
196
240
  # Basic diagnostic (single agent)
197
- uv run cogames play --mission evals.diagnostic_radial -p scripted_baseline --cogs 1 --steps 1000
241
+ uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 1 --steps 1000
198
242
 
199
243
  # Chest navigation
200
- uv run cogames play --mission evals.diagnostic_chest_navigation1 -p scripted_baseline --cogs 1 --steps 1000
244
+ uv run cogames play --mission evals.diagnostic_chest_navigation1 -p baseline --cogs 1 --steps 1000
201
245
 
202
246
  # Resource extraction
203
- uv run cogames play --mission evals.diagnostic_extract_missing_oxygen -p scripted_baseline --cogs 1 --steps 1000
247
+ uv run cogames play --mission evals.diagnostic_extract_missing_oxygen -p baseline --cogs 1 --steps 1000
204
248
 
205
249
  # Hard version
206
- uv run cogames play --mission evals.diagnostic_radial_hard -p scripted_baseline --cogs 1 --steps 2000
250
+ uv run cogames play --mission evals.diagnostic_radial_hard -p baseline --cogs 1 --steps 2000
207
251
 
208
252
  # Multi-agent (2, 4 agents)
209
- uv run cogames play --mission evals.diagnostic_radial -p scripted_baseline --cogs 2 --steps 1500
210
- uv run cogames play --mission evals.diagnostic_radial -p scripted_baseline --cogs 4 --steps 2000
253
+ uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 2 --steps 1500
254
+ uv run cogames play --mission evals.diagnostic_radial -p baseline --cogs 4 --steps 2000
211
255
 
212
256
  # Assembly test
213
- uv run cogames play --mission evals.diagnostic_assembler_search -p scripted_baseline --cogs 1 --steps 1000
214
- ```
215
-
216
- #### UnclippingAgent (Unclipping Diagnostics)
217
-
218
- ```bash
219
- # Unclipping craft diagnostic
220
- uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclipping --cogs 1 --steps 2000
221
-
222
- # Unclipping with pre-seeded inventory
223
- uv run cogames play --mission evals.diagnostic_unclip_preseed -p scripted_unclipping --cogs 1 --steps 2000
224
-
225
- # Multi-agent unclipping
226
- uv run cogames play --mission evals.diagnostic_unclip_craft -p scripted_unclipping --cogs 2 --steps 2000
227
-
228
- # Note: For testing clipping variants on procedural maps, use training_facility or hello_world sites
229
- # Example with variants:
230
- uv run cogames play --mission training_facility.harvest --variant clip_hub_stations --cogs 1 --steps 2000
257
+ uv run cogames play --mission evals.diagnostic_assemble_seeded_search -p baseline --cogs 1 --steps 1000
231
258
  ```
232
259
 
233
260
  ### Comprehensive Evaluation
@@ -237,19 +264,16 @@ uv run cogames play --mission training_facility.harvest --variant clip_hub_stati
237
264
  uv run python packages/cogames/scripts/run_evaluation.py
238
265
 
239
266
  # Evaluate specific agent
240
- uv run python packages/cogames/scripts/run_evaluation.py --agent simple
241
- uv run python packages/cogames/scripts/run_evaluation.py --agent unclipping
267
+ uv run python packages/cogames/scripts/run_evaluation.py --policy baseline
268
+ uv run python packages/cogames/scripts/run_evaluation.py --policy ladybug
242
269
  ```
243
270
 
244
271
  ## Evaluation Results
245
272
 
246
- See `experiments/SCRIPTED_AGENT_EVALUATION.md` for comprehensive evaluation results across all missions and difficulty
247
- variants.
248
-
249
273
  **Summary**:
250
274
 
251
- - **BaselineAgent**: 33.8% success rate across 1-8 agents, best for non-clipped missions
252
- - **UnclippingAgent**: 38.6% success rate, best overall performance, handles clipping well
275
+ - **BaselineAgent**: Works best for non-clipped missions with straightforward resource gathering
276
+ - **UnclippingAgent**: Best overall performance, handles clipping scenarios well
253
277
 
254
278
  ## Extending
255
279
 
@@ -304,7 +328,7 @@ class MyPolicy:
304
328
  4. **Register in `__init__.py`**:
305
329
 
306
330
  ```python
307
- from cogames.policy.scripted_agent.my_agent import MyPolicy
331
+ from cogames_agents.policy.scripted_agent.my_agent import MyPolicy
308
332
 
309
333
  __all__ = [..., "MyPolicy"]
310
334
  ```
cogames/evaluate.py CHANGED
@@ -13,11 +13,11 @@ from pydantic import BaseModel, ConfigDict
13
13
  from rich.console import Console
14
14
  from rich.table import Table
15
15
 
16
+ from metta_alo.rollout import run_multi_episode_rollout
17
+ from metta_alo.scoring import allocate_counts, validate_proportions
16
18
  from mettagrid import MettaGridConfig
17
- from mettagrid.policy.loader import initialize_or_load_policy
18
- from mettagrid.policy.policy import MultiAgentPolicy, PolicySpec
19
- from mettagrid.policy.policy_env_interface import PolicyEnvInterface
20
- from mettagrid.simulator.multi_episode.rollout import MultiEpisodeRolloutResult, multi_episode_rollout
19
+ from mettagrid.policy.policy import PolicySpec
20
+ from mettagrid.simulator.multi_episode.rollout import MultiEpisodeRolloutResult
21
21
  from mettagrid.simulator.multi_episode.summary import MultiEpisodeRolloutSummary, build_multi_episode_rollout_summaries
22
22
 
23
23
  MissionResultsSummary: TypeAlias = list[MultiEpisodeRolloutSummary]
@@ -53,6 +53,7 @@ def evaluate(
53
53
  raise ValueError("At least one mission must be provided for evaluation.")
54
54
  if not policy_specs:
55
55
  raise ValueError("At least one policy specification must be provided for evaluation.")
56
+ validate_proportions(proportions, len(policy_specs))
56
57
 
57
58
  mission_names = [mission_name for mission_name, _ in missions]
58
59
  if len(missions) == 1:
@@ -68,40 +69,27 @@ def evaluate(
68
69
  mission_results: list[MultiEpisodeRolloutResult] = []
69
70
  all_replay_paths: list[str] = []
70
71
  for mission_name, env_cfg in missions:
71
- env_interface = PolicyEnvInterface.from_mg_cfg(env_cfg)
72
- policy_instances: list[MultiAgentPolicy] = [
73
- initialize_or_load_policy(env_interface, spec) for spec in policy_specs
74
- ]
72
+ counts = allocate_counts(env_cfg.game.num_agents, proportions)
73
+ assignments = np.repeat(np.arange(len(counts), dtype=int), counts)
75
74
 
76
75
  progress_label = f"Simulating ({mission_name})"
77
- progress_iterable = range(episodes)
78
- with typer.progressbar(progress_iterable, label=progress_label) as progress:
79
- iterator = iter(progress)
80
-
81
- def _progress_callback(_: int, progress_iter=iterator) -> None:
82
- try:
83
- next(progress_iter)
84
- except StopIteration:
85
- pass
86
-
87
- rollout_payload = multi_episode_rollout(
76
+ with typer.progressbar(length=episodes, label=progress_label) as progress:
77
+ rollout, replay_paths = run_multi_episode_rollout(
78
+ policy_specs=policy_specs,
79
+ assignments=assignments,
88
80
  env_cfg=env_cfg,
89
- policies=policy_instances,
90
- proportions=proportions,
91
81
  episodes=episodes,
92
- max_action_time_ms=action_timeout_ms,
93
82
  seed=seed,
94
- progress_callback=_progress_callback,
95
- save_replay=save_replay,
83
+ max_action_time_ms=action_timeout_ms,
84
+ replay_dir=save_replay,
85
+ create_replay_dir=save_replay is not None,
86
+ on_progress=lambda _episode_idx, _result: progress.update(1),
96
87
  )
97
- mission_results.append(rollout_payload)
98
- # Collect replay paths from this mission
99
- for episode in rollout_payload.episodes:
100
- if episode.replay_path:
101
- all_replay_paths.append(episode.replay_path)
88
+
89
+ mission_results.append(rollout)
90
+ all_replay_paths.extend(replay_paths)
102
91
 
103
92
  summaries = build_multi_episode_rollout_summaries(mission_results, num_policies=len(policy_specs))
104
- mission_names = [mission_name for mission_name, _ in missions]
105
93
  _output_results(console, policy_specs, mission_names, summaries, output_format)
106
94
 
107
95
  # Print replay commands if replays were saved