cogames 0.3.65__py3-none-any.whl → 0.3.68__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cogames/cli/client.py +0 -3
- cogames/cli/docsync/docsync.py +7 -1
- cogames/cli/mission.py +44 -19
- cogames/cli/policy.py +26 -10
- cogames/cli/submit.py +127 -141
- cogames/cli/utils.py +5 -0
- cogames/cogs_vs_clips/clip_difficulty.py +57 -0
- cogames/cogs_vs_clips/clips.py +23 -6
- cogames/cogs_vs_clips/cog.py +16 -5
- cogames/cogs_vs_clips/cogsguard_curriculum.py +122 -0
- cogames/cogs_vs_clips/cogsguard_tutorial.py +5 -5
- cogames/cogs_vs_clips/config.py +1 -1
- cogames/cogs_vs_clips/docs/cogs_vs_clips_mapgen.md +2 -3
- cogames/cogs_vs_clips/evals/README.md +8 -32
- cogames/cogs_vs_clips/evals/diagnostic_evals.py +0 -1
- cogames/cogs_vs_clips/evals/difficulty_variants.py +7 -10
- cogames/cogs_vs_clips/mission.py +38 -10
- cogames/cogs_vs_clips/missions.py +1 -1
- cogames/cogs_vs_clips/reward_variants.py +173 -0
- cogames/cogs_vs_clips/sites.py +6 -5
- cogames/cogs_vs_clips/stations.py +13 -9
- cogames/cogs_vs_clips/team.py +3 -1
- cogames/cogs_vs_clips/terrain.py +2 -2
- cogames/cogs_vs_clips/variants.py +175 -4
- cogames/cogs_vs_clips/weather.py +52 -0
- cogames/docs/SCRIPTED_AGENT.md +3 -3
- cogames/evaluate.py +4 -2
- cogames/main.py +357 -51
- cogames/maps/canidate1_1000.map +1 -1
- cogames/maps/canidate1_1000_stations.map +2 -2
- cogames/maps/canidate1_500.map +1 -1
- cogames/maps/canidate1_500_stations.map +2 -2
- cogames/maps/canidate2_1000.map +1 -1
- cogames/maps/canidate2_1000_stations.map +2 -2
- cogames/maps/canidate2_500.map +1 -1
- cogames/maps/canidate2_500_stations.map +1 -1
- cogames/maps/canidate3_1000.map +1 -1
- cogames/maps/canidate3_1000_stations.map +2 -2
- cogames/maps/canidate3_500.map +1 -1
- cogames/maps/canidate3_500_stations.map +2 -2
- cogames/maps/canidate4_500.map +1 -1
- cogames/maps/canidate4_500_stations.map +2 -2
- cogames/maps/cave_base_50.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_agile.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_agile_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_charge_up.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_charge_up_hard.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation1.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation1_hard.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation2.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation2_hard.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation3.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_chest_navigation3_hard.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_chest_near.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_chest_search.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_chest_search_hard.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_extract_lab.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_extract_lab_hard.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_memory.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_memory_hard.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_radial.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_radial_hard.map +2 -2
- cogames/maps/diagnostic_evals/diagnostic_resource_lab.map +6 -6
- cogames/maps/diagnostic_evals/diagnostic_unclip.map +6 -6
- cogames/maps/evals/eval_balanced_spread.map +6 -6
- cogames/maps/evals/eval_clip_oxygen.map +6 -6
- cogames/maps/evals/eval_collect_resources.map +6 -6
- cogames/maps/evals/eval_collect_resources_hard.map +6 -6
- cogames/maps/evals/eval_collect_resources_medium.map +6 -6
- cogames/maps/evals/eval_divide_and_conquer.map +6 -6
- cogames/maps/evals/eval_energy_starved.map +6 -6
- cogames/maps/evals/eval_multi_coordinated_collect_hard.map +6 -6
- cogames/maps/evals/eval_oxygen_bottleneck.map +6 -6
- cogames/maps/evals/eval_single_use_world.map +6 -6
- cogames/maps/evals/extractor_hub_100x100.map +6 -6
- cogames/maps/evals/extractor_hub_30x30.map +6 -6
- cogames/maps/evals/extractor_hub_50x50.map +6 -6
- cogames/maps/evals/extractor_hub_70x70.map +6 -6
- cogames/maps/evals/extractor_hub_80x80.map +6 -6
- cogames/maps/machina_100_stations.map +2 -2
- cogames/maps/machina_200_stations.map +2 -2
- cogames/maps/machina_200_stations_small.map +2 -2
- cogames/maps/machina_eval_exp01.map +2 -2
- cogames/maps/machina_eval_template_large.map +2 -2
- cogames/maps/machinatrainer4agents.map +2 -2
- cogames/maps/machinatrainer4agentsbase.map +2 -2
- cogames/maps/machinatrainerbig.map +2 -2
- cogames/maps/machinatrainersmall.map +2 -2
- cogames/maps/planky_evals/aligner_avoid_aoe.map +6 -6
- cogames/maps/planky_evals/aligner_full_cycle.map +6 -6
- cogames/maps/planky_evals/aligner_gear.map +6 -6
- cogames/maps/planky_evals/aligner_hearts.map +6 -6
- cogames/maps/planky_evals/aligner_junction.map +6 -6
- cogames/maps/planky_evals/exploration_distant.map +6 -6
- cogames/maps/planky_evals/maze.map +6 -6
- cogames/maps/planky_evals/miner_best_resource.map +6 -6
- cogames/maps/planky_evals/miner_deposit.map +6 -6
- cogames/maps/planky_evals/miner_extract.map +6 -6
- cogames/maps/planky_evals/miner_full_cycle.map +6 -6
- cogames/maps/planky_evals/miner_gear.map +6 -6
- cogames/maps/planky_evals/multi_role.map +6 -6
- cogames/maps/planky_evals/resource_chain.map +6 -6
- cogames/maps/planky_evals/scout_explore.map +6 -6
- cogames/maps/planky_evals/scout_gear.map +6 -6
- cogames/maps/planky_evals/scrambler_full_cycle.map +6 -6
- cogames/maps/planky_evals/scrambler_gear.map +6 -6
- cogames/maps/planky_evals/scrambler_target.map +6 -6
- cogames/maps/planky_evals/stuck_corridor.map +6 -6
- cogames/maps/planky_evals/survive_retreat.map +6 -6
- cogames/maps/training_facility_clipped.map +2 -2
- cogames/maps/training_facility_open_1.map +2 -2
- cogames/maps/training_facility_open_2.map +2 -2
- cogames/maps/training_facility_open_3.map +2 -2
- cogames/maps/training_facility_tight_4.map +2 -2
- cogames/maps/training_facility_tight_5.map +2 -2
- cogames/maps/vanilla_large.map +2 -2
- cogames/maps/vanilla_small.map +2 -2
- cogames/pickup.py +6 -5
- cogames/play.py +14 -16
- cogames/policy/nim_agents/__init__.py +0 -2
- cogames/policy/nim_agents/agents.py +0 -11
- cogames/policy/starter_agent.py +4 -1
- {cogames-0.3.65.dist-info → cogames-0.3.68.dist-info}/METADATA +45 -29
- cogames-0.3.68.dist-info/RECORD +160 -0
- metta_alo/scoring.py +7 -7
- cogames-0.3.65.dist-info/RECORD +0 -160
- metta_alo/job_specs.py +0 -17
- metta_alo/policy.py +0 -16
- metta_alo/pure_single_episode_runner.py +0 -75
- metta_alo/rollout.py +0 -322
- {cogames-0.3.65.dist-info → cogames-0.3.68.dist-info}/WHEEL +0 -0
- {cogames-0.3.65.dist-info → cogames-0.3.68.dist-info}/entry_points.txt +0 -0
- {cogames-0.3.65.dist-info → cogames-0.3.68.dist-info}/licenses/LICENSE +0 -0
- {cogames-0.3.65.dist-info → cogames-0.3.68.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Sequence
|
|
6
|
+
|
|
7
|
+
from cogames.cogs_vs_clips.mission import CvCMission
|
|
8
|
+
from cogames.cogs_vs_clips.reward_variants import AVAILABLE_REWARD_VARIANTS
|
|
9
|
+
from cogames.cogs_vs_clips.variants import HIDDEN_VARIANTS, VARIANTS
|
|
10
|
+
from cogames.core import CoGameMissionVariant
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class EventProfile:
    """Named pair of override mappings: one for clips settings, one for weather.

    Instances are frozen, so the three attributes cannot be rebound after
    construction (the dictionaries themselves remain ordinary mutable dicts).

    NOTE(review): the override keys used in this module ("scramble_start",
    "day_length", ...) look like clips/weather config field names; the code
    that applies them is not in this module -- confirm at the call site.
    """

    # Profile identifier, e.g. "events_baseline".
    name: str
    # Clips settings to override, keyed by setting name.
    clips_overrides: dict[str, object]
    # Weather settings to override, keyed by setting name.
    weather_overrides: dict[str, object]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Map file names offered as fixed maps for the CogsGuard curriculum.
# NOTE(review): presumably resolved against the packaged maps directory -- confirm.
COGSGUARD_FIXED_MAPS: list[str] = [
    "machina_100_stations.map",
    "machina_200_stations.map",
    "cave_base_50.map",
    "vanilla_large.map",
]

# Baseline profile: both override mappings are empty.
DEFAULT_EVENT_PROFILE = EventProfile("events_baseline", {}, {})
# Available event profiles. Each entry is (name, clips overrides, weather overrides).
COGSGUARD_EVENT_PROFILES: list[EventProfile] = [
    DEFAULT_EVENT_PROFILE,
    # Smaller start/interval values and larger radii, with a day length of 100.
    EventProfile(
        "events_fast_clips_short_day",
        {
            "initial_clips_start": 5,
            "initial_clips_spots": 2,
            "scramble_start": 25,
            "scramble_interval": 50,
            "scramble_radius": 35,
            "align_start": 50,
            "align_interval": 50,
            "align_radius": 35,
        },
        {"day_length": 100},
    ),
    # Larger start/interval values and smaller radii, with a day length of 400.
    EventProfile(
        "events_slow_clips_long_day",
        {
            "initial_clips_start": 50,
            "initial_clips_spots": 1,
            "scramble_start": 200,
            "scramble_interval": 200,
            "scramble_radius": 15,
            "align_start": 300,
            "align_interval": 200,
            "align_radius": 15,
        },
        {"day_length": 400},
    ),
    # Clips overrides carry only the "disabled" flag; day length of 200.
    EventProfile(
        "events_no_clips",
        {"disabled": True},
        {"day_length": 200},
    ),
]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def normalize_variant_names(variants: str | Sequence[str] | None) -> list[str]:
    """Coerce a variant specification into a flat list of variant names.

    Args:
        variants: ``None``, a single variant name, a JSON-encoded list of
            names (a string that starts with ``[``), or a sequence of names.

    Returns:
        A new list of names. ``None`` yields an empty list. Elements of a
        JSON-encoded list are converted with ``str``. Any other string,
        including one that starts with ``[`` but is not valid JSON, is
        returned as a single-element list.
    """
    if variants is None:
        return []
    if not isinstance(variants, str):
        return list(variants)

    if variants.startswith("["):
        try:
            parsed = json.loads(variants)
        except json.JSONDecodeError:
            # Looks like a JSON list but is not one: treat the text as a single
            # literal name so the caller can report it as an unknown variant
            # instead of surfacing a JSON parsing error.
            return [variants]
        if isinstance(parsed, list):
            return [str(name) for name in parsed]
    return [variants]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def split_variants(
    variants: str | Sequence[str] | None,
) -> tuple[list[CoGameMissionVariant], list[str]]:
    """Partition requested variant names into mission variants and reward variants.

    Each name is checked against the reward variant names first; otherwise it
    is looked up by name among the public and hidden mission variants.

    Args:
        variants: ``None``, a single name, a JSON-encoded list string, or a
            sequence of names (normalized via ``normalize_variant_names``).

    Returns:
        ``(mission_variants, reward_variant_names)``, each in request order.

    Raises:
        ValueError: If any name is neither a reward variant nor a known
            mission variant. The message lists the public mission variants
            and the reward variants.
    """
    requested: list[str] = [] if variants is None else normalize_variant_names(variants)

    mission_lookup = {}
    for candidate in [*VARIANTS, *HIDDEN_VARIANTS]:
        mission_lookup[candidate.name] = candidate
    reward_names = set(AVAILABLE_REWARD_VARIANTS)

    mission_variants: list[CoGameMissionVariant] = []
    reward_variant_names: list[str] = []
    unrecognized: list[str] = []
    for requested_name in requested:
        if requested_name in reward_names:
            reward_variant_names.append(requested_name)
            continue
        found = mission_lookup.get(requested_name)
        if found is None:
            unrecognized.append(requested_name)
        else:
            mission_variants.append(found)

    if not unrecognized:
        return mission_variants, reward_variant_names

    # Hidden variants are deliberately left out of the listing, as before.
    available_mission = ", ".join(v.name for v in VARIANTS)
    available_reward = ", ".join(AVAILABLE_REWARD_VARIANTS)
    missing = ", ".join(unrecognized)
    raise ValueError(
        f"Unknown variant(s): {missing}. Mission variants: {available_mission}. "
        f"Reward variants: {available_reward}."
    )
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def resolve_event_profiles(event_profiles: Sequence[EventProfile] | None) -> list[EventProfile]:
    """Return the given profiles as a new list, or the default profile when ``None``.

    Args:
        event_profiles: Profiles to use, or ``None`` to fall back to
            ``DEFAULT_EVENT_PROFILE``.

    Returns:
        A fresh list; an empty input sequence yields an empty list.
    """
    return [DEFAULT_EVENT_PROFILE] if event_profiles is None else list(event_profiles)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def filter_compatible_variants(
    mission: CvCMission, variants: Sequence[CoGameMissionVariant]
) -> list[CoGameMissionVariant]:
    """Keep only the variants whose ``compat(mission)`` check is truthy.

    Args:
        mission: Mission passed to each variant's ``compat`` method.
        variants: Candidate variants, checked in order.

    Returns:
        The accepted variants, in their original order.
    """
    accepted: list[CoGameMissionVariant] = []
    for candidate in variants:
        if candidate.compat(mission):
            accepted.append(candidate)
    return accepted
|
|
@@ -18,11 +18,11 @@ def make_cogsguard_tutorial_site() -> CoGameSite:
|
|
|
18
18
|
hub_height=15,
|
|
19
19
|
outer_clearance=2,
|
|
20
20
|
stations=[
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"chest",
|
|
21
|
+
"c:aligner",
|
|
22
|
+
"c:scrambler",
|
|
23
|
+
"c:miner",
|
|
24
|
+
"c:scout",
|
|
25
|
+
"c:chest",
|
|
26
26
|
],
|
|
27
27
|
)
|
|
28
28
|
map_builder = MapGen.Config(
|
cogames/cogs_vs_clips/config.py
CHANGED
|
@@ -32,7 +32,7 @@ CvCConfig = SimpleNamespace(
|
|
|
32
32
|
"miner": "⛏️",
|
|
33
33
|
"scout": "🔭",
|
|
34
34
|
},
|
|
35
|
-
RESOURCES=["energy", "heart", "hp", "influence", *_ELEMENTS, *_GEAR],
|
|
35
|
+
RESOURCES=["energy", "heart", "hp", "influence", "solar", *_ELEMENTS, *_GEAR],
|
|
36
36
|
VIBES=_VIBES,
|
|
37
37
|
VIBE_NAMES=[vibe.name for vibe in _VIBES],
|
|
38
38
|
)
|
|
@@ -341,11 +341,10 @@ Included missions and variants:
|
|
|
341
341
|
Usage example:
|
|
342
342
|
|
|
343
343
|
```bash
|
|
344
|
-
uv run
|
|
345
|
-
--policy thinky \
|
|
344
|
+
uv run cogames diagnose thinky \
|
|
346
345
|
--mission-set integrated_evals \
|
|
347
346
|
--cogs 4 \
|
|
348
|
-
--
|
|
347
|
+
--episodes 2
|
|
349
348
|
```
|
|
350
349
|
|
|
351
350
|
Recommendation: When designing new scorable baselines, combine one "shaping" variant (e.g., `HeartChorusVariant`,
|
|
@@ -193,49 +193,25 @@ uv run cogames play --mission hello_world.single_use_swarm --cogs 4 -p baseline
|
|
|
193
193
|
|
|
194
194
|
## Programmatic Evaluation
|
|
195
195
|
|
|
196
|
-
### Using
|
|
196
|
+
### Using cogames diagnose / cogames run
|
|
197
197
|
|
|
198
198
|
For systematic evaluation across multiple missions and configurations:
|
|
199
199
|
|
|
200
200
|
```bash
|
|
201
201
|
# Evaluate on integrated eval suite
|
|
202
|
-
uv run
|
|
203
|
-
--policy thinky \
|
|
202
|
+
uv run cogames diagnose thinky \
|
|
204
203
|
--mission-set integrated_evals \
|
|
205
204
|
--cogs 4 \
|
|
206
|
-
--
|
|
205
|
+
--episodes 2
|
|
207
206
|
|
|
208
|
-
# Evaluate specific agent
|
|
209
|
-
uv run
|
|
207
|
+
# Evaluate specific agent with structured output
|
|
208
|
+
uv run cogames run \
|
|
209
|
+
--mission-set integrated_evals \
|
|
210
210
|
--policy baseline \
|
|
211
|
-
--
|
|
212
|
-
--
|
|
211
|
+
--episodes 10 \
|
|
212
|
+
--format json
|
|
213
213
|
```
|
|
214
214
|
|
|
215
|
-
### Using in Curriculum Training
|
|
216
|
-
|
|
217
|
-
Both diagnostic and integrated missions can be used in curriculum training via `mission_variant_curriculum.py`:
|
|
218
|
-
|
|
219
|
-
```python
|
|
220
|
-
from recipes.experiment.cvc import mission_variant_curriculum
|
|
221
|
-
|
|
222
|
-
# Train on diagnostic missions
|
|
223
|
-
mission_variant_curriculum.train(
|
|
224
|
-
base_missions=["diagnostic_missions"],
|
|
225
|
-
num_cogs=4,
|
|
226
|
-
variants="all"
|
|
227
|
-
)
|
|
228
|
-
|
|
229
|
-
# Train on specific integrated missions
|
|
230
|
-
mission_variant_curriculum.train(
|
|
231
|
-
base_missions=["oxygen_bottleneck", "energy_starved"],
|
|
232
|
-
num_cogs=4,
|
|
233
|
-
variants=["pack_rat", "energized"]
|
|
234
|
-
)
|
|
235
|
-
```
|
|
236
|
-
|
|
237
|
-
---
|
|
238
|
-
|
|
239
215
|
## Design Philosophy
|
|
240
216
|
|
|
241
217
|
### Diagnostic Missions
|
|
@@ -37,11 +37,8 @@ class DifficultyLevel(CoGameMissionVariant):
|
|
|
37
37
|
name: str = Field(description="Difficulty name (easy, medium, hard, brutal, etc.)")
|
|
38
38
|
description: str = Field(description="What makes this difficulty challenging", default="")
|
|
39
39
|
|
|
40
|
-
#
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
# Absolute overrides (if set, ignore multipliers)
|
|
44
|
-
energy_regen_override: int | None = Field(default=None)
|
|
40
|
+
# Solar override (if set, overrides weather day/night deltas)
|
|
41
|
+
solar_override: int | None = Field(default=None)
|
|
45
42
|
move_energy_cost_override: int | None = Field(default=None)
|
|
46
43
|
energy_capacity_override: int | None = Field(default=None)
|
|
47
44
|
cargo_capacity_override: int | None = Field(default=None)
|
|
@@ -70,20 +67,20 @@ STANDARD = DifficultyLevel(
|
|
|
70
67
|
HARD = DifficultyLevel(
|
|
71
68
|
name="hard",
|
|
72
69
|
description="Minimal passive regen and higher move cost",
|
|
73
|
-
|
|
70
|
+
solar_override=1, # Minimal regen prevents deadlock
|
|
74
71
|
move_energy_cost_override=2,
|
|
75
72
|
)
|
|
76
73
|
|
|
77
74
|
SINGLE_USE = DifficultyLevel(
|
|
78
75
|
name="single_use",
|
|
79
76
|
description="Minimal regen - no second chances",
|
|
80
|
-
|
|
77
|
+
solar_override=1,
|
|
81
78
|
)
|
|
82
79
|
|
|
83
80
|
SPEED_RUN = DifficultyLevel(
|
|
84
81
|
name="speed_run",
|
|
85
82
|
description="Short clock, cheap movement",
|
|
86
|
-
|
|
83
|
+
solar_override=2,
|
|
87
84
|
move_energy_cost_override=1,
|
|
88
85
|
max_steps_override=600,
|
|
89
86
|
)
|
|
@@ -91,7 +88,7 @@ SPEED_RUN = DifficultyLevel(
|
|
|
91
88
|
ENERGY_CRISIS = DifficultyLevel(
|
|
92
89
|
name="energy_crisis",
|
|
93
90
|
description="Minimal passive regen - plan every move",
|
|
94
|
-
|
|
91
|
+
solar_override=1, # Minimal regen prevents deadlock
|
|
95
92
|
)
|
|
96
93
|
|
|
97
94
|
# Export variants for use with --variant CLI flag.
|
|
@@ -116,7 +113,7 @@ def list_difficulties() -> None:
|
|
|
116
113
|
print("=" * 80)
|
|
117
114
|
for diff in DIFFICULTY_VARIANTS:
|
|
118
115
|
print(f"\n{diff.name.upper()}: {diff.description}")
|
|
119
|
-
print(f"
|
|
116
|
+
print(f" Solar override: {diff.solar_override}")
|
|
120
117
|
|
|
121
118
|
|
|
122
119
|
if __name__ == "__main__":
|
cogames/cogs_vs_clips/mission.py
CHANGED
|
@@ -15,6 +15,7 @@ from cogames.cogs_vs_clips.stations import (
|
|
|
15
15
|
)
|
|
16
16
|
from cogames.cogs_vs_clips.team import CogTeam
|
|
17
17
|
from cogames.cogs_vs_clips.variants import NumCogsVariant
|
|
18
|
+
from cogames.cogs_vs_clips.weather import WeatherConfig
|
|
18
19
|
from cogames.core import (
|
|
19
20
|
MAP_MISSION_DELIMITER,
|
|
20
21
|
CoGameMission,
|
|
@@ -28,7 +29,7 @@ from mettagrid.config.action_config import (
|
|
|
28
29
|
NoopActionConfig,
|
|
29
30
|
)
|
|
30
31
|
from mettagrid.config.game_value import inv
|
|
31
|
-
from mettagrid.config.mettagrid_config import GameConfig, MettaGridConfig
|
|
32
|
+
from mettagrid.config.mettagrid_config import CollectiveConfig, GameConfig, MettaGridConfig
|
|
32
33
|
from mettagrid.config.obs_config import GlobalObsConfig, ObsConfig
|
|
33
34
|
from mettagrid.map_builder.map_builder import AnyMapBuilderConfig
|
|
34
35
|
|
|
@@ -46,6 +47,7 @@ class CvCMission(CoGameMission):
|
|
|
46
47
|
"""Mission configuration for CogsGuard game mode."""
|
|
47
48
|
|
|
48
49
|
max_steps: int = Field(default=10000)
|
|
50
|
+
total_junctions: int = Field(default=118, description="Total junctions on the map (for curriculum scaling)")
|
|
49
51
|
|
|
50
52
|
cog: CogConfig = Field(default_factory=lambda: CogConfig())
|
|
51
53
|
teams: dict[str, CogTeam] = Field(
|
|
@@ -55,6 +57,7 @@ class CvCMission(CoGameMission):
|
|
|
55
57
|
)
|
|
56
58
|
|
|
57
59
|
clips: ClipsConfig = Field(default_factory=lambda: ClipsConfig())
|
|
60
|
+
weather: WeatherConfig = Field(default_factory=lambda: WeatherConfig())
|
|
58
61
|
|
|
59
62
|
@property
|
|
60
63
|
def num_agents(self) -> int:
|
|
@@ -74,7 +77,7 @@ class CvCMission(CoGameMission):
|
|
|
74
77
|
Returns:
|
|
75
78
|
MettaGridConfig ready for environment creation
|
|
76
79
|
"""
|
|
77
|
-
|
|
80
|
+
team_objs = list(self.teams.values())
|
|
78
81
|
game = GameConfig(
|
|
79
82
|
map_builder=self.map_builder(),
|
|
80
83
|
max_steps=self.max_steps,
|
|
@@ -91,24 +94,40 @@ class CvCMission(CoGameMission):
|
|
|
91
94
|
noop=NoopActionConfig(),
|
|
92
95
|
change_vibe=ChangeVibeActionConfig(vibes=CvCConfig.VIBES),
|
|
93
96
|
),
|
|
94
|
-
|
|
95
|
-
|
|
97
|
+
agents=[
|
|
98
|
+
self.cog.agent_config(team=t.name, max_steps=self.max_steps)
|
|
99
|
+
for t in team_objs
|
|
100
|
+
for _ in range(t.num_agents)
|
|
101
|
+
],
|
|
96
102
|
objects={
|
|
97
103
|
"wall": CvCWallConfig().station_cfg(),
|
|
98
|
-
"hub": CvCHubConfig().station_cfg(team="cogs"),
|
|
99
104
|
"junction": CvCJunctionConfig().station_cfg(),
|
|
100
|
-
"chest": CvCChestConfig().station_cfg(team="cogs"),
|
|
101
105
|
**{
|
|
102
106
|
f"{resource}_extractor": CvCExtractorConfig(resource=resource).station_cfg()
|
|
103
107
|
for resource in CvCConfig.ELEMENTS
|
|
104
108
|
},
|
|
105
|
-
**{
|
|
109
|
+
**{
|
|
110
|
+
f"{t.short_name}:hub": CvCHubConfig().station_cfg(team=t.short_name, collective=t.name)
|
|
111
|
+
for t in team_objs
|
|
112
|
+
},
|
|
113
|
+
**{
|
|
114
|
+
f"{t.short_name}:chest": CvCChestConfig().station_cfg(team=t.short_name, collective=t.name)
|
|
115
|
+
for t in team_objs
|
|
116
|
+
},
|
|
117
|
+
**{
|
|
118
|
+
f"{t.short_name}:{g}": CvCGearStationConfig(gear_type=g).station_cfg(
|
|
119
|
+
team=t.short_name, collective=t.name
|
|
120
|
+
)
|
|
121
|
+
for t in team_objs
|
|
122
|
+
for g in CvCConfig.GEAR
|
|
123
|
+
},
|
|
106
124
|
},
|
|
107
125
|
collectives={
|
|
108
|
-
**{
|
|
109
|
-
|
|
126
|
+
**{t.name: t.collective_config() for t in team_objs},
|
|
127
|
+
**self.clips.collectives(),
|
|
128
|
+
"neutral": CollectiveConfig(name="neutral"),
|
|
110
129
|
},
|
|
111
|
-
events=self.
|
|
130
|
+
events=self._merge_events(),
|
|
112
131
|
)
|
|
113
132
|
|
|
114
133
|
env = MettaGridConfig(game=game)
|
|
@@ -122,3 +141,12 @@ class CvCMission(CoGameMission):
|
|
|
122
141
|
env.label += f".{variant.name}"
|
|
123
142
|
|
|
124
143
|
return env
|
|
144
|
+
|
|
145
|
+
def _merge_events(self) -> dict:
|
|
146
|
+
"""Merge clips and weather events, raising on key conflicts."""
|
|
147
|
+
clips_events = self.clips.events(max_steps=self.max_steps)
|
|
148
|
+
weather_events = self.weather.events(max_steps=self.max_steps)
|
|
149
|
+
overlap = set(clips_events) & set(weather_events)
|
|
150
|
+
if overlap:
|
|
151
|
+
raise ValueError(f"Overlapping event keys between clips and weather: {overlap}")
|
|
152
|
+
return {**clips_events, **weather_events}
|
|
@@ -50,7 +50,7 @@ def get_core_missions() -> list[CvCMission]:
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
def _build_eval_missions() -> list[CvCMission]:
|
|
53
|
-
from cogames.cogs_vs_clips.evals.integrated_evals import EVAL_MISSIONS as INTEGRATED_EVAL_MISSIONS
|
|
53
|
+
from cogames.cogs_vs_clips.evals.integrated_evals import EVAL_MISSIONS as INTEGRATED_EVAL_MISSIONS # noqa: PLC0415
|
|
54
54
|
|
|
55
55
|
return [
|
|
56
56
|
*INTEGRATED_EVAL_MISSIONS,
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Reward preset wiring for the CogsGuard (Cogs vs Clips) mission.
|
|
2
|
+
|
|
3
|
+
The mission has a single "true" objective signal, plus optional shaping variants.
|
|
4
|
+
Reward variants are stackable; each one adds additional shaping signals on top of the
|
|
5
|
+
mission's default objective rewards.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
from typing import Literal, Sequence, cast
|
|
12
|
+
|
|
13
|
+
from mettagrid.config.game_value import stat
|
|
14
|
+
from mettagrid.config.mettagrid_config import MettaGridConfig
|
|
15
|
+
from mettagrid.config.reward_config import AgentReward, reward
|
|
16
|
+
|
|
17
|
+
# Type alias for the reward variant names accepted by this module.
CogsGuardRewardVariant = Literal[
    "credit",
    "milestones",
    "no_objective",
    "penalize_vibe_change",
    "objective",
]

# Every reward variant name. apply_reward_variants validates requested names
# against this tuple and walks it in this order when appending label suffixes,
# so the order here fixes the suffix order.
AVAILABLE_REWARD_VARIANTS: tuple[CogsGuardRewardVariant, ...] = (
    "objective",
    "no_objective",
    "milestones",
    "credit",
    "penalize_vibe_change",
)

# Key in the agent rewards dict that the "no_objective" variant removes.
_OBJECTIVE_STAT_KEY = "aligned_junction_held"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _apply_milestones(rewards: dict[str, AgentReward], *, max_junctions: int = 100) -> None:
    """Add the milestone shaping rewards to ``rewards`` in place.

    Three entries are written: one for the collective junction stat and one
    each for junctions scrambled / aligned by the agent. Every entry is capped
    at its weight multiplied by ``max_junctions``.

    Args:
        rewards: Rewards dict to modify in-place.
        max_junctions: Expected upper bound on the junction count, used to
            derive each cap. Defaults to 100.
    """
    # (rewards key, stat name, weight), written in this order.
    milestone_table = (
        ("aligned_junctions", "collective.junction", 1.0),
        ("junction_scrambled_by_agent", "junction.scrambled_by_agent", 0.5),
        ("junction_aligned_by_agent", "junction.aligned_by_agent", 1.0),
    )
    # Compute every cap before touching ``rewards``.
    caps = [weight * max_junctions for _, _, weight in milestone_table]

    for (reward_key, stat_name, weight), cap in zip(milestone_table, caps):
        rewards[reward_key] = reward(stat(stat_name), weight=weight, max=cap)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _apply_penalize_vibe_change(rewards: dict[str, AgentReward]) -> None:
    """Write a small negative reward for successful vibe changes into ``rewards``.

    Args:
        rewards: Rewards dict to modify in-place; the entry is stored under
            ``"vibe_change_penalty"`` with a weight of -0.01.
    """
    penalty_weight = -0.01
    vibe_change_stat = stat("action.change_vibe.success")
    rewards["vibe_change_penalty"] = reward(vibe_change_stat, weight=penalty_weight)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _apply_credit(rewards: dict[str, AgentReward]) -> None:
    """Add dense precursor shaping rewards to ``rewards`` in place.

    Two groups are merged in, in this order: per-stat gain/loss rewards
    (hearts, aligner and scrambler gear, the four elements), then rewards for
    the collective element-deposit stats.

    Args:
        rewards: Rewards dict to modify in-place.
    """
    heart_weight, heart_cap = 0.05, 0.5
    gear_weight, gear_cap = 0.2, 0.4
    element_weight, element_cap = 0.001, 0.1

    # (rewards key, stat name, weight, cap); the "lost" rows negate both values.
    gain_table = [
        ("heart_gained", "heart.gained", heart_weight, heart_cap),
        ("aligner_gained", "aligner.gained", gear_weight, gear_cap),
        ("aligner_lost", "aligner.lost", -gear_weight, -gear_cap),
        ("scrambler_gained", "scrambler.gained", gear_weight, gear_cap),
        ("scrambler_lost", "scrambler.lost", -gear_weight, -gear_cap),
        ("carbon_gained", "carbon.gained", element_weight, element_cap),
        ("oxygen_gained", "oxygen.gained", element_weight, element_cap),
        ("germanium_gained", "germanium.gained", element_weight, element_cap),
        ("silicon_gained", "silicon.gained", element_weight, element_cap),
    ]
    # Build the whole group first, then merge it in a single update.
    gain_rewards: dict[str, AgentReward] = {}
    for reward_key, stat_name, weight, cap in gain_table:
        gain_rewards[reward_key] = reward(stat(stat_name), weight=weight, max=cap)
    rewards.update(gain_rewards)

    # Collective deposit rewards, one per element.
    deposit_weight, deposit_cap = 0.002, 0.2
    deposit_rewards: dict[str, AgentReward] = {}
    for element in ["carbon", "oxygen", "germanium", "silicon"]:
        deposit_rewards[f"collective_{element}_deposited"] = reward(
            stat(f"collective.{element}.deposited"), weight=deposit_weight, max=deposit_cap
        )
    rewards.update(deposit_rewards)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def apply_reward_variants(env: MettaGridConfig, *, variants: str | Sequence[str] | None = None) -> None:
    """Apply CogsGuard reward variants to `env`.

    Variants are stackable:
    - `objective`: no-op marker; leaves the existing reward wiring untouched.
    - `no_objective`: removes the objective entry from the agent rewards.
    - `milestones`: adds shaped rewards for aligning/scrambling junctions and holding junctions.
    - `credit`: adds dense shaping for precursor behaviors (resources/gear/deposits).
    - `penalize_vibe_change`: adds a penalty for vibe changes.

    Args:
        env: Config whose ``game.agent.rewards`` and ``label`` are updated.
        variants: A single name, a JSON-encoded list string (e.g.
            '["milestones"]'), or a sequence of names. Falsy means no change.

    Raises:
        ValueError: If a requested name is not in ``AVAILABLE_REWARD_VARIANTS``.
    """
    if not variants:
        return

    # Normalize the request into a list of names.
    if not isinstance(variants, str):
        variant_names = list(variants)
    elif not variants.startswith("["):
        variant_names = [variants]
    else:
        try:
            decoded = json.loads(variants)
        except json.JSONDecodeError:
            variant_names = [variants]
        else:
            variant_names = list(decoded) if isinstance(decoded, list) else [variants]

    # Validate in request order so the first unknown name is the one reported.
    enabled: set[CogsGuardRewardVariant] = set()
    for variant_name in variant_names:
        if variant_name not in AVAILABLE_REWARD_VARIANTS:
            available = ", ".join(AVAILABLE_REWARD_VARIANTS)
            raise ValueError(f"Unknown Cogsguard reward variant '{variant_name}'. Available: {available}")
        enabled.add(cast(CogsGuardRewardVariant, variant_name))

    # Only the no-op marker was requested: nothing to change.
    if not (enabled - {"objective"}):
        return

    # Work on a copy of the mission's existing rewards so their scaling is kept.
    shaped = dict(env.game.agent.rewards)
    if "no_objective" in enabled:
        shaped.pop(_OBJECTIVE_STAT_KEY, None)

    shaping_steps = (
        ("milestones", _apply_milestones),
        ("credit", _apply_credit),
        ("penalize_vibe_change", _apply_penalize_vibe_change),
    )
    for variant_key, add_shaping in shaping_steps:
        if variant_key in enabled:
            add_shaping(shaped)

    env.game.agent.rewards = shaped

    # Deterministic label suffix order (exclude "objective").
    for variant in AVAILABLE_REWARD_VARIANTS:
        if variant != "objective" and variant in enabled:
            env.label += f".{variant}"
|
cogames/cogs_vs_clips/sites.py
CHANGED
|
@@ -72,16 +72,17 @@ MACHINA_1 = CoGameSite(
|
|
|
72
72
|
|
|
73
73
|
def _cogsguard_hub_config() -> BaseHubConfig:
|
|
74
74
|
return BaseHubConfig(
|
|
75
|
+
hub_object="c:hub",
|
|
75
76
|
corner_bundle="extractors",
|
|
76
77
|
cross_bundle="none",
|
|
77
78
|
cross_distance=7,
|
|
78
79
|
junction_object="junction",
|
|
80
|
+
heart_chest_object="c:chest",
|
|
79
81
|
stations=[
|
|
80
|
-
"
|
|
81
|
-
"
|
|
82
|
-
"
|
|
83
|
-
"
|
|
84
|
-
"chest",
|
|
82
|
+
"c:aligner",
|
|
83
|
+
"c:scrambler",
|
|
84
|
+
"c:miner",
|
|
85
|
+
"c:scout",
|
|
85
86
|
],
|
|
86
87
|
)
|
|
87
88
|
|
|
@@ -82,6 +82,7 @@ class CvCJunctionConfig(CvCStationConfig):
|
|
|
82
82
|
def station_cfg(self, team: Optional[str] = None) -> GridObjectConfig:
|
|
83
83
|
return GridObjectConfig(
|
|
84
84
|
name="junction",
|
|
85
|
+
render_name="junction",
|
|
85
86
|
render_symbol="📦",
|
|
86
87
|
collective=team,
|
|
87
88
|
aoes={
|
|
@@ -121,11 +122,12 @@ class CvCHubConfig(CvCStationConfig):
|
|
|
121
122
|
attack_deltas: dict[str, int] = Field(default_factory=lambda: {"hp": -1, "influence": -100})
|
|
122
123
|
elements: list[str] = Field(default_factory=lambda: CvCConfig.ELEMENTS)
|
|
123
124
|
|
|
124
|
-
def station_cfg(self, team: str) -> GridObjectConfig:
|
|
125
|
+
def station_cfg(self, team: str, collective: str | None = None) -> GridObjectConfig:
|
|
125
126
|
return GridObjectConfig(
|
|
126
|
-
name="hub",
|
|
127
|
+
name=f"{team}:hub",
|
|
128
|
+
render_name="hub",
|
|
127
129
|
render_symbol="📦",
|
|
128
|
-
collective=team,
|
|
130
|
+
collective=collective or team,
|
|
129
131
|
aoes={
|
|
130
132
|
"influence": AOEConfig(
|
|
131
133
|
radius=self.aoe_range,
|
|
@@ -152,11 +154,12 @@ class CvCChestConfig(CvCStationConfig):
|
|
|
152
154
|
|
|
153
155
|
heart_cost: dict[str, int] = Field(default_factory=lambda: CvCConfig.HEART_COST)
|
|
154
156
|
|
|
155
|
-
def station_cfg(self, team: str) -> GridObjectConfig:
|
|
157
|
+
def station_cfg(self, team: str, collective: str | None = None) -> GridObjectConfig:
|
|
156
158
|
return GridObjectConfig(
|
|
157
|
-
name="chest",
|
|
159
|
+
name=f"{team}:chest",
|
|
160
|
+
render_name="chest",
|
|
158
161
|
render_symbol="📦",
|
|
159
|
-
collective=team,
|
|
162
|
+
collective=collective or team,
|
|
160
163
|
on_use_handlers={
|
|
161
164
|
"get_heart": Handler(
|
|
162
165
|
filters=[isAlignedToActor(), targetCollectiveHas({"heart": 1})],
|
|
@@ -180,12 +183,13 @@ class CvCGearStationConfig(CvCStationConfig):
|
|
|
180
183
|
gear_costs: dict[str, dict[str, int]] = Field(default_factory=lambda: CvCConfig.GEAR_COSTS)
|
|
181
184
|
gear_symbols: dict[str, str] = Field(default_factory=lambda: CvCConfig.GEAR_SYMBOLS)
|
|
182
185
|
|
|
183
|
-
def station_cfg(self, team: str) -> GridObjectConfig:
|
|
186
|
+
def station_cfg(self, team: str, collective: str | None = None) -> GridObjectConfig:
|
|
184
187
|
cost = self.gear_costs.get(self.gear_type, {})
|
|
185
188
|
return GridObjectConfig(
|
|
186
|
-
name=f"{self.gear_type}
|
|
189
|
+
name=f"{team}:{self.gear_type}",
|
|
190
|
+
render_name=f"{self.gear_type}_station",
|
|
187
191
|
render_symbol=self.gear_symbols[self.gear_type],
|
|
188
|
-
collective=team,
|
|
192
|
+
collective=collective or team,
|
|
189
193
|
on_use_handlers={
|
|
190
194
|
"keep_gear": Handler(
|
|
191
195
|
filters=[isAlignedToActor(), actorHas({self.gear_type: 1})],
|
cogames/cogs_vs_clips/team.py
CHANGED
|
@@ -17,7 +17,8 @@ from mettagrid.config.mettagrid_config import (
|
|
|
17
17
|
class CogTeam(Config):
|
|
18
18
|
"""Configuration for a cogs team."""
|
|
19
19
|
|
|
20
|
-
name: str = Field(default="cogs", description="Team name")
|
|
20
|
+
name: str = Field(default="cogs", description="Team name used for collectives and alignment")
|
|
21
|
+
short_name: str = Field(default="c", description="Short prefix used for map object names")
|
|
21
22
|
wealth: int = Field(default=1, description="Wealth multiplier for initial resources")
|
|
22
23
|
num_agents: int = Field(default=8, ge=1, description="Number of agents in the team")
|
|
23
24
|
|
|
@@ -28,6 +29,7 @@ class CogTeam(Config):
|
|
|
28
29
|
CollectiveConfig with resource limits and initial inventory.
|
|
29
30
|
"""
|
|
30
31
|
return CollectiveConfig(
|
|
32
|
+
name=self.name,
|
|
31
33
|
inventory=InventoryConfig(
|
|
32
34
|
limits={
|
|
33
35
|
"resources": ResourceLimitsConfig(min=10000, resources=CvCConfig.ELEMENTS),
|