cogames 0.3.49__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (169)
  1. cogames/cli/client.py +60 -6
  2. cogames/cli/docsync/__init__.py +0 -0
  3. cogames/cli/docsync/_nb_md_directive_processing.py +180 -0
  4. cogames/cli/docsync/_nb_md_sync.py +103 -0
  5. cogames/cli/docsync/_nb_py_sync.py +122 -0
  6. cogames/cli/docsync/_three_way_sync.py +115 -0
  7. cogames/cli/docsync/_utils.py +76 -0
  8. cogames/cli/docsync/docsync.py +156 -0
  9. cogames/cli/leaderboard.py +112 -28
  10. cogames/cli/mission.py +64 -53
  11. cogames/cli/policy.py +46 -10
  12. cogames/cli/submit.py +268 -67
  13. cogames/cogs_vs_clips/cog.py +79 -0
  14. cogames/cogs_vs_clips/cogs_vs_clips_mapgen.md +19 -16
  15. cogames/cogs_vs_clips/cogsguard_reward_variants.py +153 -0
  16. cogames/cogs_vs_clips/cogsguard_tutorial.py +56 -0
  17. cogames/cogs_vs_clips/evals/README.md +10 -16
  18. cogames/cogs_vs_clips/evals/cogsguard_evals.py +81 -0
  19. cogames/cogs_vs_clips/evals/diagnostic_evals.py +49 -444
  20. cogames/cogs_vs_clips/evals/difficulty_variants.py +13 -326
  21. cogames/cogs_vs_clips/evals/integrated_evals.py +5 -45
  22. cogames/cogs_vs_clips/evals/spanning_evals.py +9 -180
  23. cogames/cogs_vs_clips/mission.py +187 -146
  24. cogames/cogs_vs_clips/missions.py +46 -137
  25. cogames/cogs_vs_clips/procedural.py +8 -8
  26. cogames/cogs_vs_clips/sites.py +107 -3
  27. cogames/cogs_vs_clips/stations.py +198 -186
  28. cogames/cogs_vs_clips/tutorial_missions.py +1 -1
  29. cogames/cogs_vs_clips/variants.py +25 -476
  30. cogames/device.py +13 -1
  31. cogames/{policy/scripted_agent/README.md → docs/SCRIPTED_AGENT.md} +82 -58
  32. cogames/evaluate.py +18 -30
  33. cogames/main.py +1434 -243
  34. cogames/maps/canidate1_1000.map +1 -1
  35. cogames/maps/canidate1_1000_stations.map +2 -2
  36. cogames/maps/canidate1_500.map +1 -1
  37. cogames/maps/canidate1_500_stations.map +2 -2
  38. cogames/maps/canidate2_1000.map +1 -1
  39. cogames/maps/canidate2_1000_stations.map +2 -2
  40. cogames/maps/canidate2_500.map +1 -1
  41. cogames/maps/canidate2_500_stations.map +2 -2
  42. cogames/maps/canidate3_1000.map +1 -1
  43. cogames/maps/canidate3_1000_stations.map +2 -2
  44. cogames/maps/canidate3_500.map +1 -1
  45. cogames/maps/canidate3_500_stations.map +2 -2
  46. cogames/maps/canidate4_500.map +1 -1
  47. cogames/maps/canidate4_500_stations.map +2 -2
  48. cogames/maps/cave_base_50.map +2 -2
  49. cogames/maps/diagnostic_evals/diagnostic_agile.map +2 -2
  50. cogames/maps/diagnostic_evals/diagnostic_agile_hard.map +2 -2
  51. cogames/maps/diagnostic_evals/diagnostic_charge_up.map +2 -2
  52. cogames/maps/diagnostic_evals/diagnostic_charge_up_hard.map +2 -2
  53. cogames/maps/diagnostic_evals/diagnostic_chest_navigation1.map +2 -2
  54. cogames/maps/diagnostic_evals/diagnostic_chest_navigation1_hard.map +2 -2
  55. cogames/maps/diagnostic_evals/diagnostic_chest_navigation2.map +2 -2
  56. cogames/maps/diagnostic_evals/diagnostic_chest_navigation2_hard.map +2 -2
  57. cogames/maps/diagnostic_evals/diagnostic_chest_navigation3.map +2 -2
  58. cogames/maps/diagnostic_evals/diagnostic_chest_navigation3_hard.map +2 -2
  59. cogames/maps/diagnostic_evals/diagnostic_chest_near.map +2 -2
  60. cogames/maps/diagnostic_evals/diagnostic_chest_search.map +2 -2
  61. cogames/maps/diagnostic_evals/diagnostic_chest_search_hard.map +2 -2
  62. cogames/maps/diagnostic_evals/diagnostic_extract_lab.map +2 -2
  63. cogames/maps/diagnostic_evals/diagnostic_extract_lab_hard.map +2 -2
  64. cogames/maps/diagnostic_evals/diagnostic_memory.map +2 -2
  65. cogames/maps/diagnostic_evals/diagnostic_memory_hard.map +2 -2
  66. cogames/maps/diagnostic_evals/diagnostic_radial.map +2 -2
  67. cogames/maps/diagnostic_evals/diagnostic_radial_hard.map +2 -2
  68. cogames/maps/diagnostic_evals/diagnostic_resource_lab.map +2 -2
  69. cogames/maps/diagnostic_evals/diagnostic_unclip.map +2 -2
  70. cogames/maps/evals/eval_balanced_spread.map +9 -5
  71. cogames/maps/evals/eval_clip_oxygen.map +9 -5
  72. cogames/maps/evals/eval_collect_resources.map +9 -5
  73. cogames/maps/evals/eval_collect_resources_hard.map +9 -5
  74. cogames/maps/evals/eval_collect_resources_medium.map +9 -5
  75. cogames/maps/evals/eval_divide_and_conquer.map +9 -5
  76. cogames/maps/evals/eval_energy_starved.map +9 -5
  77. cogames/maps/evals/eval_multi_coordinated_collect_hard.map +9 -5
  78. cogames/maps/evals/eval_oxygen_bottleneck.map +9 -5
  79. cogames/maps/evals/eval_single_use_world.map +9 -5
  80. cogames/maps/evals/extractor_hub_100x100.map +9 -5
  81. cogames/maps/evals/extractor_hub_30x30.map +9 -5
  82. cogames/maps/evals/extractor_hub_50x50.map +9 -5
  83. cogames/maps/evals/extractor_hub_70x70.map +9 -5
  84. cogames/maps/evals/extractor_hub_80x80.map +9 -5
  85. cogames/maps/machina_100_stations.map +2 -2
  86. cogames/maps/machina_200_stations.map +2 -2
  87. cogames/maps/machina_200_stations_small.map +2 -2
  88. cogames/maps/machina_eval_exp01.map +2 -2
  89. cogames/maps/machina_eval_template_large.map +2 -2
  90. cogames/maps/machinatrainer4agents.map +2 -2
  91. cogames/maps/machinatrainer4agentsbase.map +2 -2
  92. cogames/maps/machinatrainerbig.map +2 -2
  93. cogames/maps/machinatrainersmall.map +2 -2
  94. cogames/maps/planky_evals/aligner_avoid_aoe.map +28 -0
  95. cogames/maps/planky_evals/aligner_full_cycle.map +28 -0
  96. cogames/maps/planky_evals/aligner_gear.map +24 -0
  97. cogames/maps/planky_evals/aligner_hearts.map +24 -0
  98. cogames/maps/planky_evals/aligner_junction.map +26 -0
  99. cogames/maps/planky_evals/exploration_distant.map +28 -0
  100. cogames/maps/planky_evals/maze.map +32 -0
  101. cogames/maps/planky_evals/miner_best_resource.map +26 -0
  102. cogames/maps/planky_evals/miner_deposit.map +24 -0
  103. cogames/maps/planky_evals/miner_extract.map +26 -0
  104. cogames/maps/planky_evals/miner_full_cycle.map +28 -0
  105. cogames/maps/planky_evals/miner_gear.map +24 -0
  106. cogames/maps/planky_evals/multi_role.map +28 -0
  107. cogames/maps/planky_evals/resource_chain.map +30 -0
  108. cogames/maps/planky_evals/scout_explore.map +32 -0
  109. cogames/maps/planky_evals/scout_gear.map +24 -0
  110. cogames/maps/planky_evals/scrambler_full_cycle.map +28 -0
  111. cogames/maps/planky_evals/scrambler_gear.map +24 -0
  112. cogames/maps/planky_evals/scrambler_target.map +26 -0
  113. cogames/maps/planky_evals/stuck_corridor.map +32 -0
  114. cogames/maps/planky_evals/survive_retreat.map +26 -0
  115. cogames/maps/training_facility_clipped.map +2 -2
  116. cogames/maps/training_facility_open_1.map +2 -2
  117. cogames/maps/training_facility_open_2.map +2 -2
  118. cogames/maps/training_facility_open_3.map +2 -2
  119. cogames/maps/training_facility_tight_4.map +2 -2
  120. cogames/maps/training_facility_tight_5.map +2 -2
  121. cogames/maps/vanilla_large.map +2 -2
  122. cogames/maps/vanilla_small.map +2 -2
  123. cogames/pickup.py +183 -0
  124. cogames/play.py +166 -33
  125. cogames/policy/chaos_monkey.py +54 -0
  126. cogames/policy/nim_agents/__init__.py +27 -10
  127. cogames/policy/nim_agents/agents.py +121 -60
  128. cogames/policy/nim_agents/thinky_eval.py +35 -222
  129. cogames/policy/pufferlib_policy.py +67 -32
  130. cogames/policy/starter_agent.py +184 -0
  131. cogames/policy/trainable_policy_template.py +4 -1
  132. cogames/train.py +51 -13
  133. cogames/verbose.py +2 -2
  134. cogames-0.3.64.dist-info/METADATA +1842 -0
  135. cogames-0.3.64.dist-info/RECORD +159 -0
  136. cogames-0.3.64.dist-info/licenses/LICENSE +21 -0
  137. cogames-0.3.64.dist-info/top_level.txt +2 -0
  138. metta_alo/__init__.py +0 -0
  139. metta_alo/job_specs.py +17 -0
  140. metta_alo/policy.py +16 -0
  141. metta_alo/pure_single_episode_runner.py +75 -0
  142. metta_alo/py.typed +0 -0
  143. metta_alo/rollout.py +322 -0
  144. metta_alo/scoring.py +168 -0
  145. cogames/maps/diagnostic_evals/diagnostic_assembler_near.map +0 -49
  146. cogames/maps/diagnostic_evals/diagnostic_assembler_search.map +0 -49
  147. cogames/maps/diagnostic_evals/diagnostic_assembler_search_hard.map +0 -89
  148. cogames/policy/nim_agents/common.nim +0 -887
  149. cogames/policy/nim_agents/install.sh +0 -1
  150. cogames/policy/nim_agents/ladybug_agent.nim +0 -984
  151. cogames/policy/nim_agents/nim_agents.nim +0 -55
  152. cogames/policy/nim_agents/nim_agents.nims +0 -14
  153. cogames/policy/nim_agents/nimby.lock +0 -3
  154. cogames/policy/nim_agents/racecar_agents.nim +0 -884
  155. cogames/policy/nim_agents/random_agents.nim +0 -68
  156. cogames/policy/nim_agents/test_agents.py +0 -53
  157. cogames/policy/nim_agents/thinky_agents.nim +0 -717
  158. cogames/policy/scripted_agent/baseline_agent.py +0 -1049
  159. cogames/policy/scripted_agent/demo_policy.py +0 -244
  160. cogames/policy/scripted_agent/pathfinding.py +0 -126
  161. cogames/policy/scripted_agent/starter_agent.py +0 -136
  162. cogames/policy/scripted_agent/types.py +0 -235
  163. cogames/policy/scripted_agent/unclipping_agent.py +0 -476
  164. cogames/policy/scripted_agent/utils.py +0 -385
  165. cogames-0.3.49.dist-info/METADATA +0 -406
  166. cogames-0.3.49.dist-info/RECORD +0 -136
  167. cogames-0.3.49.dist-info/top_level.txt +0 -1
  168. {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/WHEEL +0 -0
  169. {cogames-0.3.49.dist-info → cogames-0.3.64.dist-info}/entry_points.txt +0 -0
--- a/cogames/policy/nim_agents/thinky_eval.py
+++ b/cogames/policy/nim_agents/thinky_eval.py
@@ -1,233 +1,46 @@
-# much simpler evaluator for thinky agents.
+"""Legacy module path for Thinky eval helpers.
 
-from __future__ import annotations
-
-import time
-from typing import Dict, List, Tuple
-
-import cogames.policy.nim_agents.agents as na
-from cogames.cli.utils import suppress_noisy_logs
-from cogames.cogs_vs_clips.evals.diagnostic_evals import DIAGNOSTIC_EVALS
-from cogames.cogs_vs_clips.mission import Mission, NumCogsVariant
-from mettagrid.policy.loader import initialize_or_load_policy
-from mettagrid.policy.policy import PolicySpec
-from mettagrid.policy.policy_env_interface import PolicyEnvInterface
-from mettagrid.simulator.rollout import Rollout
-
-# Agent to evaluate
-AGENT_PATH = "cogames.policy.nim_agents.agents.ThinkyAgentsMultiPolicy"
-
-# Defaults (keep simple)
-NUM_COGS = 4
-MAX_STEPS = 10000
-SEED = 42
-
-# Add/modify your evals here over time
-EVALS: List[Tuple[str, str, int]] = [
-    # Regular evals
-    (
-        "energy_starved",
-        "buggy",
-        NUM_COGS,
-    ),  # E is very hard, max E is 256, but agents think its 100.
-    ("oxygen_bottleneck", "", NUM_COGS),
-    ("collect_resources_classic", "", NUM_COGS),
-    ("collect_resources_spread", "", NUM_COGS),
-    ("collect_far", "", NUM_COGS),
-    ("divide_and_conquer", "", NUM_COGS),
-    ("go_together", "", NUM_COGS),
-    ("single_use_swarm", "flakey", NUM_COGS),
-    # Diagnostic evals
-    ("diagnostic_chest_navigation1", "", 1),
-    ("diagnostic_chest_navigation2", "", 1),
-    ("diagnostic_chest_navigation3", "", 1),
-    ("diagnostic_chest_deposit_near", "", 1),
-    ("diagnostic_chest_deposit_search", "", 1),
-    ("diagnostic_charge_up", "buggy", 1),  # The cog needs to sacrifice itself to make hart.
-    ("diagnostic_memory", "", 1),
-    ("diagnostic_assemble_seeded_near", "", 1),
-    ("diagnostic_assemble_seeded_search", "", 1),
-    ("diagnostic_extract_missing_carbon", "", 1),
-    ("diagnostic_extract_missing_oxygen", "", 1),
-    ("diagnostic_extract_missing_germanium", "", 1),
-    ("diagnostic_extract_missing_silicon", "", 1),
-    ("diagnostic_unclip_craft", "", 1),
-    ("diagnostic_unclip_preseed", "", 1),
-    ("diagnostic_agile", "", 1),
-    ("diagnostic_radial", "", 1),
-    # Hello World evals
-    ("distant_resources", "buggy", NUM_COGS),  # Not enough time for such distances.
-    ("quadrant_buildings", "buggy", NUM_COGS),  # Not enough charger for such distances.
-    ("vibe_check", "", NUM_COGS),
-    ("oxygen_bottleneck_easy", "", NUM_COGS),
-    ("oxygen_bottleneck_standard", "", NUM_COGS),
-    ("oxygen_bottleneck_hard", "buggy", NUM_COGS),  # Not enough charger for such distances.
-    ("energy_starved_easy", "", NUM_COGS),
-    ("energy_starved_standard", "buggy", NUM_COGS),  # E drain too high.
-    ("energy_starved_hard", "buggy", NUM_COGS),  # E drain too high.
-    ("unclipping_easy", "n/a", NUM_COGS),
-    ("unclipping_standard", "n/a", NUM_COGS),
-    ("unclipping_hard", "n/a", NUM_COGS),
-    ("distant_resources_easy", "", NUM_COGS),
-    ("distant_resources_standard", "flakey", NUM_COGS),  # Not enough time for such distances.
-    ("distant_resources_hard", "buggy", NUM_COGS),  # Not enough time for such distances.
-    ("quadrant_buildings_easy", "", NUM_COGS),
-    ("quadrant_buildings_standard", "buggy", NUM_COGS),  # Not enough charger for such distances.
-    ("quadrant_buildings_hard", "buggy", NUM_COGS),  # Not enough charger for such distances.
-    ("single_use_swarm_easy", "buggy", NUM_COGS),
-    ("single_use_swarm_standard", "buggy", NUM_COGS),  # Not enough time for such distances.
-    ("single_use_swarm_hard", "buggy", NUM_COGS),  # E drain too high.
-    ("vibe_check_easy", "buggy", NUM_COGS),  # No/invalid recipes available.
-    ("vibe_check_standard", "", NUM_COGS),
-    ("vibe_check_hard", "flakey", NUM_COGS),  # Not enough time for such distances.
-    # Hearts evals
-    ("easy_large_hearts", "slow", NUM_COGS),
-    ("easy_medium_hearts", "", NUM_COGS),
-    ("easy_small_hearts", "flakey", NUM_COGS),
-    # Missions from missions.py
-    ("harvest", "", NUM_COGS),
-    ("repair", "", 2),  # repair uses 2 cogs
-    ("hello_world_unclip", "", NUM_COGS),
-]
-
-
-def _load_all_missions() -> Dict[str, Mission]:
-    # Minimal loader: merge all known mission sets
-    from importlib import import_module
-
-    missions: List[Mission] = []
-    for mod_name in (
-        "cogames.cogs_vs_clips.evals.eval_missions",
-        "cogames.cogs_vs_clips.evals.integrated_evals",
-        "cogames.cogs_vs_clips.evals.spanning_evals",
-        "cogames.cogs_vs_clips.missions",
-    ):
-        try:
-            mod = import_module(mod_name)
-            # missions.py uses MISSIONS, others use EVAL_MISSIONS
-            eval_list = getattr(mod, "MISSIONS", getattr(mod, "EVAL_MISSIONS", []))
-            missions.extend(eval_list)
-        except Exception:
-            pass
+The implementation moved to `cogames_agents.policy.nim_agents.thinky_eval`.
+"""
 
-    # Diagnostic evals are a list of classes; instantiate them
-    try:
-        missions.extend([cls() for cls in DIAGNOSTIC_EVALS])  # type: ignore[misc]
-    except Exception:
-        pass
-
-    # Build name -> mission instance map
-    mission_map: Dict[str, Mission] = {}
-    for m in missions:
-        # Items in EVAL_MISSIONS may be classes or instances; normalize to instances
-        try:
-            mission: Mission = m() if isinstance(m, type) else m  # type: ignore[call-arg,assignment]
-        except Exception:
-            continue
-        mission_map[mission.name] = mission
-    return mission_map
-
-
-def _ensure_vibe_supports_gear(env_cfg) -> None:
-    # Keep minimal and silent if anything fails
-    try:
-        assembler = env_cfg.game.objects.get("assembler")
-        uses_gear = False
-        if assembler is not None and hasattr(assembler, "protocols"):
-            for proto in assembler.protocols:
-                if any(v == "gear" for v in getattr(proto, "vibes", [])):
-                    uses_gear = True
-                    break
-        if uses_gear:
-            change_vibe = env_cfg.game.actions.change_vibe
-            has_gear = any(v.name == "gear" for v in change_vibe.vibes)
-            if not has_gear:
-                from mettagrid.config.vibes import VIBE_BY_NAME
-
-                change_vibe.vibes = list(change_vibe.vibes) + [VIBE_BY_NAME["gear"]]
-    except Exception:
-        pass
-
-
-def run_eval(experiment_name: str, tag: str, mission_map: Dict[str, Mission], num_cogs: int, seed: int) -> float:
-    start = time.perf_counter()
-    try:
-        if experiment_name not in mission_map:
-            print(f"{tag:<6} {experiment_name:<40} {'MISSION NOT FOUND':>6}")
-            return 0.0
-
-        base_mission = mission_map[experiment_name]
-        mission = base_mission.with_variants([NumCogsVariant(num_cogs=num_cogs)])
+from __future__ import annotations
 
-        env_cfg = mission.make_env()
-        _ensure_vibe_supports_gear(env_cfg)
-        env_cfg.game.max_steps = MAX_STEPS
+import importlib
+from typing import Any
 
-        # Create policy and rollout
-        pei = PolicyEnvInterface.from_mg_cfg(env_cfg)
-        policy = initialize_or_load_policy(
-            pei,
-            PolicySpec(class_path=AGENT_PATH, data_path=None),
-        )
-        agent_policies = [policy.agent_policy(i) for i in range(num_cogs)]
+try:
+    _thinky_eval = importlib.import_module("cogames_agents.policy.nim_agents.thinky_eval")
+except ModuleNotFoundError as exc:
+    if exc.name and (exc.name == "cogames_agents" or exc.name.startswith("cogames_agents.")):
+        raise ModuleNotFoundError(
+            "Legacy import `cogames.policy.nim_agents.thinky_eval` requires optional dependency "
+            "`cogames-agents` (install `cogames[agents]`)."
+        ) from exc
+    raise
 
-        rollout = Rollout(
-            env_cfg,
-            agent_policies,
-            render_mode="none",
-            seed=seed,
-        )
-        rollout.run_until_done()
+AGENT_PATH = _thinky_eval.AGENT_PATH
+EVALS = _thinky_eval.EVALS
+MAX_STEPS = _thinky_eval.MAX_STEPS
+NUM_COGS = _thinky_eval.NUM_COGS
+SEED = _thinky_eval.SEED
+main = _thinky_eval.main
+run_eval = _thinky_eval.run_eval
 
-        total_reward = float(sum(rollout._sim.episode_rewards))
-        hearts_per_agent = total_reward / max(1, num_cogs)
-        elapsed = time.perf_counter() - start
 
-        # One simple line per eval
-        hpa = f"{hearts_per_agent:.2f}"
-        tm = f"{elapsed:.2f}"
-        print(f"{tag:<6} {experiment_name:<40} {hpa:>6}h {tm:>6}s")
-        return hearts_per_agent
-    except Exception as e:
-        elapsed = time.perf_counter() - start
-        error_message = str(e)
-        print(f"{tag:<6} {experiment_name:<40} {error_message}")
-        return 0.0
+def __getattr__(name: str) -> Any:
+    return getattr(_thinky_eval, name)
 
 
-def main() -> None:
-    suppress_noisy_logs()
-    na.start_measure()
-    mission_map = _load_all_missions()
-    print(f"Loaded {len(mission_map)} missions")
-    print("tag .. map name ............................... harts/A .. time")
-    start = time.perf_counter()
-    total_hpa = 0.0
-    successful_evals = 0
-    num_evals = 0
-    for experiment_name, tag, num_cogs in EVALS:
-        num_evals += 1
-        if tag == "flakey":
-            for i in range(10):
-                hpa = run_eval(experiment_name, tag, mission_map, num_cogs, SEED + i)
-                if hpa > 0:
-                    successful_evals += 1
-                    total_hpa += hpa
-                    break
-        else:
-            hpa = run_eval(experiment_name, tag, mission_map, num_cogs, SEED)
-            if hpa > 0:
-                successful_evals += 1
-                total_hpa += hpa
-    success_rate = successful_evals / num_evals
-    elapsed = time.perf_counter() - start
-    total_evals = f"{num_evals} evals {success_rate * 100:.1f}% successful"
-    hpa = f"{total_hpa:.2f}"
-    tm = f"{elapsed:.2f}"
-    tag = "total"
-    print(f"{tag:<6} {total_evals:<40} {hpa:>6}h {tm:>6}s")
-    na.end_measure()
+def __dir__() -> list[str]:
+    return sorted(set(globals()).union(dir(_thinky_eval)))
 
 
-if __name__ == "__main__":
-    main()
+__all__ = [
+    "AGENT_PATH",
+    "EVALS",
+    "MAX_STEPS",
+    "NUM_COGS",
+    "SEED",
+    "main",
+    "run_eval",
+]
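The rewritten `thinky_eval` module is a forwarding shim: the evaluator itself now lives in `cogames_agents.policy.nim_agents.thinky_eval`, and the legacy path re-exports its public names, raising a descriptive error when the optional dependency is missing. A minimal usage sketch, assuming the `cogames[agents]` extra is installed:

# The legacy import path keeps working; the shim forwards everything to
# cogames_agents.policy.nim_agents.thinky_eval.
from cogames.policy.nim_agents import thinky_eval

print(thinky_eval.AGENT_PATH)  # re-exported constant
thinky_eval.main()             # runs the forwarded evaluation loop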
--- a/cogames/policy/pufferlib_policy.py
+++ b/cogames/policy/pufferlib_policy.py
@@ -15,8 +15,9 @@ import torch
 
 import pufferlib.models  # type: ignore[import-untyped]
 import pufferlib.pytorch  # type: ignore[import-untyped]
-from mettagrid.policy.policy import AgentPolicy, MultiAgentPolicy
+from mettagrid.policy.policy import AgentPolicy, MultiAgentPolicy, StatefulAgentPolicy
 from mettagrid.policy.policy_env_interface import PolicyEnvInterface
+from mettagrid.policy.pufferlib import PufferlibStatefulImpl
 from mettagrid.simulator import Action, AgentObservation, Simulation
 
 
@@ -24,7 +25,7 @@ class PufferlibCogsPolicy(MultiAgentPolicy, AgentPolicy):
     """Loads and runs checkpoints trained with PufferLib's CoGames policy.
 
     This policy serves as both the MultiAgentPolicy factory and AgentPolicy
-    implementation, returning itself from agent_policy().
+    implementation, returning per-agent wrappers that track state.
     """
 
     short_names = ["pufferlib_cogs"]
@@ -38,64 +39,98 @@ class PufferlibCogsPolicy(MultiAgentPolicy, AgentPolicy):
     ):
         MultiAgentPolicy.__init__(self, policy_env_info, device=device)
         AgentPolicy.__init__(self, policy_env_info)
-        shim_env = SimpleNamespace(
+        self._hidden_size = hidden_size
+        self._device = torch.device(device)
+        self._shim_env = SimpleNamespace(
             single_observation_space=policy_env_info.observation_space,
             single_action_space=policy_env_info.action_space,
             observation_space=policy_env_info.observation_space,
             action_space=policy_env_info.action_space,
             num_agents=policy_env_info.num_agents,
         )
-        shim_env.env = shim_env
-        self._net = pufferlib.models.Default(shim_env, hidden_size=hidden_size)  # type: ignore[arg-type]
-        self._net = self._net.to(torch.device(device))
+        self._shim_env.env = self._shim_env
+        self._net = pufferlib.models.Default(self._shim_env, hidden_size=hidden_size).to(self._device)  # type: ignore[arg-type]
         self._action_names = policy_env_info.action_names
-        self._num_tokens, self._token_dim = policy_env_info.observation_space.shape
-        self._device = next(self._net.parameters()).device
+        self._is_recurrent = False
+        self._stateful_impl = PufferlibStatefulImpl(
+            self._net,
+            policy_env_info,
+            self._device,
+            is_recurrent=self._is_recurrent,
+        )
+        self._agent_policies: dict[int, StatefulAgentPolicy[dict[str, torch.Tensor | None]]] = {}
+        self._state_initialized = False
+        self._state: dict[str, torch.Tensor | None] = {}
 
     def network(self) -> torch.nn.Module:  # type: ignore[override]
         return self._net
 
     def agent_policy(self, agent_id: int) -> AgentPolicy:  # type: ignore[override]
-        return self
+        if agent_id not in self._agent_policies:
+            self._agent_policies[agent_id] = StatefulAgentPolicy(
+                self._stateful_impl,
+                self._policy_env_info,
+                agent_id=agent_id,
+            )
+        return self._agent_policies[agent_id]
 
     def is_recurrent(self) -> bool:
-        return False
+        return self._is_recurrent
 
     def reset(self, simulation: Optional[Simulation] = None) -> None:  # type: ignore[override]
-        # No internal state to reset; signature satisfies AgentPolicy and MultiAgentPolicy
-        return None
+        for policy in self._agent_policies.values():
+            policy.reset(simulation)
+        self._reset_state()
 
     def load_policy_data(self, policy_data_path: str) -> None:
-        state = torch.load(policy_data_path, map_location=next(self._net.parameters()).device)
-        self._net.load_state_dict(state)
-        self._net = self._net.to(next(self._net.parameters()).device)
+        state = torch.load(policy_data_path, map_location=self._device)
+        state = {k.replace("module.", ""): v for k, v in state.items()}
+        uses_rnn = any(key.startswith(("lstm.", "cell.")) for key in state)
+        base_net = pufferlib.models.Default(self._shim_env, hidden_size=self._hidden_size)  # type: ignore[arg-type]
+        net = (
+            pufferlib.models.LSTMWrapper(
+                self._shim_env,
+                base_net,
+                input_size=base_net.hidden_size,
+                hidden_size=base_net.hidden_size,
+            )
+            if uses_rnn
+            else base_net
+        )
+        net.load_state_dict(state)
+        self._net = net.to(self._device)
+        self._is_recurrent = uses_rnn
+        self._stateful_impl = PufferlibStatefulImpl(
+            self._net,
+            self._policy_env_info,
+            self._device,
+            is_recurrent=self._is_recurrent,
+        )
+        self._agent_policies.clear()
+        self._state_initialized = False
+        self._state = {}
 
     def save_policy_data(self, policy_data_path: str) -> None:
         torch.save(self._net.state_dict(), policy_data_path)
 
     def step(self, obs: Union[AgentObservation, torch.Tensor, Sequence[Any]]) -> Action:  # type: ignore[override]
         if isinstance(obs, AgentObservation):
-            obs_tensor = torch.full(
-                (self._num_tokens, self._token_dim),
-                fill_value=255.0,
-                device=self._device,
-                dtype=torch.float32,
-            )
-            for idx, token in enumerate(obs.tokens):
-                if idx >= self._num_tokens:
-                    break
-                raw = torch.as_tensor(token.raw_token, device=self._device, dtype=obs_tensor.dtype)
-                obs_tensor[idx, : raw.numel()] = raw
-        else:
-            obs_tensor = torch.as_tensor(obs, device=self._device, dtype=torch.float32)
-
-        obs_tensor = obs_tensor * (1.0 / 255.0)
+            if not self._state_initialized:
+                self._reset_state()
+            with torch.no_grad():
+                action, self._state = self._stateful_impl.step_with_state(obs, self._state)
+            return action
+        obs_tensor = torch.as_tensor(obs, device=self._device, dtype=torch.float32)
         if obs_tensor.ndim == 2:
             obs_tensor = obs_tensor.unsqueeze(0)
-
         with torch.no_grad():
             self._net.eval()
-            logits, _ = self._net.forward_eval(obs_tensor)
+            logits, _ = self._net.forward_eval(obs_tensor, None)
             sampled, _, _ = pufferlib.pytorch.sample_logits(logits)
             action_idx = max(0, min(int(sampled.item()), len(self._action_names) - 1))
             return Action(name=self._action_names[action_idx])
+
+    def _reset_state(self) -> None:
+        self._stateful_impl.reset()
+        self._state = self._stateful_impl.initial_agent_state()
+        self._state_initialized = True
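`PufferlibCogsPolicy.load_policy_data` now infers the architecture from the checkpoint itself: state-dict keys prefixed with `lstm.` or `cell.` cause the base network to be wrapped in `pufferlib.models.LSTMWrapper`, and `agent_policy()` hands out per-agent `StatefulAgentPolicy` wrappers that carry the recurrent state. A rough sketch of the intended call pattern, assuming a MettaGrid env config and a checkpoint path are available (the helper name below is hypothetical, and the constructor arguments are only partially visible in this diff):

from cogames.policy.pufferlib_policy import PufferlibCogsPolicy
from mettagrid.policy.policy_env_interface import PolicyEnvInterface


def load_cogs_agents(env_cfg, checkpoint_path: str):
    # Build the policy/env interface, restore the checkpoint (LSTM checkpoints
    # are detected from their key prefixes), and hand out per-agent wrappers.
    pei = PolicyEnvInterface.from_mg_cfg(env_cfg)
    policy = PufferlibCogsPolicy(pei, device="cpu")
    policy.load_policy_data(checkpoint_path)
    return [policy.agent_policy(i) for i in range(pei.num_agents)]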
--- /dev/null
+++ b/cogames/policy/starter_agent.py
@@ -0,0 +1,184 @@
+"""
+Sample policy for the CoGames CogsGuard environment.
+
+This starter policy uses simple heuristics:
+- If the agent has no gear, head toward the nearest gear station.
+- If the agent has aligner or scrambler gear, try to get hearts (and influence for aligner) then head to junctions.
+- If the agent has miner gear, head to extractors.
+- If the agent has scout gear, explore in a simple pattern.
+
+Note to users of this policy:
+We don't intend for scripted policies to be the final word on how policies are generated (e.g., we expect the
+environment to be complicated enough that trained agents will be necessary). So we expect that scripting policies
+is a good way to start, but don't want you to get stuck here. Feel free to prove us wrong!
+
+Note to cogames developers:
+This policy should be kept relatively minimalist, without dependencies on intricate algorithms.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable, Optional
+
+from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
+from mettagrid.policy.policy_env_interface import PolicyEnvInterface
+from mettagrid.simulator import Action
+from mettagrid.simulator.interface import AgentObservation
+
+GEAR = ("aligner", "scrambler", "miner", "scout")
+ELEMENTS = ("carbon", "oxygen", "germanium", "silicon")
+WANDER_DIRECTIONS = ("east", "south", "west", "north")
+WANDER_STEPS = 8
+
+
+@dataclass
+class StarterCogState:
+    wander_direction_index: int = 0
+    wander_steps_remaining: int = WANDER_STEPS
+
+
+class StarterCogPolicyImpl(StatefulPolicyImpl[StarterCogState]):
+    def __init__(
+        self,
+        policy_env_info: PolicyEnvInterface,
+        agent_id: int,
+    ):
+        self._agent_id = agent_id
+        self._policy_env_info = policy_env_info
+
+        self._action_names = policy_env_info.action_names
+        self._action_name_set = set(self._action_names)
+        self._fallback_action_name = "noop" if "noop" in self._action_name_set else self._action_names[0]
+        self._center = (policy_env_info.obs_height // 2, policy_env_info.obs_width // 2)
+        self._tag_name_to_id = {name: idx for idx, name in enumerate(policy_env_info.tags)}
+        self._gear_station_tags = self._resolve_tag_ids([f"{gear}_station" for gear in GEAR])
+        self._extractor_tags = self._resolve_tag_ids([f"{element}_extractor" for element in ELEMENTS])
+        self._junction_tags = self._resolve_tag_ids(["junction"])
+        self._chest_tags = self._resolve_tag_ids(["chest"])
+        self._hub_tags = self._resolve_tag_ids(["hub"])
+
+    def _resolve_tag_ids(self, names: Iterable[str]) -> set[int]:
+        tag_ids: set[int] = set()
+        for name in names:
+            if name in self._tag_name_to_id:
+                tag_ids.add(self._tag_name_to_id[name])
+            if name.startswith("type:"):
+                continue
+            type_name = f"type:{name}"
+            if type_name in self._tag_name_to_id:
+                tag_ids.add(self._tag_name_to_id[type_name])
+        return tag_ids
+
+    def _inventory_items(self, obs: AgentObservation) -> set[str]:
+        items: set[str] = set()
+        for token in obs.tokens:
+            if token.location != self._center:
+                continue
+            name = token.feature.name
+            if not name.startswith("inv:"):
+                continue
+            parts = name.split(":", 2)
+            if len(parts) >= 2:
+                items.add(parts[1])
+        return items
+
+    def _closest_tag_location(self, obs: AgentObservation, tag_ids: set[int]) -> Optional[tuple[int, int]]:
+        if not tag_ids:
+            return None
+        best_location: Optional[tuple[int, int]] = None
+        best_distance = 999
+        for token in obs.tokens:
+            if token.feature.name != "tag":
+                continue
+            if token.value not in tag_ids:
+                continue
+            distance = abs(token.location[0] - self._center[0]) + abs(token.location[1] - self._center[1])
+            if distance < best_distance:
+                best_distance = distance
+                best_location = token.location
+        return best_location
+
+    def _action(self, name: str) -> Action:
+        if name in self._action_name_set:
+            return Action(name=name)
+        return Action(name=self._fallback_action_name)
+
+    def _wander(self, state: StarterCogState) -> tuple[Action, StarterCogState]:
+        if state.wander_steps_remaining <= 0:
+            state.wander_direction_index = (state.wander_direction_index + 1) % len(WANDER_DIRECTIONS)
+            state.wander_steps_remaining = WANDER_STEPS
+        direction = WANDER_DIRECTIONS[state.wander_direction_index]
+        state.wander_steps_remaining -= 1
+        return self._action(f"move_{direction}"), state
+
+    def _move_toward(self, state: StarterCogState, target: Optional[tuple[int, int]]) -> tuple[Action, StarterCogState]:
+        if target is None:
+            return self._wander(state)
+        delta_row = target[0] - self._center[0]
+        delta_col = target[1] - self._center[1]
+        if delta_row == 0 and delta_col == 0:
+            return self._action(self._fallback_action_name), state
+        if abs(delta_row) >= abs(delta_col):
+            direction = "south" if delta_row > 0 else "north"
+        else:
+            direction = "east" if delta_col > 0 else "west"
+        return self._action(f"move_{direction}"), state
+
+    def _current_gear(self, items: set[str]) -> Optional[str]:
+        for gear in GEAR:
+            if gear in items:
+                return gear
+        return None
+
+    def step_with_state(self, obs: AgentObservation, state: StarterCogState) -> tuple[Action, StarterCogState]:
+        """Compute the action for this Cog."""
+        items = self._inventory_items(obs)
+        gear = self._current_gear(items)
+        has_heart = "heart" in items
+        has_influence = "influence" in items
+
+        if gear is None:
+            target_tags = self._gear_station_tags
+        elif gear == "aligner":
+            if has_heart and has_influence:
+                target_tags = self._junction_tags
+            elif not has_heart:
+                target_tags = self._chest_tags
+            else:
+                target_tags = self._hub_tags
+        elif gear == "scrambler":
+            target_tags = self._junction_tags if has_heart else self._chest_tags
+        elif gear == "miner":
+            target_tags = self._extractor_tags
+        else:
+            target_tags = set()
+
+        target_location = self._closest_tag_location(obs, target_tags) if target_tags else None
+        return self._move_toward(state, target_location)
+
+    def initial_agent_state(self) -> StarterCogState:
+        """Get the initial state for a new agent."""
+        return StarterCogState()
+
+
+# ============================================================================
+# Policy Wrapper Classes
+# ============================================================================
+
+
+class StarterPolicy(MultiAgentPolicy):
+    short_names = ["starter"]
+
+    def __init__(self, policy_env_info: PolicyEnvInterface, device: str = "cpu"):
+        super().__init__(policy_env_info, device=device)
+        self._agent_policies: dict[int, StatefulAgentPolicy[StarterCogState]] = {}
+
+    def agent_policy(self, agent_id: int) -> StatefulAgentPolicy[StarterCogState]:
+        if agent_id not in self._agent_policies:
+            self._agent_policies[agent_id] = StatefulAgentPolicy(
+                StarterCogPolicyImpl(self._policy_env_info, agent_id),
+                self._policy_env_info,
+                agent_id=agent_id,
+            )
+        return self._agent_policies[agent_id]
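The new `starter_agent` module follows the same pattern as the PufferLib policy above: `StarterPolicy` builds one stateful heuristic controller per Cog. A small wiring sketch, assuming a `PolicyEnvInterface` is already available (the helper name is hypothetical):

from cogames.policy.starter_agent import StarterPolicy


def make_starter_agents(policy_env_info):
    # One heuristic StatefulAgentPolicy per agent id.
    policy = StarterPolicy(policy_env_info, device="cpu")
    return [policy.agent_policy(i) for i in range(policy_env_info.num_agents)]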
--- a/cogames/policy/trainable_policy_template.py
+++ b/cogames/policy/trainable_policy_template.py
@@ -1,6 +1,9 @@
 """
 Trainable Policy Template for the CoGames environment.
 
+This template is compatible with CogsGuard missions. It uses only the observation and action
+spaces provided by the environment and makes no game-specific assumptions.
+
 This template provides a minimal trainable neural network policy that can be used with
 `cogames tutorial train`. It demonstrates the key interfaces required for training:
 
@@ -14,7 +17,7 @@ clarity and without the pufferlib dependency.
 
 To use this template:
 1. Modify MyNetwork to implement your desired architecture
-2. Run: cogames tutorial train -m training_facility.harvest -p class=my_trainable_policy.MyTrainablePolicy
+2. Run: cogames tutorial train -m cogsguard_machina_1.basic -p class=my_trainable_policy.MyTrainablePolicy
 """
 
 from __future__ import annotations