cogames-agents 0.0.0.7__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. cogames_agents/__init__.py +0 -0
  2. cogames_agents/evals/__init__.py +5 -0
  3. cogames_agents/evals/planky_evals.py +415 -0
  4. cogames_agents/policy/__init__.py +0 -0
  5. cogames_agents/policy/evolution/__init__.py +0 -0
  6. cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
  7. cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
  8. cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
  9. cogames_agents/policy/nim_agents/__init__.py +20 -0
  10. cogames_agents/policy/nim_agents/agents.py +98 -0
  11. cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
  12. cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
  13. cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
  14. cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
  15. cogames_agents/policy/nim_agents/common.nim +1054 -0
  16. cogames_agents/policy/nim_agents/install.sh +1 -0
  17. cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
  18. cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
  19. cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
  20. cogames_agents/policy/nim_agents/nimby.lock +3 -0
  21. cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
  22. cogames_agents/policy/nim_agents/random_agents.nim +68 -0
  23. cogames_agents/policy/nim_agents/test_agents.py +53 -0
  24. cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
  25. cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
  26. cogames_agents/policy/scripted_agent/README.md +360 -0
  27. cogames_agents/policy/scripted_agent/__init__.py +0 -0
  28. cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
  29. cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
  30. cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
  31. cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
  32. cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
  33. cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
  34. cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
  35. cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
  36. cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
  37. cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
  38. cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
  39. cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
  40. cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
  41. cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
  42. cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
  43. cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
  44. cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
  45. cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
  46. cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
  47. cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
  48. cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
  49. cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
  50. cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
  51. cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
  52. cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
  53. cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
  54. cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
  55. cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
  56. cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
  57. cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
  58. cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
  59. cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
  60. cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
  61. cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
  62. cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
  63. cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
  64. cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
  65. cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
  66. cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
  67. cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
  68. cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
  69. cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
  70. cogames_agents/policy/scripted_agent/common/roles.py +34 -0
  71. cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
  72. cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
  73. cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
  74. cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
  75. cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
  76. cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
  77. cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
  78. cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
  79. cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
  80. cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
  81. cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
  82. cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
  83. cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
  84. cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
  85. cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
  86. cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
  87. cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
  88. cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
  89. cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
  90. cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
  91. cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
  92. cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
  93. cogames_agents/policy/scripted_agent/planky/README.md +214 -0
  94. cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
  95. cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
  96. cogames_agents/policy/scripted_agent/planky/context.py +68 -0
  97. cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
  98. cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
  99. cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
  100. cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
  101. cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
  102. cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
  103. cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
  104. cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
  105. cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
  106. cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
  107. cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
  108. cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
  109. cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
  110. cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
  111. cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
  112. cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
  113. cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
  114. cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
  115. cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
  116. cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
  117. cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
  118. cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
  119. cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
  120. cogames_agents/policy/scripted_agent/types.py +239 -0
  121. cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
  122. cogames_agents/policy/scripted_agent/utils.py +381 -0
  123. cogames_agents/policy/scripted_registry.py +80 -0
  124. cogames_agents/py.typed +0 -0
  125. cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
  126. cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
  127. cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
  128. cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,242 @@
1
+ import random
2
+
3
+ from mettagrid.policy.policy import MultiAgentPolicy, StatefulAgentPolicy, StatefulPolicyImpl
4
+ from mettagrid.simulator import Action
5
+ from mettagrid.simulator.interface import AgentObservation
6
+
7
+ from .common.geometry import manhattan
8
+ from .types import BaselineHyperparameters, CellType, SimpleAgentState
9
+ from .utils import (
10
+ change_vibe_action,
11
+ is_station,
12
+ is_wall,
13
+ parse_observation,
14
+ read_inventory_from_obs,
15
+ update_agent_position,
16
+ use_object_at,
17
+ )
18
+
19
+
20
class DemoPolicyImpl(StatefulPolicyImpl[SimpleAgentState]):
    """Minimal scripted per-agent policy.

    Behavior loop: learn the heart recipe from a hub, gather the required
    resources from extractors, assemble hearts at the hub, and deliver them
    to a chest.  Movement is single-step greedy with a random fallback.
    """

    # Assumed occupancy-grid extent; the agent starts at the grid center.
    _MAP_SIZE = 50

    def __init__(self, policy_env_info, agent_id, hyperparams, *, heart_recipe=None):
        self._agent_id = agent_id
        self._hyperparams = hyperparams
        self._policy_env_info = policy_env_info
        self._action_names = policy_env_info.action_names
        # (row, col) deltas for each cardinal move action name.
        self._move_deltas = {"north": (-1, 0), "south": (1, 0), "east": (0, 1), "west": (0, -1)}

        # Half-extents of the egocentric observation window.
        self._obs_hr = policy_env_info.obs_height // 2
        self._obs_wr = policy_env_info.obs_width // 2

        if heart_recipe:
            # Energy is tracked separately and never gathered, so drop it.
            self._heart_recipe = {k: v for k, v in heart_recipe.items() if k != "energy"}
        else:
            self._heart_recipe = None

    def initial_agent_state(self):
        """Build the blank per-agent state, centered on an all-FREE grid."""
        center = self._MAP_SIZE // 2  # 25 for the default 50x50 grid
        return SimpleAgentState(
            agent_id=self._agent_id,
            map_height=self._MAP_SIZE,
            map_width=self._MAP_SIZE,
            occupancy=[[CellType.FREE.value] * self._MAP_SIZE for _ in range(self._MAP_SIZE)],
            row=center,
            col=center,
            heart_recipe=self._heart_recipe,
        )

    # ------------------------------------------------------------
    # Utility helpers (kept tiny)
    # ------------------------------------------------------------

    def _adjacent(self, s, pos):
        """True when *pos* is exactly one Manhattan step from the agent."""
        return manhattan((s.row, s.col), pos) == 1

    def _blocked_cells(self, s, parsed):
        """Set of adjacent cells the agent must not step onto.

        A cell is blocked when it holds a wall, an extractor, a hub/chest/
        junction station, or another agent.  Shared by ``_random_step`` and
        ``_step_towards`` (previously duplicated in both).

        NOTE(review): the agent clause compares ``agent_group`` against the
        agent's own *id* (not its group).  Preserved as-is; confirm against
        the observation schema.
        """
        return {
            (r, c)
            for (r, c), obj in parsed.nearby_objects.items()
            if self._adjacent(s, (r, c))
            and (
                is_wall(obj.name)
                or "extractor" in obj.name
                or is_station(obj.name, "hub")
                or is_station(obj.name, "chest")
                or is_station(obj.name, "junction")
                or (obj.name == "agent" and obj.agent_group != s.agent_id)
            )
        }

    def _random_step(self, s, parsed):
        """Move in a random unblocked direction; noop when boxed in."""
        dirs = list(self._move_deltas.keys())
        random.shuffle(dirs)
        blocked = self._blocked_cells(s, parsed)
        for d in dirs:
            dr, dc = self._move_deltas[d]
            nr, nc = s.row + dr, s.col + dc
            if (nr, nc) not in blocked:
                return Action(name=f"move_{d}")
        return Action(name="noop")

    def _step_towards(self, s, target, parsed):
        """Single-step greedy pursuit, else random.

        Candidate directions are ordered so the axis with the larger
        remaining distance is tried first.
        """
        r, c = s.row, s.col
        tr, tc = target
        cand = []
        if abs(tr - r) >= abs(tc - c):
            if tr < r:
                cand.append("north")
            elif tr > r:
                cand.append("south")
            if tc < c:
                cand.append("west")
            elif tc > c:
                cand.append("east")
        else:
            if tc < c:
                cand.append("west")
            elif tc > c:
                cand.append("east")
            if tr < r:
                cand.append("north")
            elif tr > r:
                cand.append("south")

        blocked = self._blocked_cells(s, parsed)
        for d in cand:
            dr, dc = self._move_deltas[d]
            nr, nc = r + dr, c + dc
            if (nr, nc) not in blocked:
                return Action(name=f"move_{d}")

        # No greedy direction is open: wander instead of stalling.
        return self._random_step(s, parsed)

    def _closest(self, s, parsed, pred):
        """Nearest visible object position satisfying *pred*, or None."""
        items = [pos for pos, obj in parsed.nearby_objects.items() if pred(obj)]
        return min(items, key=lambda p: manhattan((s.row, s.col), p)) if items else None

    def _rtype(self, name):
        """Resource type for an extractor object name, or None.

        Strips any ``clipped_`` prefix, then maps e.g. ``carbon_extractor``
        -> ``"carbon"``.  Returns None for non-extractor or unknown names.
        """
        name = name.lower().replace("clipped_", "")
        if "_extractor" not in name:
            return None
        name = name.replace("_extractor", "")
        return name if name in ("carbon", "oxygen", "germanium", "silicon") else None

    # ------------------------------------------------------------
    # Main step
    # ------------------------------------------------------------
    def step_with_state(self, obs: AgentObservation, s: SimpleAgentState):
        """Advance one tick and return an ``(Action, state)`` pair."""
        s.step_count += 1
        read_inventory_from_obs(s, obs, obs_hr=self._obs_hr, obs_wr=self._obs_wr)
        update_agent_position(s, move_deltas=self._move_deltas)

        parsed = parse_observation(
            s,
            obs,
            obs_hr=self._obs_hr,
            obs_wr=self._obs_wr,
            spatial_feature_names={"tag", "cooldown_remaining", "clipped", "remaining_uses"},
            agent_feature_key_by_name={"agent:group": "agent_group", "agent:frozen": "agent_frozen"},
            protocol_input_prefix="protocol_input:",
            protocol_output_prefix="protocol_output:",
            tag_names=self._policy_env_info.tag_id_to_name,
        )

        # Learn recipe if visible: any hub advertising a heart output tells
        # us the required inputs (minus energy).
        if s.heart_recipe is None:
            for _pos, obj in parsed.nearby_objects.items():
                if obj.name == "hub" and obj.protocol_outputs.get("heart", 0) > 0:
                    s.heart_recipe = {k: v for k, v in obj.protocol_inputs.items() if k != "energy"}

        # ---------------- PRE-PHASE: find hub to learn recipe ----------------
        if s.heart_recipe is None:
            # Switch glyph first so the hub protocol applies to us.
            if s.current_glyph != "heart_a":
                s.current_glyph = "heart_a"
                return change_vibe_action("heart_a", action_names=self._action_names), s

            hub = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "hub"))
            if hub:
                if self._adjacent(s, hub):
                    return use_object_at(s, hub), s
                return self._step_towards(s, hub, parsed), s

            return self._random_step(s, parsed), s

        # ---------------- MAIN PHASE ----------------

        # Deliver hearts to the nearest chest (with the default glyph).
        if s.hearts > 0:
            chest = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "chest"))
            if chest:
                if s.current_glyph != "default":
                    s.current_glyph = "default"
                    return change_vibe_action("default", action_names=self._action_names), s
                if self._adjacent(s, chest):
                    return use_object_at(s, chest), s
                return self._step_towards(s, chest, parsed), s

        # Assemble a heart at the hub once every recipe input is covered.
        if (
            s.carbon >= s.heart_recipe.get("carbon", 0)
            and s.oxygen >= s.heart_recipe.get("oxygen", 0)
            and s.germanium >= s.heart_recipe.get("germanium", 0)
            and s.silicon >= s.heart_recipe.get("silicon", 0)
        ):
            hub = self._closest(s, parsed, lambda o: is_station(o.name.lower(), "hub"))
            if hub:
                if s.current_glyph != "heart_a":
                    s.current_glyph = "heart_a"
                    return change_vibe_action("heart_a", action_names=self._action_names), s
                if self._adjacent(s, hub):
                    return use_object_at(s, hub), s
                return self._step_towards(s, hub, parsed), s

        # Gather needed resources: pick the nearest extractor whose resource
        # we still lack.
        deficits = {
            r: s.heart_recipe.get(r, 0) - getattr(s, r, 0) for r in ("carbon", "oxygen", "germanium", "silicon")
        }
        needed = [
            (pos, obj, self._rtype(obj.name.lower()))
            for pos, obj in parsed.nearby_objects.items()
            if "extractor" in obj.name.lower()
        ]

        needed = [(pos, obj, r) for pos, obj, r in needed if r and deficits[r] > 0]

        if needed:
            pos, obj, r = min(needed, key=lambda x: manhattan((s.row, s.col), x[0]))
            if self._adjacent(s, pos):
                return use_object_at(s, pos), s
            return self._step_towards(s, pos, parsed), s

        # Otherwise wander
        return self._random_step(s, parsed), s
224
+
225
+
226
class DemoPolicy(MultiAgentPolicy):
    """Multi-agent wrapper that lazily builds one DemoPolicyImpl per agent."""

    short_names = ["tiny_baseline"]

    def __init__(self, policy_env_info, device: str = "cpu", hyperparams=None, *, heart_recipe=None):
        super().__init__(policy_env_info, device=device)
        self._hyperparams = hyperparams or BaselineHyperparameters()
        self._heart_recipe = heart_recipe
        self._agent_policies = {}

    def agent_policy(self, agent_id):
        """Return (creating on first use) the stateful policy for *agent_id*."""
        cached = self._agent_policies.get(agent_id)
        if cached is None:
            impl = DemoPolicyImpl(
                self._policy_env_info,
                agent_id,
                self._hyperparams,
                heart_recipe=self._heart_recipe,
            )
            cached = StatefulAgentPolicy(impl, self._policy_env_info, agent_id=agent_id)
            self._agent_policies[agent_id] = cached
        return cached
@@ -0,0 +1,126 @@
1
+ """
2
+ Pathfinding utilities for scripted agents.
3
+
4
+ This module contains a BFS shortest-path implementation and related utilities
5
+ for navigating the grid world.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections import deque
11
+ from typing import TYPE_CHECKING
12
+
13
+ if TYPE_CHECKING:
14
+ from cogames_agents.policy.scripted_agent.types import CellType, SimpleAgentState
15
+
16
+
17
def compute_goal_cells(
    state: SimpleAgentState, target: tuple[int, int], reach_adjacent: bool, cell_type: type[CellType]
) -> list[tuple[int, int]]:
    """Return the cells that count as "arrived" for a pathfinding request.

    When *reach_adjacent* is False the target itself is the only goal.
    Otherwise the four orthogonal neighbours of *target* are used: first the
    known-traversable ones, and if none are known yet, any in-bounds
    neighbour that is not a known obstacle (so the agent can still explore
    toward unknown tiles).
    """
    if not reach_adjacent:
        return [target]

    tr, tc = target
    neighbours = [(tr - 1, tc), (tr + 1, tc), (tr, tc - 1), (tr, tc + 1)]

    goals = [cell for cell in neighbours if is_traversable(state, cell[0], cell[1], cell_type)]
    if goals:
        return goals

    # No adjacent traversable tile is known yet: fall back to unknown (but
    # not known-obstacle) neighbours.
    return [
        (nr, nc)
        for nr, nc in neighbours
        if is_within_bounds(state, nr, nc) and state.occupancy[nr][nc] != cell_type.OBSTACLE.value
    ]
39
+
40
+
41
def shortest_path(
    state: SimpleAgentState,
    start: tuple[int, int],
    goals: list[tuple[int, int]],
    allow_goal_block: bool,
    cell_type: type[CellType],
) -> list[tuple[int, int]]:
    """Breadth-first search from *start* to the nearest cell in *goals*.

    *allow_goal_block* permits entering a goal cell even when it is not
    normally traversable (e.g. stepping onto a station tile).  Returns the
    path excluding *start*, or an empty list when no goal is reachable.
    """
    goal_set = set(goals)
    frontier: deque[tuple[int, int]] = deque([start])
    came_from: dict[tuple[int, int], tuple[int, int] | None] = {start: None}

    def walkable(r: int, c: int) -> bool:
        # Goal tiles may be enterable even when blocked.
        if allow_goal_block and (r, c) in goal_set:
            return True
        return is_traversable(state, r, c, cell_type)

    while frontier:
        cell = frontier.popleft()
        if cell in goal_set:
            return reconstruct_path(came_from, cell)

        for nxt in get_neighbors(state, cell):
            if nxt not in came_from and walkable(nxt[0], nxt[1]):
                came_from[nxt] = cell
                frontier.append(nxt)

    return []
71
+
72
+
73
def reconstruct_path(
    came_from: dict[tuple[int, int], tuple[int, int] | None],
    current: tuple[int, int],
) -> list[tuple[int, int]]:
    """Rebuild the start->goal path from a BFS predecessor map.

    Walks parent links back from *current* until the start node (whose
    predecessor is None), then returns the visited cells in forward order.
    The start cell itself is not included.
    """
    reversed_path: list[tuple[int, int]] = []
    node = current
    while (parent := came_from[node]) is not None:
        reversed_path.append(node)
        node = parent
    return reversed_path[::-1]
88
+
89
+
90
def get_neighbors(state: SimpleAgentState, pos: tuple[int, int]) -> list[tuple[int, int]]:
    """Return the 4-connected neighbours of *pos* that lie inside the map."""
    r, c = pos
    return [
        cell
        for cell in ((r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1))
        if is_within_bounds(state, cell[0], cell[1])
    ]
97
+
98
+
99
def is_within_bounds(state: SimpleAgentState, r: int, c: int) -> bool:
    """Return True when (r, c) lies inside the state's occupancy grid."""
    in_rows = 0 <= r < state.map_height
    in_cols = 0 <= c < state.map_width
    return in_rows and in_cols
104
+
105
+
106
def is_passable(state: SimpleAgentState, r: int, c: int, cell_type: type[CellType]) -> bool:
    """
    Check if a cell can be walked on.

    Thin alias for :func:`is_traversable`: the cell must be in bounds,
    known to be FREE, and not occupied by another agent.  The previous
    separate bounds check was redundant — is_traversable performs it too.
    """
    return is_traversable(state, r, c, cell_type)
113
+
114
+
115
def is_traversable(state: SimpleAgentState, r: int, c: int, cell_type: type[CellType]) -> bool:
    """Return True when (r, c) is in bounds, agent-free, and known FREE.

    Unknown cells are treated as non-traversable: we only walk on terrain
    that has actually been observed to be free.
    """
    if not is_within_bounds(state, r, c):
        return False
    # Never path through another agent's tile.
    if (r, c) in state.agent_occupancy:
        return False
    return state.occupancy[r][c] == cell_type.FREE.value
@@ -0,0 +1,317 @@
1
+ # Pinky Policy Design
2
+
3
+ Pinky is a scripted multi-agent policy for CogsGuard. Each agent is assigned a role and executes behavior-tree style
4
+ decision making.
5
+
6
+ ## Current Implementation
7
+
8
+ ### Roles
9
+
10
+ Roles are assigned at spawn via URI parameters (e.g., `pinky?miner=2&scout=1`).
11
+
12
+ | Role | Risk Tolerance | Gear Bonus | Primary Action |
13
+ | ------------- | -------------- | ------------------ | -------------------------------------------- |
14
+ | **MINER** | Conservative | +40 cargo capacity | Harvest resources, deposit at cogs buildings |
15
+ | **SCOUT** | Aggressive | +400 HP | Explore map frontiers |
16
+ | **ALIGNER** | Moderate | +20 influence | Convert neutral junctions to cogs |
17
+ | **SCRAMBLER** | Aggressive | +200 HP | Neutralize enemy (clips) junctions |
18
+
19
+ ### Modes (Current)
20
+
21
+ Modes are set via `debug_info.mode` for debugging output. They describe what the agent is currently doing:
22
+
23
+ **Universal modes** (all roles):
24
+
25
+ - `retreat` - HP critical, returning to safe zone
26
+ - `get_gear` - Moving to/using role station to acquire gear
27
+ - `explore` - Random or directed exploration
28
+
29
+ **Miner-specific:**
30
+
31
+ - `mine` - Moving toward extractors
32
+ - `deposit` - Returning cargo to cogs depot
33
+
34
+ **Scout-specific:**
35
+
36
+ - `explore` - Frontier-based exploration (BFS to unexplored cells)
37
+
38
+ **Aligner-specific:**
39
+
40
+ - `get_hearts` - Acquiring hearts from chest
41
+ - `align` - Converting neutral junction to cogs
42
+
43
+ **Scrambler-specific:**
44
+
45
+ - `get_hearts` - Acquiring hearts from chest
46
+ - `scramble` - Neutralizing enemy junction
47
+
48
+ **Policy-level:**
49
+
50
+ - `activate` - Changing vibe to assigned role (step 1)
51
+ - `inactive` - Agent has non-role vibe, nooping
52
+
53
+ ### State Structure (Current)
54
+
55
+ ```python
56
+ @dataclass
57
+ class AgentState:
58
+ agent_id: int
59
+ role: Role # MINER, SCOUT, ALIGNER, SCRAMBLER
60
+ vibe: str # Current vibe from observation
61
+ step: int # Step counter
62
+
63
+ # Position
64
+ row: int
65
+ col: int
66
+
67
+ # Inventory
68
+ energy: int
69
+ hp: int
70
+ carbon: int
+ oxygen: int
+ germanium: int
+ silicon: int
71
+ heart: int
72
+ influence: int
73
+
74
+ # Gear flags
75
+ miner_gear: bool
76
+ scout_gear: bool
77
+ aligner_gear: bool
78
+ scrambler_gear: bool
79
+
80
+ # Knowledge
81
+ map: MapKnowledge # Occupancy grid, structures, stations
82
+ nav: NavigationState # Path cache, exploration direction
83
+
84
+ # Debug
85
+ debug_info: DebugInfo # mode, goal, target_object, target_pos
86
+ ```
87
+
88
+ ### Decision Flow (Current)
89
+
90
+ Each behavior follows a priority-based decision tree:
91
+
92
+ ```
93
+ MinerBehavior.act():
94
+ 1. HP <= 15? → retreat
95
+ 2. No miner_gear? → get_gear (or explore for station)
96
+ 3. Cargo full? → deposit
97
+ 4. Otherwise → mine (move toward extractors)
98
+
99
+ ScoutBehavior.act():
100
+ 1. HP < 50? → retreat
101
+ 2. No scout_gear? → get_gear
102
+ 3. Otherwise → explore_frontier
103
+
104
+ AlignerBehavior.act():
105
+ 1. Should retreat? → retreat
106
+ 2. No aligner_gear? → get_gear
107
+ 3. No hearts? → get_hearts
108
+ 4. Otherwise → align_junction
109
+
110
+ ScramblerBehavior.act():
111
+ 1. HP < 30? → retreat
112
+ 2. No scrambler_gear? → get_gear
113
+ 3. No hearts? → get_hearts
114
+ 4. Otherwise → scramble_junction
115
+ ```
116
+
117
+ ### Limitations of Current Design
118
+
119
+ 1. **No explicit state machine** - Mode transitions are implicit in if/else priority chains
120
+ 2. **No goal/destination tracking** - Each step re-evaluates from scratch
121
+ 3. **No role selection** - Roles are fixed at spawn, agents can't adapt
122
+ 4. **No mood/urgency** - All decisions binary (do/don't)
123
+ 5. **Debug-only modes** - Modes exist for logging, not for control flow
124
+
125
+ ---
126
+
127
+ ## Proposed Design
128
+
129
+ ### Roles (Expanded)
130
+
131
+ | Role | Description |
132
+ | --------------- | ---------------------------------------- |
133
+ | `resting` | Inactive, waiting for assignment |
134
+ | `choosing_role` | Evaluating team composition to pick role |
135
+ | `miner` | Resource gathering specialist |
136
+ | `scout` | Map exploration specialist |
137
+ | `aligner` | Territory expansion specialist |
138
+ | `scrambler` | Enemy territory disruption specialist |
139
+
140
+ ### Modes (Explicit State Machine)
141
+
142
+ Modes should be **first-class state** that drives behavior, not just debug labels.
143
+
144
+ **Universal Modes** (available to all roles):
145
+
146
+ | Mode | Description | Exit Condition |
147
+ | ---------- | ------------------------- | --------------------------- |
148
+ | `idle` | No current task | Goal assigned |
149
+ | `get_gear` | Acquiring role equipment | Gear obtained |
150
+ | `retreat` | Returning to safety | HP restored above threshold |
151
+ | `explore` | Searching for something | Target found |
152
+ | `move_to` | Navigating to destination | Arrived at destination |
153
+
154
+ **Miner Modes:**
155
+
156
+ | Mode | Description | Exit Condition |
157
+ | --------- | -------------------- | -------------------------------- |
158
+ | `harvest` | Extracting resources | Cargo full or extractor depleted |
159
+ | `deposit` | Delivering cargo | Cargo empty |
160
+
161
+ **Scout Modes:**
162
+
163
+ | Mode | Description | Exit Condition |
164
+ | ------------------ | ----------------------- | ------------------ |
165
+ | `frontier_explore` | BFS to unexplored areas | Map fully explored |
166
+ | `report` | Returning with intel | At cogs building |
167
+
168
+ **Aligner Modes:**
169
+
170
+ | Mode | Description | Exit Condition |
171
+ | ---------------- | --------------------------- | ------------------ |
172
+ | `acquire_hearts` | Getting hearts from chest | Have hearts |
173
+ | `align_junction` | Converting neutral junction | Junction converted |
174
+
175
+ **Scrambler Modes:**
176
+
177
+ | Mode | Description | Exit Condition |
178
+ | ---------------- | --------------------------- | -------------------- |
179
+ | `acquire_hearts` | Getting hearts from chest | Have hearts |
180
+ | `raid_junction` | Neutralizing enemy junction | Junction neutralized |
181
+
182
+ ### Goals and Destinations
183
+
184
+ Explicit goal tracking separates **intent** from **execution**:
185
+
186
+ ```python
187
+ @dataclass
188
+ class AgentGoal:
189
+ """What the agent is trying to achieve."""
190
+
191
+ # High-level intent
192
+ goal: str # "get_gear", "harvest_carbon", "deposit_cargo"
193
+
194
+ # Target
195
+ destination: Optional[str] # "miner_station", "carbon_extractor", "hub"
196
+ destination_pos: Optional[tuple[int, int]]
197
+
198
+ # Progress
199
+ started_at_step: int
200
+ timeout_steps: int = 100 # Give up and re-evaluate
201
+
202
+ # Completion
203
+ success_condition: str # "has_miner_gear", "cargo_full", "cargo_empty"
204
+ ```
205
+
206
+ ### Mood / Urgency
207
+
208
+ Mood modifies behavior parameters:
209
+
210
+ | Mood | Trigger | Effect |
211
+ | ----------- | ----------------------- | ---------------------------------------- |
212
+ | `calm` | HP > 80%, safe zone | Normal risk tolerance |
213
+ | `cautious` | HP 50-80% or near enemy | Reduced exploration range |
214
+ | `urgent` | HP 20-50% | Prioritize retreat paths |
215
+ | `desperate` | HP < 20% | Shortest path to safety, ignore all else |
216
+
217
+ ### Proposed State Structure
218
+
219
+ ```python
220
+ @dataclass
221
+ class AgentState:
222
+ agent_id: int
223
+
224
+ # Identity
225
+ role: Role # resting, choosing_role, miner, scout, aligner, scrambler
226
+
227
+ # Behavioral state machine
228
+ mode: Mode # Current mode (idle, get_gear, retreat, harvest, etc.)
229
+ mood: Mood # calm, cautious, urgent, desperate
230
+
231
+ # Current goal
232
+ goal: Optional[AgentGoal] # What we're trying to achieve
233
+
234
+ # ... rest of inventory, map, nav state ...
235
+ ```
236
+
237
+ ### State Transition Diagram
238
+
239
+ ```
240
+ ┌─────────────────────────────────────────┐
241
+ │ │
242
+ ▼ │
243
+ ┌──────────┐ │
244
+ spawn───►│ resting │ │
245
+ └────┬─────┘ │
246
+ │ team needs role │
247
+ ▼ │
248
+ ┌──────────────┐ │
249
+ │choosing_role │ │
250
+ └──────┬───────┘ │
251
+ │ role selected │
252
+ ┌──────────┼──────────┬──────────┐ │
253
+ ▼ ▼ ▼ ▼ │
254
+ ┌───────┐ ┌───────┐ ┌─────────┐ ┌───────────┐ │
255
+ │ miner │ │ scout │ │ aligner │ │ scrambler │ │
256
+ └───┬───┘ └───┬───┘ └────┬────┘ └─────┬─────┘ │
257
+ │ │ │ │ │
258
+ └─────────┴──────────┴────────────┘ │
259
+ │ │
260
+ │ HP critical or role no longer needed
261
+ │ │
262
+ └───────────────────────────────────┘
263
+ ```
264
+
265
+ ### Role-Specific Mode Transitions
266
+
267
+ **Miner:**
268
+
269
+ ```
270
+ idle ──► get_gear ──► explore ──► harvest ──► deposit ──► harvest
271
+ │ │ │ │
272
+ └──────────────┴───────────┴───────────┴──► retreat ──► idle
273
+ (HP critical)
274
+ ```
275
+
276
+ **Aligner:**
277
+
278
+ ```
279
+ idle ──► get_gear ──► acquire_hearts ──► align_junction ──► acquire_hearts
280
+ │ │ │
281
+ └────────────────┴──────────────────┴──► retreat ──► idle
282
+ ```
283
+
284
+ ### Transition Triggers
285
+
286
+ | From | To | Trigger |
287
+ | ------------------ | ------------------ | ------------------------- |
288
+ | `idle` | `get_gear` | Role assigned, no gear |
289
+ | `get_gear` | role mode | Gear acquired |
290
+ | any | `retreat` | HP < threshold for mood |
291
+ | `retreat` | `idle` | HP restored, in safe zone |
292
+ | `harvest` | `deposit` | Cargo full |
293
+ | `deposit` | `harvest` | Cargo empty |
294
+ | `acquire_hearts` | `align`/`scramble` | Have hearts |
295
+ | `align`/`scramble` | `acquire_hearts` | Hearts depleted |
296
+ | any | `idle` | Goal timeout reached |
297
+
298
+ ---
299
+
300
+ ## Migration Path
301
+
302
+ 1. **Add `Mode` enum** with all modes (keep current logic)
303
+ 2. **Add `goal` field** to AgentState
304
+ 3. **Refactor behaviors** to set mode/goal explicitly
305
+ 4. **Add mood system** for risk tolerance modulation
306
+ 5. **Add role selection** for `choosing_role` state
307
+ 6. **Add `resting`** state for unassigned agents
308
+
309
+ ---
310
+
311
+ ## Debug Output Format
312
+
313
+ Current: `role:mode:goal:target:action`
314
+
315
+ Proposed: `role:mode:mood:goal→dest:action`
316
+
317
+ Example: `miner:harvest:calm:get_carbon→carbon_extractor(5,12):move_east`
@@ -0,0 +1,5 @@
1
+ """Pinky policy for CogsGuard game."""
2
+
3
+ from .policy import PinkyPolicy
4
+
5
+ __all__ = ["PinkyPolicy"]