cogames_agents-0.0.0.7-cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cogames_agents/__init__.py +0 -0
- cogames_agents/evals/__init__.py +5 -0
- cogames_agents/evals/planky_evals.py +415 -0
- cogames_agents/policy/__init__.py +0 -0
- cogames_agents/policy/evolution/__init__.py +0 -0
- cogames_agents/policy/evolution/cogsguard/__init__.py +0 -0
- cogames_agents/policy/evolution/cogsguard/evolution.py +695 -0
- cogames_agents/policy/evolution/cogsguard/evolutionary_coordinator.py +540 -0
- cogames_agents/policy/nim_agents/__init__.py +20 -0
- cogames_agents/policy/nim_agents/agents.py +98 -0
- cogames_agents/policy/nim_agents/bindings/generated/libnim_agents.dylib +0 -0
- cogames_agents/policy/nim_agents/bindings/generated/nim_agents.py +215 -0
- cogames_agents/policy/nim_agents/cogsguard_agents.nim +555 -0
- cogames_agents/policy/nim_agents/cogsguard_align_all_agents.nim +569 -0
- cogames_agents/policy/nim_agents/common.nim +1054 -0
- cogames_agents/policy/nim_agents/install.sh +1 -0
- cogames_agents/policy/nim_agents/ladybug_agent.nim +954 -0
- cogames_agents/policy/nim_agents/nim_agents.nim +68 -0
- cogames_agents/policy/nim_agents/nim_agents.nims +14 -0
- cogames_agents/policy/nim_agents/nimby.lock +3 -0
- cogames_agents/policy/nim_agents/racecar_agents.nim +844 -0
- cogames_agents/policy/nim_agents/random_agents.nim +68 -0
- cogames_agents/policy/nim_agents/test_agents.py +53 -0
- cogames_agents/policy/nim_agents/thinky_agents.nim +677 -0
- cogames_agents/policy/nim_agents/thinky_eval.py +230 -0
- cogames_agents/policy/scripted_agent/README.md +360 -0
- cogames_agents/policy/scripted_agent/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/baseline_agent.py +1031 -0
- cogames_agents/policy/scripted_agent/cogas/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/cogas/context.py +68 -0
- cogames_agents/policy/scripted_agent/cogas/entity_map.py +152 -0
- cogames_agents/policy/scripted_agent/cogas/goal.py +115 -0
- cogames_agents/policy/scripted_agent/cogas/goals/__init__.py +27 -0
- cogames_agents/policy/scripted_agent/cogas/goals/aligner.py +160 -0
- cogames_agents/policy/scripted_agent/cogas/goals/gear.py +197 -0
- cogames_agents/policy/scripted_agent/cogas/goals/miner.py +441 -0
- cogames_agents/policy/scripted_agent/cogas/goals/scout.py +40 -0
- cogames_agents/policy/scripted_agent/cogas/goals/scrambler.py +174 -0
- cogames_agents/policy/scripted_agent/cogas/goals/shared.py +160 -0
- cogames_agents/policy/scripted_agent/cogas/goals/stem.py +60 -0
- cogames_agents/policy/scripted_agent/cogas/goals/survive.py +100 -0
- cogames_agents/policy/scripted_agent/cogas/navigator.py +401 -0
- cogames_agents/policy/scripted_agent/cogas/obs_parser.py +238 -0
- cogames_agents/policy/scripted_agent/cogas/policy.py +525 -0
- cogames_agents/policy/scripted_agent/cogas/trace.py +69 -0
- cogames_agents/policy/scripted_agent/cogsguard/CLAUDE.md +517 -0
- cogames_agents/policy/scripted_agent/cogsguard/README.md +252 -0
- cogames_agents/policy/scripted_agent/cogsguard/__init__.py +74 -0
- cogames_agents/policy/scripted_agent/cogsguard/aligned_junction_held_investigation.md +152 -0
- cogames_agents/policy/scripted_agent/cogsguard/aligner.py +333 -0
- cogames_agents/policy/scripted_agent/cogsguard/behavior_hooks.py +44 -0
- cogames_agents/policy/scripted_agent/cogsguard/control_agent.py +323 -0
- cogames_agents/policy/scripted_agent/cogsguard/debug_agent.py +533 -0
- cogames_agents/policy/scripted_agent/cogsguard/miner.py +589 -0
- cogames_agents/policy/scripted_agent/cogsguard/options.py +67 -0
- cogames_agents/policy/scripted_agent/cogsguard/parity_metrics.py +36 -0
- cogames_agents/policy/scripted_agent/cogsguard/policy.py +1967 -0
- cogames_agents/policy/scripted_agent/cogsguard/prereq_trace.py +33 -0
- cogames_agents/policy/scripted_agent/cogsguard/role_trace.py +50 -0
- cogames_agents/policy/scripted_agent/cogsguard/roles.py +31 -0
- cogames_agents/policy/scripted_agent/cogsguard/rollout_trace.py +40 -0
- cogames_agents/policy/scripted_agent/cogsguard/scout.py +69 -0
- cogames_agents/policy/scripted_agent/cogsguard/scrambler.py +350 -0
- cogames_agents/policy/scripted_agent/cogsguard/targeted_agent.py +418 -0
- cogames_agents/policy/scripted_agent/cogsguard/teacher.py +224 -0
- cogames_agents/policy/scripted_agent/cogsguard/types.py +381 -0
- cogames_agents/policy/scripted_agent/cogsguard/v2_agent.py +49 -0
- cogames_agents/policy/scripted_agent/common/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/common/geometry.py +24 -0
- cogames_agents/policy/scripted_agent/common/roles.py +34 -0
- cogames_agents/policy/scripted_agent/common/tag_utils.py +48 -0
- cogames_agents/policy/scripted_agent/demo_policy.py +242 -0
- cogames_agents/policy/scripted_agent/pathfinding.py +126 -0
- cogames_agents/policy/scripted_agent/pinky/DESIGN.md +317 -0
- cogames_agents/policy/scripted_agent/pinky/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/__init__.py +17 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/aligner.py +400 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/base.py +119 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/miner.py +632 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/scout.py +138 -0
- cogames_agents/policy/scripted_agent/pinky/behaviors/scrambler.py +433 -0
- cogames_agents/policy/scripted_agent/pinky/policy.py +570 -0
- cogames_agents/policy/scripted_agent/pinky/services/__init__.py +7 -0
- cogames_agents/policy/scripted_agent/pinky/services/map_tracker.py +808 -0
- cogames_agents/policy/scripted_agent/pinky/services/navigator.py +864 -0
- cogames_agents/policy/scripted_agent/pinky/services/safety.py +189 -0
- cogames_agents/policy/scripted_agent/pinky/state.py +299 -0
- cogames_agents/policy/scripted_agent/pinky/types.py +138 -0
- cogames_agents/policy/scripted_agent/planky/CLAUDE.md +124 -0
- cogames_agents/policy/scripted_agent/planky/IMPROVEMENTS.md +160 -0
- cogames_agents/policy/scripted_agent/planky/NOTES.md +153 -0
- cogames_agents/policy/scripted_agent/planky/PLAN.md +254 -0
- cogames_agents/policy/scripted_agent/planky/README.md +214 -0
- cogames_agents/policy/scripted_agent/planky/STRATEGY.md +100 -0
- cogames_agents/policy/scripted_agent/planky/__init__.py +5 -0
- cogames_agents/policy/scripted_agent/planky/context.py +68 -0
- cogames_agents/policy/scripted_agent/planky/entity_map.py +152 -0
- cogames_agents/policy/scripted_agent/planky/goal.py +107 -0
- cogames_agents/policy/scripted_agent/planky/goals/__init__.py +27 -0
- cogames_agents/policy/scripted_agent/planky/goals/aligner.py +168 -0
- cogames_agents/policy/scripted_agent/planky/goals/gear.py +179 -0
- cogames_agents/policy/scripted_agent/planky/goals/miner.py +416 -0
- cogames_agents/policy/scripted_agent/planky/goals/scout.py +40 -0
- cogames_agents/policy/scripted_agent/planky/goals/scrambler.py +174 -0
- cogames_agents/policy/scripted_agent/planky/goals/shared.py +160 -0
- cogames_agents/policy/scripted_agent/planky/goals/stem.py +49 -0
- cogames_agents/policy/scripted_agent/planky/goals/survive.py +96 -0
- cogames_agents/policy/scripted_agent/planky/navigator.py +388 -0
- cogames_agents/policy/scripted_agent/planky/obs_parser.py +238 -0
- cogames_agents/policy/scripted_agent/planky/policy.py +485 -0
- cogames_agents/policy/scripted_agent/planky/tests/__init__.py +0 -0
- cogames_agents/policy/scripted_agent/planky/tests/conftest.py +66 -0
- cogames_agents/policy/scripted_agent/planky/tests/helpers.py +152 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_aligner.py +24 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_miner.py +30 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_scout.py +15 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_scrambler.py +29 -0
- cogames_agents/policy/scripted_agent/planky/tests/test_stem.py +36 -0
- cogames_agents/policy/scripted_agent/planky/trace.py +69 -0
- cogames_agents/policy/scripted_agent/types.py +239 -0
- cogames_agents/policy/scripted_agent/unclipping_agent.py +461 -0
- cogames_agents/policy/scripted_agent/utils.py +381 -0
- cogames_agents/policy/scripted_registry.py +80 -0
- cogames_agents/py.typed +0 -0
- cogames_agents-0.0.0.7.dist-info/METADATA +98 -0
- cogames_agents-0.0.0.7.dist-info/RECORD +128 -0
- cogames_agents-0.0.0.7.dist-info/WHEEL +6 -0
- cogames_agents-0.0.0.7.dist-info/top_level.txt +1 -0

cogames_agents/policy/scripted_agent/cogas/navigator.py
@@ -0,0 +1,401 @@
"""A* navigator for Cogas policy."""

from __future__ import annotations

import heapq
import random
from typing import TYPE_CHECKING, Optional

from mettagrid.simulator import Action

if TYPE_CHECKING:
    from .entity_map import EntityMap

MOVE_DELTAS: dict[str, tuple[int, int]] = {
    "north": (-1, 0),
    "south": (1, 0),
    "east": (0, 1),
    "west": (0, -1),
}

DIRECTIONS = ["north", "south", "east", "west"]


class Navigator:
    """A* pathfinding over the entity map."""

    def __init__(self) -> None:
        self._cached_path: Optional[list[tuple[int, int]]] = None
        self._cached_target: Optional[tuple[int, int]] = None
        self._cached_reach_adjacent: bool = False
        self._position_history: list[tuple[int, int]] = []

    def get_action(
        self,
        current: tuple[int, int],
        target: tuple[int, int],
        map: EntityMap,
        reach_adjacent: bool = False,
    ) -> Action:
        """Navigate from current to target using A*.

        Args:
            current: Current position
            target: Target position
            map: Entity map for pathfinding
            reach_adjacent: If True, stop adjacent to target
        """
        # Track position history for stuck detection
        self._position_history.append(current)
        if len(self._position_history) > 30:
            self._position_history.pop(0)

        # Stuck detection
        if self._is_stuck():
            action = self._break_stuck(current, map)
            if action:
                return action

        if current == target and not reach_adjacent:
            # Already at target - bump in a random direction to stay active
            return self._random_move(current, map)

        # Check if adjacent to target (for reach_adjacent mode)
        if reach_adjacent and _manhattan(current, target) == 1:
            # Already adjacent - bump toward target instead of nooping
            return _move_action(current, target)

        # Get or compute path
        path = self._get_path(current, target, map, reach_adjacent)

        if not path:
            # No path found — try exploring toward target
            return self._move_toward_greedy(current, target, map)

        next_pos = path[0]

        # Check if next position is blocked by agent
        if map.has_agent(next_pos):
            sidestep = self._find_sidestep(current, next_pos, target, map)
            if sidestep:
                self._cached_path = None
                return _move_action(current, sidestep)
            # Don't wait (noop) - try random move to break congestion
            self._cached_path = None
            return self._random_move(current, map)

        # Advance path
        self._cached_path = path[1:] if len(path) > 1 else None
        return _move_action(current, next_pos)

    def explore(
        self,
        current: tuple[int, int],
        map: EntityMap,
        direction_bias: Optional[str] = None,
    ) -> Action:
        """Navigate toward unexplored frontier cells."""
        self._position_history.append(current)
        if len(self._position_history) > 30:
            self._position_history.pop(0)

        if self._is_stuck():
            action = self._break_stuck(current, map)
            if action:
                return action

        frontier = self._find_frontier(current, map, direction_bias)
        if frontier:
            return self.get_action(current, frontier, map)

        # No frontier — random walk
        return self._random_move(current, map)

    def _get_path(
        self,
        start: tuple[int, int],
        target: tuple[int, int],
        map: EntityMap,
        reach_adjacent: bool,
    ) -> Optional[list[tuple[int, int]]]:
        """Get cached path or compute new one."""
        if self._cached_path and self._cached_target == target and self._cached_reach_adjacent == reach_adjacent:
            # Verify path is still valid
            for pos in self._cached_path:
                if map.has_agent(pos):
                    break
            else:
                return self._cached_path

        # Compute new path
        goal_cells = self._compute_goals(target, map, reach_adjacent)
        if not goal_cells:
            return None

        # Try known terrain first
        path = self._astar(start, goal_cells, map, allow_unknown=False)
        if not path:
            # Allow unknown cells
            path = self._astar(start, goal_cells, map, allow_unknown=True)

        self._cached_path = path.copy() if path else None
        self._cached_target = target
        self._cached_reach_adjacent = reach_adjacent
        return path

    def _compute_goals(
        self,
        target: tuple[int, int],
        map: EntityMap,
        reach_adjacent: bool,
    ) -> list[tuple[int, int]]:
        if not reach_adjacent:
            return [target]
        goals = []
        for dr, dc in MOVE_DELTAS.values():
            nr, nc = target[0] + dr, target[1] + dc
            pos = (nr, nc)
            if self._is_traversable(pos, map, allow_unknown=True):
                goals.append(pos)
        return goals

    def _astar(
        self,
        start: tuple[int, int],
        goals: list[tuple[int, int]],
        map: EntityMap,
        allow_unknown: bool,
    ) -> list[tuple[int, int]]:
        """A* pathfinding with iteration limit to prevent hanging."""
        goal_set = set(goals)
        if not goals:
            return []

        def h(pos: tuple[int, int]) -> int:
            return min(_manhattan(pos, g) for g in goals)

        tie = 0
        iterations = 0
        max_iterations = 5000  # Prevent infinite search on large unknown maps

        open_set: list[tuple[int, int, tuple[int, int]]] = [(h(start), tie, start)]
        came_from: dict[tuple[int, int], Optional[tuple[int, int]]] = {start: None}
        g_score: dict[tuple[int, int], int] = {start: 0}

        while open_set and iterations < max_iterations:
            iterations += 1
            _, _, current = heapq.heappop(open_set)

            if current in goal_set:
                return self._reconstruct(came_from, current)

            current_g = g_score.get(current, float("inf"))
            if isinstance(current_g, float):
                continue

            for dr, dc in MOVE_DELTAS.values():
                neighbor = (current[0] + dr, current[1] + dc)
                is_goal = neighbor in goal_set
                if not is_goal and not self._is_traversable(neighbor, map, allow_unknown):
                    continue

                tentative_g = current_g + 1
                if tentative_g < g_score.get(neighbor, float("inf")):
                    came_from[neighbor] = current
                    g_score[neighbor] = tentative_g
                    f = tentative_g + h(neighbor)
                    tie += 1
                    heapq.heappush(open_set, (f, tie, neighbor))

        return []

    def _reconstruct(
        self,
        came_from: dict[tuple[int, int], Optional[tuple[int, int]]],
        current: tuple[int, int],
    ) -> list[tuple[int, int]]:
        path = []
        while came_from[current] is not None:
            path.append(current)
            prev = came_from[current]
            assert prev is not None
            current = prev
        path.reverse()
        return path

    def _is_traversable(
        self,
        pos: tuple[int, int],
        map: EntityMap,
        allow_unknown: bool = False,
    ) -> bool:
        """Check if a cell can be walked through."""
        if map.is_wall(pos) or map.is_structure(pos):
            return False
        if map.has_agent(pos):
            return False
        if pos in map.explored:
            return pos not in map.entities or map.entities[pos].type == "agent"
        # Unknown cell
        return allow_unknown

    def _find_frontier(
        self,
        from_pos: tuple[int, int],
        map: EntityMap,
        direction_bias: Optional[str] = None,
    ) -> Optional[tuple[int, int]]:
        """BFS to find nearest unexplored cell adjacent to explored free cell."""
        from collections import deque

        if direction_bias == "north":
            deltas = [(-1, 0), (0, -1), (0, 1), (1, 0)]
        elif direction_bias == "south":
            deltas = [(1, 0), (0, -1), (0, 1), (-1, 0)]
        elif direction_bias == "east":
            deltas = [(0, 1), (-1, 0), (1, 0), (0, -1)]
        elif direction_bias == "west":
            deltas = [(0, -1), (-1, 0), (1, 0), (0, 1)]
        else:
            deltas = [(-1, 0), (1, 0), (0, -1), (0, 1)]

        visited: set[tuple[int, int]] = {from_pos}
        queue: deque[tuple[int, int, int]] = deque([(from_pos[0], from_pos[1], 0)])

        while queue:
            r, c, dist = queue.popleft()
            if dist > 50:
                continue

            for dr, dc in deltas:
                nr, nc = r + dr, c + dc
                pos = (nr, nc)
                if pos in visited:
                    continue
                visited.add(pos)

                if pos not in map.explored:
                    # Check if any neighbor is explored and free
                    for dr2, dc2 in deltas:
                        adj = (nr + dr2, nc + dc2)
                        if adj in map.explored and map.is_free(adj):
                            return pos
                    continue

                if map.is_free(pos):
                    queue.append((nr, nc, dist + 1))

        return None

    def _find_sidestep(
        self,
        current: tuple[int, int],
        blocked: tuple[int, int],
        target: tuple[int, int],
        map: EntityMap,
    ) -> Optional[tuple[int, int]]:
        """Find sidestep around blocking agent."""
        current_dist = _manhattan(current, target)
        candidates = []
        for d in DIRECTIONS:
            dr, dc = MOVE_DELTAS[d]
            pos = (current[0] + dr, current[1] + dc)
            if pos == blocked:
                continue
            if not self._is_traversable(pos, map, allow_unknown=True):
                continue
            new_dist = _manhattan(pos, target)
            score = new_dist - current_dist
            candidates.append((score, pos))

        if not candidates:
            return None
        candidates.sort()
        if candidates[0][0] <= 2:
            return candidates[0][1]
        return None

    def _is_stuck(self) -> bool:
        history = self._position_history
        if len(history) < 6:
            return False
        recent = history[-6:]
        if len(set(recent)) <= 2:
            return True
        if len(history) >= 20:
            current = history[-1]
            earlier = history[:-10]
            if earlier.count(current) >= 2:
                return True
        return False

    def _break_stuck(self, current: tuple[int, int], map: EntityMap) -> Optional[Action]:
        self._cached_path = None
        self._cached_target = None
        self._position_history.clear()
        return self._random_move(current, map)

    def _random_move(self, current: tuple[int, int], map: EntityMap) -> Action:
        dirs = list(DIRECTIONS)
        random.shuffle(dirs)
        # Try explored free cells first (excluding agent positions)
        for d in dirs:
            dr, dc = MOVE_DELTAS[d]
            pos = (current[0] + dr, current[1] + dc)
            if pos in map.explored and not map.is_wall(pos) and not map.is_structure(pos) and not map.has_agent(pos):
                return Action(name=f"move_{d}")
        # Try explored cells even with agents (will fail but better than noop)
        for d in dirs:
            dr, dc = MOVE_DELTAS[d]
            pos = (current[0] + dr, current[1] + dc)
            if pos in map.explored and not map.is_wall(pos) and not map.is_structure(pos):
                return Action(name=f"move_{d}")
        # Try unknown cells
        for d in dirs:
            dr, dc = MOVE_DELTAS[d]
            pos = (current[0] + dr, current[1] + dc)
            if not map.is_wall(pos):
                return Action(name=f"move_{d}")
        # Absolute last resort: try any direction (will likely fail but attempt something)
        return Action(name=f"move_{dirs[0]}")

    def _move_toward_greedy(self, current: tuple[int, int], target: tuple[int, int], map: EntityMap) -> Action:
        """Move greedily toward target without pathfinding."""
        dr = target[0] - current[0]
        dc = target[1] - current[1]

        # Try primary direction
        if abs(dr) >= abs(dc):
            primary = "south" if dr > 0 else "north"
            secondary = "east" if dc > 0 else "west"
        else:
            primary = "east" if dc > 0 else "west"
            secondary = "south" if dr > 0 else "north"

        for d in [primary, secondary]:
            ddr, ddc = MOVE_DELTAS[d]
            pos = (current[0] + ddr, current[1] + ddc)
            if not map.is_wall(pos) and not map.is_structure(pos) and not map.has_agent(pos):
                return Action(name=f"move_{d}")

        return self._random_move(current, map)


def _manhattan(a: tuple[int, int], b: tuple[int, int]) -> int:
    return abs(a[0] - b[0]) + abs(a[1] - b[1])


def _move_action(current: tuple[int, int], target: tuple[int, int]) -> Action:
    """Return move action from current to adjacent target."""
    dr = target[0] - current[0]
    dc = target[1] - current[1]
    if dr == -1 and dc == 0:
        return Action(name="move_north")
    if dr == 1 and dc == 0:
        return Action(name="move_south")
    if dr == 0 and dc == 1:
        return Action(name="move_east")
    if dr == 0 and dc == -1:
        return Action(name="move_west")
    # Already at target - pick a random direction instead of nooping
    return Action(name=f"move_{random.choice(['north', 'south', 'east', 'west'])}")
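
For orientation, a minimal sketch of driving this navigator for one tick. The StubMap below is hypothetical and is not the package's EntityMap (cogas/entity_map.py); it only mimics the attributes and methods navigator.py actually reads (explored, entities, is_wall, is_structure, has_agent, is_free). Running it requires mettagrid installed, since the module imports Action from mettagrid.simulator.

# Hypothetical usage sketch; StubMap stands in for the real EntityMap.
from cogames_agents.policy.scripted_agent.cogas.navigator import Navigator


class StubMap:
    def __init__(self, walls: set[tuple[int, int]], explored: set[tuple[int, int]]) -> None:
        self.explored = explored   # cells the agent has already seen
        self.entities = {}         # pos -> entity; empty means open floor
        self._walls = walls

    def is_wall(self, pos) -> bool:
        return pos in self._walls

    def is_structure(self, pos) -> bool:
        return False

    def has_agent(self, pos) -> bool:
        return False

    def is_free(self, pos) -> bool:
        return pos not in self._walls


# A fully explored 5x5 room with a wall segment between agent and target.
explored = {(r, c) for r in range(5) for c in range(5)}
walls = {(2, 1), (2, 2), (2, 3)}

nav = Navigator()
action = nav.get_action(current=(0, 2), target=(4, 2), map=StubMap(walls, explored))
print(action)  # e.g. move_south: the first step of an A* route around the wall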

cogames_agents/policy/scripted_agent/cogas/obs_parser.py
@@ -0,0 +1,238 @@
"""Observation parser for Cogas policy.

Converts raw observation tokens into StateSnapshot and visible entities.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

from .context import StateSnapshot
from .entity_map import Entity

if TYPE_CHECKING:
    from mettagrid.policy.policy_env_interface import PolicyEnvInterface
    from mettagrid.simulator.interface import AgentObservation


class ObsParser:
    """Parses observation tokens into state snapshot and visible entities."""

    def __init__(self, policy_env_info: PolicyEnvInterface) -> None:
        self._obs_hr = policy_env_info.obs_height // 2
        self._obs_wr = policy_env_info.obs_width // 2
        self._tag_names = policy_env_info.tag_id_to_name

        # Derive vibe names from action names
        self._vibe_names: list[str] = []
        for action_name in policy_env_info.action_names:
            if action_name.startswith("change_vibe_"):
                self._vibe_names.append(action_name[len("change_vibe_") :])

        # Collective name mapping
        self._collective_names = ["clips", "cogs"]  # Alphabetical
        self._cogs_collective_id = 1  # "cogs" is index 1 alphabetically
        self._clips_collective_id = 0  # "clips" is index 0

    def parse(
        self,
        obs: AgentObservation,
        step: int,
        spawn_pos: tuple[int, int],
    ) -> tuple[StateSnapshot, dict[tuple[int, int], Entity]]:
        """Parse observation into state snapshot and visible entities.

        Args:
            obs: Raw observation
            step: Current tick
            spawn_pos: Agent's spawn position for offset calculation

        Returns:
            (state_snapshot, visible_entities_dict)
        """
        state = StateSnapshot()

        # Read center cell for inventory/vibe and local position
        inv: dict[str, int] = {}
        vibe_id = 0
        # Local position tokens: lp:east/west for col offset, lp:north/south for row offset
        lp_col_offset = 0  # east is positive, west is negative
        lp_row_offset = 0  # south is positive, north is negative
        has_position = False

        center_r, center_c = self._obs_hr, self._obs_wr

        for tok in obs.tokens:
            if tok.row() == center_r and tok.col() == center_c:
                feature_name = tok.feature.name
                if feature_name.startswith("inv:"):
                    resource_name = feature_name[4:]
                    # Handle multi-token encoding
                    if ":p" in resource_name:
                        base_name, power_str = resource_name.rsplit(":p", 1)
                        power = int(power_str)
                        current = inv.get(base_name, 0)
                        inv[base_name] = current + tok.value * (256**power)
                    else:
                        current = inv.get(resource_name, 0)
                        inv[resource_name] = current + tok.value
                elif feature_name == "vibe":
                    vibe_id = tok.value
                # Local position tokens from local_position observation feature
                elif feature_name == "lp:east":
                    lp_col_offset = tok.value
                    has_position = True
                elif feature_name == "lp:west":
                    lp_col_offset = -tok.value
                    has_position = True
                elif feature_name == "lp:south":
                    lp_row_offset = tok.value
                    has_position = True
                elif feature_name == "lp:north":
                    lp_row_offset = -tok.value
                    has_position = True

        # Build state - lp: tokens give offset from spawn
        if has_position:
            state.position = (spawn_pos[0] + lp_row_offset, spawn_pos[1] + lp_col_offset)
        else:
            state.position = spawn_pos

        state.hp = inv.get("hp", 100)
        state.energy = inv.get("energy", 100)
        state.carbon = inv.get("carbon", 0)
        state.oxygen = inv.get("oxygen", 0)
        state.germanium = inv.get("germanium", 0)
        state.silicon = inv.get("silicon", 0)
        state.heart = inv.get("heart", 0)
        state.influence = inv.get("influence", 0)
        state.miner_gear = inv.get("miner", 0) > 0
        state.scout_gear = inv.get("scout", 0) > 0
        state.aligner_gear = inv.get("aligner", 0) > 0
        state.scrambler_gear = inv.get("scrambler", 0) > 0
        state.vibe = self._get_vibe_name(vibe_id)

        # Read collective inventory from the inv dict.
        # Collective tokens appear as "inv:collective:<resource>" features on the center cell,
        # parsed above into keys like "collective:carbon", "collective:oxygen", etc.
        state.collective_carbon = inv.get("collective:carbon", 0)
        state.collective_oxygen = inv.get("collective:oxygen", 0)
        state.collective_germanium = inv.get("collective:germanium", 0)
        state.collective_silicon = inv.get("collective:silicon", 0)
        state.collective_heart = inv.get("collective:heart", 0)
        state.collective_influence = inv.get("collective:influence", 0)

        # Parse visible entities
        visible_entities: dict[tuple[int, int], Entity] = {}
        position_features: dict[tuple[int, int], dict] = {}

        for tok in obs.tokens:
            obs_r, obs_c = tok.row(), tok.col()
            # Skip center cell
            if obs_r == center_r and obs_c == center_c:
                continue

            world_r = obs_r - self._obs_hr + state.position[0]
            world_c = obs_c - self._obs_wr + state.position[1]
            world_pos = (world_r, world_c)

            if world_pos not in position_features:
                position_features[world_pos] = {"tags": [], "props": {}}

            feature_name = tok.feature.name
            if feature_name == "tag":
                position_features[world_pos]["tags"].append(tok.value)
            elif feature_name in ("cooldown_remaining", "clipped", "remaining_uses", "collective"):
                position_features[world_pos]["props"][feature_name] = tok.value
            elif feature_name.startswith("inv:"):
                inv_dict = position_features[world_pos].setdefault("inventory", {})
                suffix = feature_name[4:]
                if ":p" in suffix:
                    base_name, power_str = suffix.rsplit(":p", 1)
                    power = int(power_str)
                    current = inv_dict.get(base_name, 0)
                    inv_dict[base_name] = current + tok.value * (256**power)
                else:
                    current = inv_dict.get(suffix, 0)
                    inv_dict[suffix] = current + tok.value

        # Convert to entities
        for world_pos, features in position_features.items():
            tags = features.get("tags", [])
            if not tags:
                continue

            obj_name = self._resolve_object_name(tags)
            if obj_name == "unknown":
                continue

            props = dict(features.get("props", {}))
            inv_data = features.get("inventory")

            # Alignment from collective ID
            collective_id = props.pop("collective", None)
            if collective_id is not None:
                props["collective_id"] = collective_id
            alignment = self._derive_alignment(obj_name, props.get("clipped", 0), collective_id)
            if alignment:
                props["alignment"] = alignment

            # Remaining uses
            if "remaining_uses" not in props:
                props["remaining_uses"] = 999

            # Inventory amount for extractors
            if inv_data:
                props["inventory_amount"] = sum(inv_data.values())
                props["has_inventory"] = True
            else:
                props.setdefault("inventory_amount", -1)

            visible_entities[world_pos] = Entity(
                type=obj_name,
                properties=props,
                last_seen=step,
            )

        return state, visible_entities

    def _resolve_object_name(self, tag_ids: list[int]) -> str:
        """Resolve tag IDs to an object name."""
        resolved = [self._tag_names.get(tid, "") for tid in tag_ids]

        # Priority: type:* tags
        for tag in resolved:
            if tag.startswith("type:"):
                return tag[5:]

        # Non-collective tags
        for tag in resolved:
            if tag and not tag.startswith("collective:"):
                return tag

        return "unknown"

    def _get_vibe_name(self, vibe_id: int) -> str:
        if 0 <= vibe_id < len(self._vibe_names):
            return self._vibe_names[vibe_id]
        return "default"

    def _derive_alignment(self, obj_name: str, clipped: int, collective_id: int | None) -> str | None:
        if collective_id is not None:
            if collective_id == self._cogs_collective_id:
                return "cogs"
            elif collective_id == self._clips_collective_id:
                return "clips"
        if "cogs" in obj_name:
            return "cogs"
        if "clips" in obj_name or clipped > 0:
            return "clips"
        return None

    @property
    def obs_half_height(self) -> int:
        return self._obs_hr

    @property
    def obs_half_width(self) -> int:
        return self._obs_wr
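
The inventory tokens above use a multi-byte encoding: a plain inv:<resource> token carries the low byte of a count, and inv:<resource>:p<k> tokens add value * 256**k. A standalone sketch of that decoding rule follows; the token names and values in the example are made up for illustration, and the helper function is not part of the package.

# Standalone sketch of the base-256 multi-token inventory decoding applied by ObsParser.
# The token list below is illustrative; real tokens come from obs.tokens.
def decode_inventory(tokens: list[tuple[str, int]]) -> dict[str, int]:
    inv: dict[str, int] = {}
    for feature_name, value in tokens:
        if not feature_name.startswith("inv:"):
            continue
        resource = feature_name[4:]
        if ":p" in resource:
            # e.g. "inv:carbon:p1" contributes value * 256**1
            base, power_str = resource.rsplit(":p", 1)
            inv[base] = inv.get(base, 0) + value * (256 ** int(power_str))
        else:
            inv[resource] = inv.get(resource, 0) + value
    return inv


# 300 carbon = 44 + 1 * 256, split across two tokens:
print(decode_inventory([("inv:carbon", 44), ("inv:carbon:p1", 1)]))  # {'carbon': 300}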