multi-agent-rlenv 3.3.7__py3-none-any.whl → 3.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- marlenv/__init__.py +11 -13
- marlenv/adapters/gym_adapter.py +6 -16
- marlenv/adapters/overcooked_adapter.py +43 -23
- marlenv/adapters/pettingzoo_adapter.py +5 -5
- marlenv/adapters/pymarl_adapter.py +3 -4
- marlenv/adapters/smac_adapter.py +6 -6
- marlenv/env_builder.py +8 -9
- marlenv/env_pool.py +5 -7
- marlenv/mock_env.py +7 -7
- marlenv/models/__init__.py +2 -4
- marlenv/models/env.py +23 -12
- marlenv/models/episode.py +17 -20
- marlenv/models/spaces.py +90 -83
- marlenv/models/transition.py +6 -10
- marlenv/utils/__init__.py +10 -0
- marlenv/utils/schedule.py +281 -0
- marlenv/wrappers/agent_id_wrapper.py +4 -5
- marlenv/wrappers/available_actions_mask.py +6 -7
- marlenv/wrappers/available_actions_wrapper.py +7 -9
- marlenv/wrappers/blind_wrapper.py +5 -7
- marlenv/wrappers/centralised.py +12 -14
- marlenv/wrappers/delayed_rewards.py +13 -11
- marlenv/wrappers/last_action_wrapper.py +10 -14
- marlenv/wrappers/paddings.py +6 -8
- marlenv/wrappers/penalty_wrapper.py +5 -8
- marlenv/wrappers/rlenv_wrapper.py +12 -9
- marlenv/wrappers/time_limit.py +3 -3
- marlenv/wrappers/video_recorder.py +4 -6
- {multi_agent_rlenv-3.3.7.dist-info → multi_agent_rlenv-3.5.0.dist-info}/METADATA +1 -1
- multi_agent_rlenv-3.5.0.dist-info/RECORD +39 -0
- multi_agent_rlenv-3.3.7.dist-info/RECORD +0 -37
- {multi_agent_rlenv-3.3.7.dist-info → multi_agent_rlenv-3.5.0.dist-info}/WHEEL +0 -0
- {multi_agent_rlenv-3.3.7.dist-info → multi_agent_rlenv-3.5.0.dist-info}/licenses/LICENSE +0 -0
marlenv/models/episode.py
CHANGED
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from functools import cached_property
-from typing import Any, Callable,
+from typing import Any, Callable, Optional, Sequence, overload
 
 import numpy as np
 import numpy.typing as npt
@@ -14,11 +14,8 @@ from .env import MARLEnv
 from marlenv.exceptions import EnvironmentMismatchException, ReplayMismatchException
 
 
-A = TypeVar("A")
-
-
 @dataclass
-class Episode(Generic[A]):
+class Episode:
     """Episode model made of observations, actions, rewards, ..."""
 
     all_observations: list[npt.NDArray[np.float32]]
@@ -55,7 +52,7 @@ class Episode(Generic[A]):
         )
 
     @staticmethod
-    def from_transitions(transitions: Sequence[Transition
+    def from_transitions(transitions: Sequence[Transition]) -> "Episode":
         """Create an episode from a list of transitions"""
         episode = Episode.new(transitions[0].obs, transitions[0].state)
         for transition in transitions:
@@ -179,9 +176,9 @@ class Episode(Generic[A]):
     @cached_property
     def dones(self):
         """The done flags for each transition"""
-        dones = np.zeros_like(self.rewards, dtype=np.
+        dones = np.zeros_like(self.rewards, dtype=np.bool)
         if self.is_done:
-            dones[self.episode_len - 1 :] =
+            dones[self.episode_len - 1 :] = True
         return dones
 
     @property
@@ -214,11 +211,11 @@ class Episode(Generic[A]):
 
     def replay(
         self,
-        env: MARLEnv
+        env: MARLEnv,
         seed: Optional[int] = None,
         *,
-        after_reset: Optional[Callable[[Observation, State, MARLEnv
-        after_step: Optional[Callable[[int, Step, MARLEnv
+        after_reset: Optional[Callable[[Observation, State, MARLEnv], None]] = None,
+        after_step: Optional[Callable[[int, Step, MARLEnv], None]] = None,
     ):
         """
         Replay the episode in the environment (i.e. perform the actions) and assert that the outcomes match.
@@ -243,12 +240,12 @@ class Episode(Generic[A]):
                 raise ReplayMismatchException("observation", step.obs.data, self.next_obs[i], time_step=i)
             if not np.array_equal(step.state.data, self.next_states[i]):
                 raise ReplayMismatchException("state", step.state.data, self.next_states[i], time_step=i)
-            if not np.
+            if not np.isclose(step.reward, self.rewards[i]):
                 raise ReplayMismatchException("reward", step.reward, self.rewards[i], time_step=i)
             if after_step is not None:
                 after_step(i, step, env)
 
-    def get_images(self, env: MARLEnv
+    def get_images(self, env: MARLEnv, seed: Optional[int] = None) -> list[np.ndarray]:
        images = []
 
        def collect_image(*_, **__):
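The replay hooks now receive the non-generic MARLEnv directly and are keyword-only after the `*` marker. A minimal usage sketch, where `env` and `episode` are hypothetical placeholders for an environment and a previously recorded episode (not part of the diff):

    # `env` and `episode` are hypothetical placeholders, not defined in this diff.
    def log_reward(t, step, env):
        # Called after every replayed env.step(); `step` is the Step it returned.
        print(f"t={t}: reward={step.reward}")

    # Raises ReplayMismatchException if any observation, state or reward diverges.
    episode.replay(env, seed=42, after_step=log_reward)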
@@ -257,7 +254,7 @@ class Episode(Generic[A]):
         self.replay(env, seed, after_reset=collect_image, after_step=collect_image)
         return images
 
-    def render(self, env: MARLEnv
+    def render(self, env: MARLEnv, seed: Optional[int] = None, fps: int = 5):
         def render_callback(*_, **__):
             env.render()
             cv2.waitKey(1000 // fps)
@@ -288,10 +285,10 @@ class Episode(Generic[A]):
         return returns
 
     @overload
-    def add(self, transition: Transition
+    def add(self, transition: Transition, /): ...
 
     @overload
-    def add(self, step: Step, action:
+    def add(self, step: Step, action: np.ndarray, /): ...
 
     def add(self, *data):
         match data:
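The two `add` overloads are now positional-only (`/`) and the action type is pinned to `np.ndarray`. Both call shapes, with `episode`, `transition`, `step` and `action` standing in for values from a rollout loop (hypothetical names):

    episode.add(transition)    # 1-argument form: a complete Transition
    episode.add(step, action)  # 2-argument form: a Step plus the np.ndarray action taken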
@@ -322,10 +319,10 @@ class Episode(Generic[A]):
 
     def add_data(
         self,
-        next_obs,
-        next_state,
-        action:
-        reward: np.
+        next_obs: Observation,
+        next_state: State,
+        action: np.ndarray,
+        reward: npt.NDArray[np.float32],
         others: dict[str, Any],
         done: bool,
         truncated: bool,
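With the `TypeVar` removed, `Episode.from_transitions` returns a plain `Episode`, and `dones` is now a boolean array. A sketch under that reading, where `transitions` is a hypothetical list of recorded `Transition` objects:

    # `transitions` is a hypothetical list of Transition objects.
    episode = Episode.from_transitions(transitions)
    assert episode.dones.dtype == np.bool              # done flags are boolean now
    assert bool(episode.dones[-1]) == episode.is_done  # last flag mirrors is_done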
marlenv/models/spaces.py
CHANGED
@@ -1,13 +1,11 @@
 import math
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import
+from typing import Optional
 
 import numpy as np
 import numpy.typing as npt
 
-S = TypeVar("S", bound="Space")
-
 
 @dataclass
 class Space(ABC):
@@ -23,7 +21,7 @@ class Space(ABC):
         self.labels = labels
 
     @abstractmethod
-    def sample(self, mask: Optional[npt.NDArray[np.bool_]] = None) ->
+    def sample(self, mask: Optional[npt.NDArray[np.bool_]] = None) -> npt.NDArray[np.float32]:
         """Sample a value from the space."""
 
     def __eq__(self, value: object) -> bool:
@@ -34,6 +32,16 @@
     def __ne__(self, value: object) -> bool:
         return not self.__eq__(value)
 
+    @property
+    @abstractmethod
+    def is_discrete(self) -> bool:
+        """Whether the space is discrete."""
+
+    @property
+    def is_continuous(self) -> bool:
+        """Whether the space is continuous."""
+        return not self.is_discrete
+
 
 @dataclass
 class DiscreteSpace(Space):
@@ -45,8 +53,8 @@ class DiscreteSpace(Space):
         self.size = size
         self.space = np.arange(size)
 
-    def sample(self, mask: Optional[npt.NDArray[np.
-        space = self.space
+    def sample(self, mask: Optional[npt.NDArray[np.bool]] = None):
+        space = self.space.copy()
         if mask is not None:
             space = space[mask]
         return int(np.random.choice(space))
@@ -58,6 +66,25 @@
             return False
         return super().__eq__(value)
 
+    @property
+    def is_discrete(self) -> bool:
+        return True
+
+    @staticmethod
+    def action(size, labels: Optional[list[str]] = None):
+        """
+        Create a discrete action space where the default labels are set to "Action-n".
+        """
+        if labels is None:
+            labels = [f"Action {i}" for i in range(size)]
+        return DiscreteSpace(size, labels)
+
+    def repeat(self, n: int):
+        """
+        Repeat the discrete space n times.
+        """
+        return MultiDiscreteSpace(*([self] * n), labels=self.labels)
+
 
 @dataclass
 class MultiDiscreteSpace(Space):
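The new `DiscreteSpace.action` factory and `repeat` helper appear to cover part of what the removed `*ActionSpace` classes provided (see the deletions further down). A self-contained sketch, assuming the import path matches the file layout:

    from marlenv.models.spaces import DiscreteSpace

    move = DiscreteSpace.action(4)   # default labels "Action 0" ... "Action 3"
    assert move.is_discrete and not move.is_continuous
    joint = move.repeat(3)           # MultiDiscreteSpace built from 3 copies of `move`
    actions = joint.sample()         # np.int32 array with one entry per copy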
@@ -75,10 +102,10 @@ class MultiDiscreteSpace(Space):
     def from_sizes(cls, *sizes: int):
         return cls(*(DiscreteSpace(size) for size in sizes))
 
-    def sample(self,
-        if
+    def sample(self, mask: Optional[npt.NDArray[np.bool] | list[npt.NDArray[np.bool]]] = None):
+        if mask is None:
             return np.array([space.sample() for space in self.spaces], dtype=np.int32)
-        return np.array([space.sample(mask) for mask, space in zip(
+        return np.array([space.sample(mask=mask) for mask, space in zip(mask, self.spaces)], dtype=np.int32)
 
     def __eq__(self, value: object) -> bool:
         if not isinstance(value, MultiDiscreteSpace):
@@ -90,6 +117,10 @@ class MultiDiscreteSpace(Space):
             return False
         return super().__eq__(value)
 
+    @property
+    def is_discrete(self) -> bool:
+        return True
+
 
 @dataclass
 class ContinuousSpace(Space):
@@ -100,23 +131,35 @@ class ContinuousSpace(Space):
     high: npt.NDArray[np.float32]
     """Upper bound of the space for each dimension."""
 
-    def __init__(
-        self,
-        low: int | float | list | npt.NDArray[np.float32],
-        high: int | float | list | npt.NDArray[np.float32],
+    def __init__(
+        self,
+        low: int | float | list | npt.NDArray[np.float32] | None,
+        high: int | float | list | npt.NDArray[np.float32] | None,
         labels: Optional[list[str]] = None,
     ):
         match low:
+            case None:
+                assert high is not None, "If low is None, high must be set to infer the shape."
+                shape = ContinuousSpace.get_shape(high)
+                low = np.full(shape, -np.inf, dtype=np.float32)
             case list():
                 low = np.array(low, dtype=np.float32)
             case float() | int():
                 low = np.array([low], dtype=np.float32)
         match high:
+            case None:
+                assert low is not None, "If high is None, low must be set to infer the shape."
+                shape = ContinuousSpace.get_shape(low)
+                high = np.full(shape, np.inf, dtype=np.float32)
             case list():
                 high = np.array(high, dtype=np.float32)
             case float() | int():
                 high = np.array([high], dtype=np.float32)
-
+        assert low.shape == high.shape, f"Low and high must have the same shape. Low shape: {low.shape}, high shape: {high.shape}"
+        assert np.all(low <= high), "All elements in low must be less than the corresponding elements in high."
+        Space.__init__(self, low.shape, labels)
+        self.low = low
+        self.high = high
 
     @staticmethod
     def from_shape(
@@ -143,20 +186,24 @@ class ContinuousSpace(Space):
         high = np.array(high, dtype=np.float32)
         return ContinuousSpace(low, high, labels)
 
-    def
-
-
-
-
-
-
-
-        self.low = low
-        self.high = high
+    def clamp(self, action: np.ndarray | list):
+        """Clamp the action to the bounds of the space."""
+        if isinstance(action, list):
+            action = np.array(action)
+        return np.clip(action, self.low, self.high)
+
+    def sample(self) -> npt.NDArray[np.float32]:
+        r = np.random.random(self.shape) * (self.high - self.low) + self.low
+        return r.astype(np.float32)
 
-
-
+    @staticmethod
+    def get_shape(item: float | int | list | npt.NDArray[np.float32]) -> tuple[int, ...]:
+        """Get the shape of the item."""
+        if isinstance(item, list):
+            item = np.array(item)
+        if isinstance(item, np.ndarray):
+            return item.shape
+        return (1,)
 
     def __eq__(self, value: object) -> bool:
         if not isinstance(value, ContinuousSpace):
@@ -167,59 +214,19 @@ class ContinuousSpace(Space):
             return False
         return super().__eq__(value)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    def sample(self, mask: np.ndarray | None = None):
-        res = []
-        for i in range(self.n_agents):
-            if mask is not None:
-                m = mask[i]
-            else:
-                m = None
-            res.append(self.individual_action_space.sample(m))
-        return np.array(res)
-
-    def __eq__(self, value: object) -> bool:
-        if not isinstance(value, ActionSpace):
-            return False
-        if self.n_agents != value.n_agents:
-            return False
-        if self.n_actions != value.n_actions:
-            return False
-        if self.individual_action_space != value.individual_action_space:
-            return False
-        return super().__eq__(value)
-
-
-@dataclass
-class DiscreteActionSpace(ActionSpace[DiscreteSpace]):
-    def __init__(self, n_agents: int, n_actions: int, action_names: Optional[list[str]] = None):
-        individual_action_space = DiscreteSpace(n_actions, action_names)
-        super().__init__(n_agents, individual_action_space, action_names)
-
-
-@dataclass
-class MultiDiscreteActionSpace(ActionSpace[MultiDiscreteSpace]):
-    pass
-
-
-@dataclass
-class ContinuousActionSpace(ActionSpace[ContinuousSpace]):
-    def __init__(self, n_agents: int, low: np.ndarray | list, high: np.ndarray | list, action_names: list | None = None):
-        space = ContinuousSpace.from_bounds(low, high, action_names)
-        super().__init__(n_agents, space, action_names)
+    def repeat(self, n: int):
+        """
+        Repeat the continuous space n times to become of shape (n, *shape).
+        """
+        low = np.tile(self.low, (n, 1))
+        high = np.tile(self.high, (n, 1))
+        return ContinuousSpace.from_shape(
+            (n, *self.shape),
+            low=low,
+            high=high,
+            labels=self.labels,
+        )
+
+    @property
+    def is_discrete(self) -> bool:
+        return False
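The reworked `ContinuousSpace.__init__` now accepts `None` for either bound, filling it with ±inf at the other bound's shape; `clamp` and the mask-less `sample` are new. A sketch under that reading of the diff (illustrative values, import path assumed from the file layout):

    from marlenv.models.spaces import ContinuousSpace

    box = ContinuousSpace(low=[0.0, -1.0], high=[1.0, 1.0])
    a = box.sample()                        # float32, uniform in [low, high)
    clamped = box.clamp([2.0, -5.0])        # -> [1.0, -1.0], clipped to the bounds
    half_open = ContinuousSpace(0.0, None)  # high filled with +inf at low's shape (1,)
    assert box.is_continuous and not box.is_discrete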
marlenv/models/transition.py
CHANGED
@@ -1,6 +1,5 @@
 from dataclasses import dataclass
-from typing import Any,
-from typing_extensions import TypeVar
+from typing import Any, Sequence
 
 import numpy as np
 import numpy.typing as npt
@@ -10,16 +9,13 @@ from .state import State
 from .step import Step
 
 
-A = TypeVar("A", default=np.ndarray)
-
-
 @dataclass
-class Transition(Generic[A]):
+class Transition:
     """Transition model"""
 
     obs: Observation
     state: State
-    action: A
+    action: np.ndarray
     reward: npt.NDArray[np.float32]
     done: bool
     info: dict[str, Any]
@@ -32,7 +28,7 @@ class Transition(Generic[A]):
         self,
         obs: Observation,
         state: State,
-        action:
+        action: np.ndarray | Sequence[float],
         reward: npt.NDArray[np.float32] | float | Sequence[float],
         done: bool,
         info: dict[str, Any],
@@ -65,14 +61,14 @@ class Transition(Generic[A]):
     def from_step(
         prev_obs: Observation,
         prev_state: State,
-
+        action: np.ndarray | Sequence[float],
         step: Step,
         **kwargs,
     ):
         return Transition(
             obs=prev_obs,
             state=prev_state,
-            action=
+            action=action,
             reward=step.reward,
             done=step.done,
             info=step.info,
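Since `Transition` is no longer generic, the action travels as a plain `np.ndarray` (sequences of floats are also accepted, per the annotations). A sketch of the `from_step` path, where `obs`, `state` and `step` stand in for values produced by an environment (hypothetical names):

    import numpy as np

    # `obs`, `state` and `step` are hypothetical values from an earlier
    # env.reset() / env.step() call; they are not defined in this diff.
    t = Transition.from_step(
        prev_obs=obs,                # Observation before the step
        prev_state=state,            # State before the step
        action=np.array([1, 0, 2]),  # one action per agent
        step=step,                   # Step returned by env.step(action)
    )
    assert isinstance(t.action, np.ndarray)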