multi-agent-rlenv 3.4.0__tar.gz → 3.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/PKG-INFO +1 -1
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/__init__.py +11 -13
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/gym_adapter.py +6 -16
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/overcooked_adapter.py +6 -7
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/pettingzoo_adapter.py +5 -5
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/pymarl_adapter.py +3 -4
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/smac_adapter.py +6 -6
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/env_builder.py +8 -9
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/env_pool.py +5 -7
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/mock_env.py +7 -7
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/__init__.py +2 -4
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/env.py +18 -12
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/episode.py +15 -18
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/spaces.py +90 -83
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/step.py +1 -1
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/transition.py +6 -10
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/__init__.py +2 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/agent_id_wrapper.py +4 -5
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/available_actions_mask.py +6 -7
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/available_actions_wrapper.py +7 -9
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/blind_wrapper.py +5 -7
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/centralised.py +12 -14
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/delayed_rewards.py +13 -11
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/last_action_wrapper.py +10 -14
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/paddings.py +6 -8
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/penalty_wrapper.py +5 -8
- multi_agent_rlenv-3.5.1/src/marlenv/wrappers/potential_shaping.py +49 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/rlenv_wrapper.py +12 -10
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/time_limit.py +3 -3
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/video_recorder.py +4 -6
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/tests/test_adapters.py +7 -7
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/tests/test_models.py +2 -2
- multi_agent_rlenv-3.5.1/tests/test_spaces.py +183 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/tests/test_wrappers.py +35 -3
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/tests/utils.py +2 -2
- multi_agent_rlenv-3.4.0/tests/test_spaces.py +0 -134
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/.github/workflows/ci.yaml +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/.github/workflows/docs.yaml +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/.gitignore +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/LICENSE +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/README.md +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/pyproject.toml +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/__init__.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/exceptions.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/observation.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/state.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/py.typed +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/utils/__init__.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/utils/schedule.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/tests/__init__.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/tests/test_episode.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/tests/test_pool.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/tests/test_schedules.py +0 -0
- {multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/tests/test_serialization.py +0 -0
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: multi-agent-rlenv
-Version: 3.4.0
+Version: 3.5.1
 Summary: A strongly typed Multi-Agent Reinforcement Learning framework
 Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
 Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/__init__.py
RENAMED
@@ -62,16 +62,11 @@ print(env.extras_shape) # (1, )
 If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
 """
 
-__version__ = "3.4.0"
+__version__ = "3.5.1"
 
 from . import models
-from . import wrappers
-from . import adapters
-from .models import spaces
-
-
-from .env_builder import make, Builder
 from .models import (
+    spaces,
     MARLEnv,
     State,
     Step,
@@ -80,10 +75,14 @@ from .models import (
     Transition,
     DiscreteSpace,
     ContinuousSpace,
-
-
-    ContinuousActionSpace,
+    Space,
+    MultiDiscreteSpace,
 )
+
+
+from . import wrappers
+from . import adapters
+from .env_builder import make, Builder
 from .wrappers import RLEnvWrapper
 from .mock_env import DiscreteMockEnv, DiscreteMOMockEnv
 
@@ -100,12 +99,11 @@ __all__ = [
     "Observation",
     "Episode",
     "Transition",
-    "ActionSpace",
     "DiscreteSpace",
     "ContinuousSpace",
-    "DiscreteActionSpace",
-    "ContinuousActionSpace",
     "DiscreteMockEnv",
     "DiscreteMOMockEnv",
     "RLEnvWrapper",
+    "Space",
+    "MultiDiscreteSpace",
 ]
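Taken together, the export changes above remove `ActionSpace`, `DiscreteActionSpace` and `ContinuousActionSpace` from the top-level package and add `Space` and `MultiDiscreteSpace`; multi-agent discrete action spaces are now built by repeating a single-agent `DiscreteSpace`, as the adapter diffs below show. A minimal migration sketch, assuming only what appears in this diff (the old constructor form is taken from the removed SMAC line):

```python
from marlenv import DiscreteSpace, MultiDiscreteSpace

# 3.4.0 (removed API):
#   from marlenv import DiscreteActionSpace
#   action_space = DiscreteActionSpace(n_agents, n_actions)

# 3.5.1: build the per-agent space, then repeat it for each agent.
n_agents, n_actions = 2, 5
action_space: MultiDiscreteSpace = DiscreteSpace(n_actions).repeat(n_agents)
```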
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/gym_adapter.py
RENAMED
@@ -1,26 +1,16 @@
 import sys
-import cv2
 from dataclasses import dataclass
-from typing import Sequence
 
+import cv2
 import gymnasium as gym
 import numpy as np
-import numpy.typing as npt
 from gymnasium import Env, spaces
 
-from marlenv import (
-    ActionSpace,
-    ContinuousActionSpace,
-    DiscreteActionSpace,
-    MARLEnv,
-    Observation,
-    State,
-    Step,
-)
+from marlenv import ContinuousSpace, DiscreteSpace, MARLEnv, Observation, Space, State, Step
 
 
 @dataclass
-class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
+class Gym(MARLEnv[Space]):
     """Wraps a gym envronment in an RLEnv"""
 
     def __init__(self, env: Env | str, **kwargs):
@@ -30,7 +20,7 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
             raise NotImplementedError("Observation space must have a shape")
         match env.action_space:
             case spaces.Discrete() as s:
-                space =
+                space = DiscreteSpace(int(s.n), labels=[f"Action {i}" for i in range(s.n)]).repeat(1)
             case spaces.Box() as s:
                 low = s.low.astype(np.float32)
                 high = s.high.astype(np.float32)
@@ -38,10 +28,10 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
                 low = np.full(s.shape, s.low, dtype=np.float32)
                 if not isinstance(high, np.ndarray):
                     high = np.full(s.shape, s.high, dtype=np.float32)
-                space =
+                space = ContinuousSpace(low, high, labels=[f"Action {i}" for i in range(s.shape[0])]).repeat(1)
             case other:
                 raise NotImplementedError(f"Action space {other} not supported")
-        super().__init__(space, env.observation_space.shape, (1,))
+        super().__init__(1, space, env.observation_space.shape, (1,))
         self._gym_env = env
         if self._gym_env.unwrapped.spec is not None:
            self.name = self._gym_env.unwrapped.spec.id
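With this change, a gymnasium environment is exposed as a one-agent `MARLEnv` (`n_agents=1` is passed to `super().__init__`) whose action space is the single-agent space repeated once. A usage sketch, assuming `Gym` is re-exported from `marlenv.adapters` and using an arbitrary gymnasium id for illustration:

```python
import gymnasium as gym
from marlenv.adapters import Gym

# "CartPole-v1" is only an example id; any gymnasium Env (or id string,
# per the __init__ signature above) should work the same way.
env = Gym(gym.make("CartPole-v1"))
print(env.n_agents)   # 1: the gym env is wrapped as a single-agent MARLEnv
print(env.n_actions)  # derived from action_space.shape[-1] in MARLEnv.__init__
```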
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/overcooked_adapter.py
RENAMED
@@ -7,7 +7,7 @@ import cv2
 import numpy as np
 import numpy.typing as npt
 import pygame
-from marlenv.models import ContinuousSpace,
+from marlenv.models import ContinuousSpace, DiscreteSpace, MARLEnv, Observation, State, Step, MultiDiscreteSpace
 from marlenv.utils import Schedule
 
 from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
@@ -16,7 +16,7 @@ from overcooked_ai_py.visualization.state_visualizer import StateVisualizer
 
 
 @dataclass
-class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
+class Overcooked(MARLEnv[MultiDiscreteSpace]):
     horizon: int
     shaping_factor: Schedule
 
@@ -37,10 +37,9 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         # -1 because we extract the "urgent" layer to the extras
         shape = (int(layers - 1), int(width), int(height))
         super().__init__(
-
-
-
-            action_names=[Action.ACTION_TO_CHAR[a] for a in Action.ALL_ACTIONS],
+            n_agents=self._mdp.num_players,
+            action_space=DiscreteSpace(Action.NUM_ACTIONS, labels=[Action.ACTION_TO_CHAR[a] for a in Action.ALL_ACTIONS]).repeat(
+                self._mdp.num_players
             ),
             observation_shape=shape,
             extras_shape=(2,),
@@ -95,7 +94,7 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
             available_actions[agent_num, Action.ACTION_TO_INDEX[action]] = True
         return np.array(available_actions, dtype=np.bool)
 
-    def step(self, actions: Sequence[int] |
+    def step(self, actions: Sequence[int] | np.ndarray) -> Step:
         self.shaping_factor.update()
         actions = [Action.ALL_ACTIONS[a] for a in actions]
         _, reward, done, info = self._oenv.step(actions, display_phi=True)
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/pettingzoo_adapter.py
RENAMED
@@ -6,17 +6,17 @@ import numpy.typing as npt
 from gymnasium import spaces # pettingzoo uses gymnasium spaces
 from pettingzoo import ParallelEnv
 
-from marlenv.models import
+from marlenv.models import MARLEnv, Observation, State, Step, DiscreteSpace, ContinuousSpace, Space
 
 
 @dataclass
-class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
+class PettingZoo(MARLEnv[Space]):
     def __init__(self, env: ParallelEnv):
         aspace = env.action_space(env.possible_agents[0])
         n_agents = len(env.possible_agents)
         match aspace:
             case spaces.Discrete() as s:
-                space =
+                space = DiscreteSpace.action(int(s.n)).repeat(n_agents)
 
             case spaces.Box() as s:
                 low = s.low.astype(np.float32)
@@ -25,7 +25,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
                 low = np.full(s.shape, s.low, dtype=np.float32)
                 if not isinstance(high, np.ndarray):
                     high = np.full(s.shape, s.high, dtype=np.float32)
-                space =
+                space = ContinuousSpace(low, high=high).repeat(n_agents)
             case other:
                 raise NotImplementedError(f"Action space {other} not supported")
 
@@ -34,7 +34,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
             raise NotImplementedError("Only discrete observation spaces are supported")
         self._pz_env = env
         env.reset()
-        super().__init__(space, obs_space.shape, self.get_state().shape)
+        super().__init__(n_agents, space, obs_space.shape, self.get_state().shape)
         self.agents = env.possible_agents
         self.last_observation = None
 
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/pymarl_adapter.py
RENAMED
@@ -1,10 +1,9 @@
 from dataclasses import dataclass
-from typing import Any
+from typing import Any
 
 import numpy as np
-import numpy.typing as npt
 
-from marlenv.models import
+from marlenv.models import MARLEnv, MultiDiscreteSpace
 from marlenv.wrappers import TimeLimit
 
 
@@ -15,7 +14,7 @@ class PymarlAdapter:
     with the pymarl-qplex code base.
     """
 
-    def __init__(self, env: MARLEnv[
+    def __init__(self, env: MARLEnv[MultiDiscreteSpace], episode_limit: int):
         assert env.reward_space.size == 1, "Only single objective environments are supported."
         self.env = TimeLimit(env, episode_limit, add_extra=False)
         # Required by PyMarl
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/smac_adapter.py
RENAMED
@@ -1,15 +1,15 @@
 from dataclasses import dataclass
-from typing import
+from typing import overload
 
 import numpy as np
 import numpy.typing as npt
 from smac.env import StarCraft2Env
 
-from marlenv.models import
+from marlenv.models import MARLEnv, Observation, State, Step, MultiDiscreteSpace, DiscreteSpace
 
 
 @dataclass
-class SMAC(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
+class SMAC(MARLEnv[MultiDiscreteSpace]):
     """Wrapper for the SMAC environment to work with this framework"""
 
     @overload
@@ -157,10 +157,10 @@ class SMAC(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
             case other:
                 raise ValueError(f"Invalid argument type: {type(other)}")
         self._env = StarCraft2Env(map_name=map_name)
-        action_space = DiscreteActionSpace(self._env.n_agents, self._env.n_actions)
         self._env_info = self._env.get_env_info()
         super().__init__(
-
+            self._env.n_agents,
+            action_space=DiscreteSpace(self._env.n_actions).repeat(self._env.n_agents),
             observation_shape=(self._env_info["obs_shape"],),
             state_shape=(self._env_info["state_shape"],),
         )
@@ -195,7 +195,7 @@ class SMAC(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         )
         return step
 
-    def available_actions(self) -> npt.NDArray[np.
+    def available_actions(self) -> npt.NDArray[np.bool]:
         return np.array(self._env.get_avail_actions()) == 1
 
     def get_image(self):
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/env_builder.py
RENAMED
@@ -5,10 +5,9 @@ import numpy.typing as npt
 
 from . import wrappers
 from marlenv import adapters
-from .models import
+from .models import Space, MARLEnv
 
-
-AS = TypeVar("AS", bound=ActionSpace)
+AS = TypeVar("AS", bound=Space)
 
 if adapters.HAS_PETTINGZOO:
     from .adapters import PettingZoo
@@ -71,12 +70,12 @@ def make(env, **kwargs):
 
 
 @dataclass
-class Builder(Generic[A, AS]):
+class Builder(Generic[AS]):
     """Builder for environments"""
 
-    _env: MARLEnv[
+    _env: MARLEnv[AS]
 
-    def __init__(self, env: MARLEnv[
+    def __init__(self, env: MARLEnv[AS]):
         self._env = env
 
     def time_limit(self, n_steps: int, add_extra: bool = True, truncation_penalty: Optional[float] = None):
@@ -124,9 +123,9 @@ class Builder(Generic[A, AS]):
 
     def centralised(self):
         """Centralises the observations and actions"""
-        from marlenv.models import
+        from marlenv.models import MultiDiscreteSpace
 
-        assert isinstance(self._env.action_space,
+        assert isinstance(self._env.action_space, MultiDiscreteSpace)
         self._env = wrappers.Centralized(self._env) # type: ignore
         return self
 
@@ -159,6 +158,6 @@ class Builder(Generic[A, AS]):
         self._env = wrappers.TimePenalty(self._env, penalty)
         return self
 
-    def build(self) -> MARLEnv[
+    def build(self) -> MARLEnv[AS]:
         """Build and return the environment"""
         return self._env
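The builder is now generic over a single `Space` parameter instead of the old action-type/action-space pair. A minimal chaining sketch, assuming `time_limit` returns the builder like the other chainable methods in the diff and that `DiscreteMockEnv`'s remaining constructor arguments keep their defaults:

```python
from marlenv import Builder, DiscreteMockEnv

# 50 steps is an arbitrary budget chosen for illustration.
env = (
    Builder(DiscreteMockEnv(n_agents=2))
    .time_limit(50, add_extra=True)
    .build()
)
print(type(env).__name__)
```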
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/env_pool.py
RENAMED
@@ -1,21 +1,19 @@
 from typing import Sequence
 from dataclasses import dataclass
-import numpy.typing as npt
 from typing_extensions import TypeVar
 import random
 
 from marlenv import RLEnvWrapper, MARLEnv
-from marlenv.models import
+from marlenv.models import Space
 
-
-ActionSpaceType = TypeVar("ActionSpaceType", bound=ActionSpace, default=ActionSpace)
+ActionSpaceType = TypeVar("ActionSpaceType", bound=Space, default=Space)
 
 
 @dataclass
-class EnvPool(RLEnvWrapper[
-    envs: Sequence[MARLEnv[
+class EnvPool(RLEnvWrapper[ActionSpaceType]):
+    envs: Sequence[MARLEnv[ActionSpaceType]]
 
-    def __init__(self, envs: Sequence[MARLEnv[
+    def __init__(self, envs: Sequence[MARLEnv[ActionSpaceType]]):
         assert len(envs) > 0, "EnvPool must contain at least one environment"
         self.envs = envs
         for env in envs[1:]:
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/mock_env.py
RENAMED
@@ -1,12 +1,10 @@
-from typing import Sequence
 import numpy as np
-import numpy.typing as npt
 from dataclasses import dataclass
-from marlenv import MARLEnv, Observation,
+from marlenv import MARLEnv, Observation, ContinuousSpace, Step, State, DiscreteSpace, MultiDiscreteSpace
 
 
 @dataclass
-class DiscreteMockEnv(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
+class DiscreteMockEnv(MARLEnv[MultiDiscreteSpace]):
     def __init__(
         self,
         n_agents: int = 4,
@@ -27,7 +25,8 @@ class DiscreteMockEnv(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace])
             case _:
                 raise ValueError("reward_step must be an int, float or np.ndarray")
         super().__init__(
-
+            n_agents,
+            DiscreteSpace(n_actions).repeat(n_agents),
             (obs_size,),
             (n_agents * agent_state_size,),
             extras_shape=(extras_size,),
@@ -85,7 +84,7 @@ class DiscreteMockEnv(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace])
         )
 
 
-class DiscreteMOMockEnv(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
+class DiscreteMOMockEnv(MARLEnv[DiscreteSpace]):
     """Multi-Objective Mock Environment"""
 
     def __init__(
@@ -100,7 +99,8 @@ class DiscreteMOMockEnv(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace
         extras_size: int = 0,
     ) -> None:
         super().__init__(
-
+            n_agents,
+            DiscreteSpace(n_actions),
             (obs_size,),
             (n_agents * agent_state_size,),
             extras_shape=(extras_size,),
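The mock environments illustrate the new constructor contract: `DiscreteMockEnv` is typed as `MARLEnv[MultiDiscreteSpace]` and passes `n_agents` explicitly, while `DiscreteMOMockEnv` keeps a plain `DiscreteSpace`. A small check sketch, assuming the unshown constructor parameters keep their defaults:

```python
from marlenv import DiscreteMockEnv, MultiDiscreteSpace

env = DiscreteMockEnv(n_agents=2)
print(env.n_agents)                                       # 2, stored directly by MARLEnv.__init__
print(env.n_actions)                                      # taken from action_space.shape[-1]
print(isinstance(env.action_space, MultiDiscreteSpace))   # matches the declared type parameter
# DiscreteMOMockEnv is the multi-objective counterpart, typed MARLEnv[DiscreteSpace].
```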
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/__init__.py
RENAMED
@@ -1,4 +1,4 @@
-from .spaces import
+from .spaces import DiscreteSpace, ContinuousSpace, MultiDiscreteSpace, Space
 from .observation import Observation
 from .step import Step
 from .state import State
@@ -8,7 +8,6 @@ from .episode import Episode
 
 
 __all__ = [
-    "ActionSpace",
     "Step",
     "State",
     "DiscreteSpace",
@@ -18,6 +17,5 @@ __all__ = [
     "Transition",
     "Episode",
     "MultiDiscreteSpace",
-    "
-    "ContinuousActionSpace",
+    "Space",
 ]
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/env.py
RENAMED
@@ -1,24 +1,22 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from itertools import product
-from typing import
+from typing import Generic, Optional, Sequence, TypeVar
 
 import cv2
 import numpy as np
 import numpy.typing as npt
-from typing_extensions import TypeVar
 
 from .observation import Observation
-from .spaces import
+from .spaces import ContinuousSpace, Space, DiscreteSpace, MultiDiscreteSpace
 from .state import State
 from .step import Step
 
-
-ActionSpaceType = TypeVar("ActionSpaceType", bound=ActionSpace, default=Any)
+ActionSpaceType = TypeVar("ActionSpaceType", bound=Space)
 
 
 @dataclass
-class MARLEnv(ABC, Generic[
+class MARLEnv(ABC, Generic[ActionSpaceType]):
     """
     Multi-Agent Reinforcement Learning environment.
 
@@ -70,6 +68,7 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
 
     def __init__(
         self,
+        n_agents: int,
         action_space: ActionSpaceType,
         observation_shape: tuple[int, ...],
         state_shape: tuple[int, ...],
@@ -81,8 +80,8 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
         super().__init__()
         self.name = self.__class__.__name__
         self.action_space = action_space
-        self.n_actions = action_space.
-        self.n_agents =
+        self.n_actions = action_space.shape[-1]
+        self.n_agents = n_agents
         self.observation_shape = observation_shape
         self.state_shape = state_shape
         self.extras_shape = extras_shape
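This is the core API change of the release: `MARLEnv.__init__` takes `n_agents` as its first argument, the generic parameter is a single `Space` type, and `n_actions` is read from `action_space.shape[-1]` (previously both appear to have been derived from the action space; the old assignments are truncated in this rendering). A hedged before/after sketch for a custom environment, with hypothetical names (`MyEnv`, `N_AGENTS`, `N_ACTIONS`) and the abstract methods elided:

```python
from marlenv import MARLEnv, DiscreteSpace, MultiDiscreteSpace

N_AGENTS, N_ACTIONS = 3, 5  # hypothetical values for illustration


class MyEnv(MARLEnv[MultiDiscreteSpace]):  # 3.4.0 was generic over (action type, action space)
    def __init__(self):
        super().__init__(
            N_AGENTS,  # new in 3.5.x: the agent count is passed explicitly
            DiscreteSpace(N_ACTIONS).repeat(N_AGENTS),  # 3.4.0: DiscreteActionSpace(N_AGENTS, N_ACTIONS)
            observation_shape=(10,),
            state_shape=(10,),
        )
    # reset(), step(), get_state(), ... omitted for brevity
```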
@@ -113,9 +112,16 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
         """The number of objectives in the environment."""
         return self.reward_space.size
 
-    def sample_action(self)
+    def sample_action(self):
         """Sample an available action from the action space."""
-
+        match self.action_space:
+            case MultiDiscreteSpace() as aspace:
+                return aspace.sample(mask=self.available_actions())
+            case ContinuousSpace() as aspace:
+                return aspace.sample()
+            case DiscreteSpace() as aspace:
+                return np.array([aspace.sample(mask=self.available_actions())])
+        raise NotImplementedError("Action space not supported")
 
     def available_actions(self) -> npt.NDArray[np.bool]:
         """
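`sample_action` now dispatches on the concrete space type and applies the `available_actions()` mask for discrete spaces. A usage sketch with the bundled mock environment; calling `reset()` first and reading `step.reward` are assumptions based on the surrounding code in this diff:

```python
from marlenv import DiscreteMockEnv

env = DiscreteMockEnv(n_agents=2)  # other constructor arguments assumed to keep defaults
env.reset()                        # assumed to be required before stepping
action = env.sample_action()       # masked sampling for (Multi)DiscreteSpace, per the match above
step = env.step(action)            # step() now accepts a Sequence or np.ndarray
print(step.reward)
```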
@@ -147,7 +153,7 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
         raise NotImplementedError("Method not implemented")
 
     @abstractmethod
-    def step(self,
+    def step(self, action: Sequence | np.ndarray) -> Step:
         """Perform a step in the environment.
 
         Returns a Step object that can be unpacked as a 6-tuple containing:
@@ -180,7 +186,7 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
         """Retrieve an image of the environment"""
         raise NotImplementedError("No image available for this environment")
 
-    def replay(self, actions: Sequence
+    def replay(self, actions: Sequence, seed: Optional[int] = None):
         """Replay a sequence of actions."""
         from .episode import Episode # Avoid circular import
 
{multi_agent_rlenv-3.4.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/episode.py
RENAMED
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from functools import cached_property
-from typing import Any, Callable,
+from typing import Any, Callable, Optional, Sequence, overload
 
 import numpy as np
 import numpy.typing as npt
@@ -14,11 +14,8 @@ from .env import MARLEnv
 from marlenv.exceptions import EnvironmentMismatchException, ReplayMismatchException
 
 
-A = TypeVar("A")
-
-
 @dataclass
-class Episode(Generic[A]):
+class Episode:
     """Episode model made of observations, actions, rewards, ..."""
 
     all_observations: list[npt.NDArray[np.float32]]
@@ -55,7 +52,7 @@ class Episode(Generic[A]):
         )
 
     @staticmethod
-    def from_transitions(transitions: Sequence[Transition
+    def from_transitions(transitions: Sequence[Transition]) -> "Episode":
         """Create an episode from a list of transitions"""
         episode = Episode.new(transitions[0].obs, transitions[0].state)
         for transition in transitions:
@@ -214,11 +211,11 @@ class Episode(Generic[A]):
 
     def replay(
         self,
-        env: MARLEnv
+        env: MARLEnv,
         seed: Optional[int] = None,
         *,
-        after_reset: Optional[Callable[[Observation, State, MARLEnv
-        after_step: Optional[Callable[[int, Step, MARLEnv
+        after_reset: Optional[Callable[[Observation, State, MARLEnv], None]] = None,
+        after_step: Optional[Callable[[int, Step, MARLEnv], None]] = None,
     ):
         """
         Replay the episode in the environment (i.e. perform the actions) and assert that the outcomes match.
@@ -243,12 +240,12 @@ class Episode(Generic[A]):
                 raise ReplayMismatchException("observation", step.obs.data, self.next_obs[i], time_step=i)
             if not np.array_equal(step.state.data, self.next_states[i]):
                 raise ReplayMismatchException("state", step.state.data, self.next_states[i], time_step=i)
-            if not np.
+            if not np.isclose(step.reward, self.rewards[i]):
                 raise ReplayMismatchException("reward", step.reward, self.rewards[i], time_step=i)
             if after_step is not None:
                 after_step(i, step, env)
 
-    def get_images(self, env: MARLEnv
+    def get_images(self, env: MARLEnv, seed: Optional[int] = None) -> list[np.ndarray]:
         images = []
 
         def collect_image(*_, **__):
@@ -257,7 +254,7 @@ class Episode(Generic[A]):
         self.replay(env, seed, after_reset=collect_image, after_step=collect_image)
         return images
 
-    def render(self, env: MARLEnv
+    def render(self, env: MARLEnv, seed: Optional[int] = None, fps: int = 5):
         def render_callback(*_, **__):
             env.render()
             cv2.waitKey(1000 // fps)
@@ -288,10 +285,10 @@ class Episode(Generic[A]):
         return returns
 
     @overload
-    def add(self, transition: Transition
+    def add(self, transition: Transition, /): ...
 
     @overload
-    def add(self, step: Step, action:
+    def add(self, step: Step, action: np.ndarray, /): ...
 
     def add(self, *data):
         match data:
@@ -322,10 +319,10 @@ class Episode(Generic[A]):
 
     def add_data(
         self,
-        next_obs,
-        next_state,
-        action:
-        reward: np.
+        next_obs: Observation,
+        next_state: State,
+        action: np.ndarray,
+        reward: npt.NDArray[np.float32],
         others: dict[str, Any],
         done: bool,
         truncated: bool,
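`Episode` is no longer generic over the action type, and `replay`, `get_images` and `render` now take a plain `MARLEnv`. A sketch of the new callback-based replay, assuming `episode` is an existing `Episode` and `env` the environment it was collected on; the callback signatures mirror the `after_reset`/`after_step` annotations in the diff above:

```python
from marlenv import MARLEnv, Observation, State, Step


def log_reset(obs: Observation, state: State, env: MARLEnv):
    print("reset:", type(env).__name__)


def log_step(t: int, step: Step, env: MARLEnv):
    print(f"t={t} reward={step.reward}")


# With a concrete episode and environment at hand:
# episode.replay(env, seed=0, after_reset=log_reset, after_step=log_step)
# frames = episode.get_images(env, seed=0)
```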