multi-agent-rlenv 3.3.5__tar.gz → 3.3.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/PKG-INFO +1 -1
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/__init__.py +1 -1
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/__init__.py +1 -1
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/gym_adapter.py +14 -14
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/overcooked_adapter.py +18 -11
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/pettingzoo_adapter.py +6 -6
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/env_builder.py +1 -1
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/__init__.py +2 -2
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/centralised.py +1 -1
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_adapters.py +8 -8
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_serialization.py +89 -12
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_wrappers.py +4 -4
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/.github/workflows/ci.yaml +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/.github/workflows/docs.yaml +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/.gitignore +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/LICENSE +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/README.md +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/pyproject.toml +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/pymarl_adapter.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/smac_adapter.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/env_pool.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/exceptions.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/mock_env.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/models/__init__.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/models/env.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/models/episode.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/models/observation.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/models/spaces.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/models/state.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/models/step.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/models/transition.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/py.typed +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/agent_id_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/available_actions_mask.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/available_actions_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/blind_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/delayed_rewards.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/last_action_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/paddings.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/penalty_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/rlenv_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/time_limit.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/video_recorder.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/__init__.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_episode.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_models.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_pool.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_spaces.py +0 -0
- {multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/utils.py +0 -0
{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: multi-agent-rlenv
-Version: 3.3.5
+Version: 3.3.6
 Summary: A strongly typed Multi-Agent Reinforcement Learning framework
 Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
 Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>

{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/__init__.py
@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
 If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
 """
 
-__version__ = "3.3.5"
+__version__ = "3.3.6"
 
 from . import models
 from . import wrappers

{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/__init__.py
@@ -20,7 +20,7 @@ if find_spec("smac") is not None:
     HAS_SMAC = True
 
 HAS_OVERCOOKED = False
-if find_spec("overcooked_ai_py.mdp") is not None:
+if find_spec("overcooked_ai_py") is not None and find_spec("overcooked_ai_py.mdp") is not None:
     import numpy
 
     # Overcooked assumes a version of numpy <2.0 where np.Inf is available.

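Note on the change above: probing the top-level `overcooked_ai_py` package first avoids the ModuleNotFoundError that `find_spec("overcooked_ai_py.mdp")` raises when the parent package is not installed, and the `np.Inf` comment refers to NumPy 2.0 removing the `Inf` alias. A minimal sketch of what the guarded block could look like in full; the shim below is an assumption, only the lines shown in the hunk are taken from the package:

    from importlib.util import find_spec

    HAS_OVERCOOKED = False
    # Check the parent package first so that probing the submodule of an
    # absent package does not raise ModuleNotFoundError.
    if find_spec("overcooked_ai_py") is not None and find_spec("overcooked_ai_py.mdp") is not None:
        import numpy

        # Overcooked assumes numpy < 2.0, where the `np.Inf` alias still exists;
        # a compatibility shim could simply restore the alias (assumption).
        if not hasattr(numpy, "Inf"):
            numpy.Inf = numpy.inf  # type: ignore[attr-defined]
        HAS_OVERCOOKED = True
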
{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/gym_adapter.py
@@ -42,26 +42,26 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
             case other:
                 raise NotImplementedError(f"Action space {other} not supported")
         super().__init__(space, env.observation_space.shape, (1,))
-        self.
-        if self.
-            self.name = self.
+        self._gym_env = env
+        if self._gym_env.unwrapped.spec is not None:
+            self.name = self._gym_env.unwrapped.spec.id
         else:
             self.name = "gym-no-id"
-        self.
+        self._last_obs = None
 
     def get_observation(self):
-        if self.
+        if self._last_obs is None:
             raise ValueError("No observation available. Call reset() first.")
-        return self.
+        return self._last_obs
 
     def step(self, actions):
-        obs, reward, done, truncated, info = self.
-        self.
+        obs, reward, done, truncated, info = self._gym_env.step(list(actions)[0])
+        self._last_obs = Observation(
             np.array([obs], dtype=np.float32),
             self.available_actions(),
         )
         return Step(
-            self.
+            self._last_obs,
             self.get_state(),
             np.array([reward]),
             done,
@@ -73,18 +73,18 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
         return State(np.zeros(1, dtype=np.float32))
 
     def reset(self):
-        obs_data, _info = self.
-        self.
+        obs_data, _info = self._gym_env.reset()
+        self._last_obs = Observation(
             np.array([obs_data], dtype=np.float32),
             self.available_actions(),
         )
-        return self.
+        return self._last_obs, self.get_state()
 
     def get_image(self):
-        image = np.array(self.
+        image = np.array(self._gym_env.render())
         if sys.platform in ("linux", "linux2"):
             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
         return image
 
     def seed(self, seed_value: int):
-        self.
+        self._gym_env.reset(seed=seed_value)

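The change above renames the wrapped environment and cached observation to private attributes; the public API is unchanged. A minimal usage sketch relying only on calls that appear elsewhere in this diff (`marlenv.make` and `Step.obs` from the tests); gymnasium must be installed:

    import marlenv

    # Wrap a single-agent gymnasium environment as a one-agent MARLEnv.
    env = marlenv.make("CartPole-v1")
    obs, state = env.reset()

    # One action per agent; CartPole has two discrete actions (0 and 1).
    step = env.step([0])
    print(step.obs.shape)
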
{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/overcooked_adapter.py RENAMED
@@ -23,8 +23,9 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         assert isinstance(oenv.mdp, OvercookedGridworld)
         self._mdp = oenv.mdp
         self._visualizer = StateVisualizer()
-
-
+        width, height, layers = tuple(self._mdp.lossless_state_encoding_shape)
+        # -1 because we extract the "urgent" layer to the extras
+        shape = (int(layers - 1), int(width), int(height))
         super().__init__(
             action_space=DiscreteActionSpace(
                 n_agents=self._mdp.num_players,
@@ -32,10 +33,10 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
                 action_names=[Action.ACTION_TO_CHAR[a] for a in Action.ALL_ACTIONS],
             ),
             observation_shape=shape,
-            extras_shape=(
-            extras_meanings=["timestep"],
+            extras_shape=(2,),
+            extras_meanings=["timestep", "urgent"],
             state_shape=shape,
-            state_extra_shape=(
+            state_extra_shape=(2,),
             reward_space=ContinuousSpace.from_shape(1),
         )
         self.horizon = int(self._oenv.horizon)
@@ -53,19 +54,25 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         return self.state.timestep
 
     def _state_data(self):
-
+        players_layers = self._mdp.lossless_state_encoding(self.state)
+        state = np.array(players_layers, dtype=np.float32)
         # Use axes (agents, channels, height, width) instead of (agents, height, width, channels)
         state = np.transpose(state, (0, 3, 1, 2))
-
+        # The last last layer is for "urgency", put it in the extras
+        urgency = float(np.all(state[:, -1]))
+        state = state[:, :-1]
+        return state, urgency
 
     def get_state(self):
-
+        data, is_urgent = self._state_data()
+        return State(data[0], np.array([self.time_step / self.horizon, is_urgent], dtype=np.float32))
 
     def get_observation(self) -> Observation:
+        data, is_urgent = self._state_data()
         return Observation(
-            data=
+            data=data,
             available_actions=self.available_actions(),
-            extras=np.array([[self.time_step / self.horizon]] * self.n_agents, dtype=np.float32),
+            extras=np.array([[self.time_step / self.horizon, is_urgent]] * self.n_agents, dtype=np.float32),
         )
 
     def available_actions(self):
@@ -74,7 +81,7 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         for agent_num, agent_actions in enumerate(actions):
             for action in agent_actions:
                 available_actions[agent_num, Action.ACTION_TO_INDEX[action]] = True
-        return np.array(available_actions)
+        return np.array(available_actions, dtype=np.bool)
 
     def step(self, actions: Sequence[int] | npt.NDArray[np.int32 | np.int64]) -> Step:
         actions = [Action.ALL_ACTIONS[a] for a in actions]

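The substance of this change is that the last channel of Overcooked's lossless state encoding (the "urgency" layer) is moved out of the observation tensor and into the extras, which is why the observation keeps 25 layers while the extras grow to (2,) in the tests further down. A standalone numpy sketch of the same reshaping; the grid dimensions are illustrative, not taken from overcooked_ai_py:

    import numpy as np

    # Illustrative encoding: 2 players, height 5, width 4, 26 feature layers,
    # where the last layer is the "urgency" indicator.
    encoding = np.zeros((2, 5, 4, 26), dtype=np.float32)
    encoding[..., -1] = 1.0  # pretend the horizon is almost reached

    # (agents, height, width, channels) -> (agents, channels, height, width)
    state = np.transpose(encoding, (0, 3, 1, 2))

    # Pull the last channel out as a single flag and drop it from the tensor.
    urgency = float(np.all(state[:, -1]))
    state = state[:, :-1]

    assert state.shape == (2, 25, 5, 4)
    assert urgency == 1.0
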
{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/adapters/pettingzoo_adapter.py RENAMED
@@ -32,7 +32,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
         obs_space = env.observation_space(env.possible_agents[0])
         if obs_space.shape is None:
             raise NotImplementedError("Only discrete observation spaces are supported")
-        self.
+        self._pz_env = env
         env.reset()
         super().__init__(space, obs_space.shape, self.get_state().shape)
         self.agents = env.possible_agents
@@ -40,13 +40,13 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
 
     def get_state(self):
         try:
-            return self.
+            return self._pz_env.state()
         except NotImplementedError:
             return np.array([0])
 
     def step(self, actions: npt.NDArray | Sequence):
         action_dict = dict(zip(self.agents, actions))
-        obs, reward, term, trunc, info = self.
+        obs, reward, term, trunc, info = self._pz_env.step(action_dict)
         obs_data = np.array([v for v in obs.values()])
         reward = np.sum([r for r in reward.values()], keepdims=True)
         self.last_observation = Observation(obs_data, self.available_actions())
@@ -54,7 +54,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
         return Step(self.last_observation, state, reward, any(term.values()), any(trunc.values()), info)
 
     def reset(self):
-        obs = self.
+        obs = self._pz_env.reset()[0]
         obs_data = np.array([v for v in obs.values()])
         self.last_observation = Observation(obs_data, self.available_actions(), self.get_state())
         return self.last_observation
@@ -65,7 +65,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
         return self.last_observation
 
     def seed(self, seed_value: int):
-        self.
+        self._pz_env.reset(seed=seed_value)
 
     def render(self, *_):
-        return self.
+        return self._pz_env.render()

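As with the Gym adapter, this is an internal rename of the wrapped environment to a private attribute. A usage sketch assembled from the test added later in this diff; the action values and the printed attribute are assumptions about a standard PettingZoo parallel environment:

    from pettingzoo.sisl import pursuit_v4

    import marlenv

    # Wrap a PettingZoo parallel environment; every possible agent becomes a MARL agent.
    env = marlenv.adapters.PettingZoo(pursuit_v4.parallel_env())
    obs = env.reset()                     # reset() returns the joint Observation
    step = env.step([0] * env.n_agents)   # one discrete action per agent
    print(step.obs.shape)
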
{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/env_builder.py
@@ -127,7 +127,7 @@ class Builder(Generic[A, AS]):
         from marlenv.models import DiscreteActionSpace
 
         assert isinstance(self._env.action_space, DiscreteActionSpace)
-        self._env = wrappers.
+        self._env = wrappers.Centralized(self._env) # type: ignore
         return self
 
     def record(

{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/__init__.py
@@ -7,7 +7,7 @@ from .paddings import PadObservations, PadExtras
 from .penalty_wrapper import TimePenalty
 from .available_actions_wrapper import AvailableActions
 from .blind_wrapper import Blind
-from .centralised import
+from .centralised import Centralized
 from .available_actions_mask import AvailableActionsMask
 from .delayed_rewards import DelayedReward
 
@@ -24,6 +24,6 @@ __all__ = [
     "TimePenalty",
     "AvailableActions",
    "Blind",
-    "
+    "Centralized",
     "DelayedReward",
 ]

{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/src/marlenv/wrappers/centralised.py
@@ -14,7 +14,7 @@ A = TypeVar("A", bound=npt.NDArray | Sequence[int] | Sequence[Sequence[float]])
 
 
 @dataclass
-class
+class Centralized(RLEnvWrapper[A, DiscreteActionSpace]):
     joint_action_space: ActionSpace
 
     def __init__(self, env: MARLEnv[A, DiscreteActionSpace]):

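`Centralized` (renamed from the British spelling used in 3.3.5 and earlier) flattens the multi-agent problem into a single agent acting over the joint action space; `test_centralised_action` further down encodes a joint action as `action1 * n_actions + action2`. A small sketch of that index arithmetic generalised to any number of agents; the helper functions are illustrative and not part of the package:

    # Mixed-radix joint-action indexing, assuming every agent has the same
    # number of discrete actions (as in the Centralized wrapper's tests).

    def encode_joint_action(actions: list[int], n_actions: int) -> int:
        # For two agents this reduces to action1 * n_actions + action2.
        index = 0
        for a in actions:
            index = index * n_actions + a
        return index

    def decode_joint_action(index: int, n_actions: int, n_agents: int) -> list[int]:
        # Inverse mapping from a joint index back to one action per agent.
        actions = []
        for _ in range(n_agents):
            index, a = divmod(index, n_actions)
            actions.append(a)
        return list(reversed(actions))

    assert encode_joint_action([2, 3], n_actions=5) == 2 * 5 + 3
    assert decode_joint_action(13, n_actions=5, n_agents=2) == [2, 3]
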
{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_adapters.py
@@ -147,9 +147,9 @@ def test_overcooked_attributes():
     height, width = env._mdp.shape
     assert env.n_agents == 2
     assert env.n_actions == Action.NUM_ACTIONS
-    assert env.observation_shape == (
+    assert env.observation_shape == (25, height, width)
     assert env.reward_space.shape == (1,)
-    assert env.extras_shape == (
+    assert env.extras_shape == (2,)
     assert not env.is_multi_objective
 
 
@@ -166,13 +166,13 @@ def test_overcooked_obs_state():
     assert state.data.dtype == np.float32
     assert obs.extras.dtype == np.float32
     assert state.extras.dtype == np.float32
-    assert obs.shape == (
-    assert obs.extras_shape == (
-    assert state.shape == (
-    assert state.extras_shape == (
+    assert obs.shape == (25, height, width)
+    assert obs.extras_shape == (2,)
+    assert state.shape == (25, height, width)
+    assert state.extras_shape == (2,)
 
-    assert np.all(obs.extras == i / HORIZON)
-    assert np.all(state.extras == i / HORIZON)
+    assert np.all(obs.extras[:, 0] == i / HORIZON)
+    assert np.all(state.extras[0] == i / HORIZON)
 
     step = env.random_step()
     obs = step.obs

{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_serialization.py
@@ -6,7 +6,7 @@ import os
 from copy import deepcopy
 
 import marlenv
-from marlenv import DiscreteMockEnv
+from marlenv import DiscreteMockEnv, wrappers
 
 
 def test_registry():
@@ -76,21 +76,78 @@ def test_serialize_episode_fields():
         assert field in episode
 
 
+def serde_and_check_key_values(env: object):
+    serialized = orjson.dumps(env, option=orjson.OPT_SERIALIZE_NUMPY)
+    deserialized = orjson.loads(serialized)
+    checked_keys = []
+    for key, value in env.__dict__.items():
+        if key.startswith("_"):
+            continue
+        checked_keys.append(key)
+        assert key in deserialized
+        match value:
+            case int() | float() | str() | bool() | list() | dict():
+                assert deserialized[key] == value
+            case np.ndarray():
+                assert np.all(deserialized[key] == value)
+    assert len(checked_keys) > 0
+
+
+def test_serialize_blind():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.Blind(env, 0.2))
+
+
+def test_serialize_time_limit():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.TimeLimit(env, 10))
+
+
+def test_serialize_time_penalty():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.TimePenalty(env, 0.2))
+
+
+def test_serialize_agent_id():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.AgentId(env))
+
+
+def test_serialize_last_action():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.LastAction(env))
+
+
+def test_serialize_available_actions():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.AvailableActions(env))
+
+
+def test_serialize_video():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.VideoRecorder(env))
+
+
+def test_serialize_centralised():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.Centralized(env))
+
+
+def test_serialize_pad_extras():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.PadExtras(env, 5))
+
+
+def test_serialize_pad_observation():
+    env = DiscreteMockEnv(4)
+    serde_and_check_key_values(wrappers.PadObservations(env, 5))
+
+
 def test_wrappers_serializable():
     env = DiscreteMockEnv(4)
     env = marlenv.Builder(env).agent_id().available_actions().time_limit(10).last_action().time_penalty(5).blind(0.2).build()
-    as_bytes = orjson.dumps(env, option=orjson.OPT_SERIALIZE_NUMPY)
-    deserialized = orjson.loads(as_bytes)
 
-
-    for key, value in env.__dict__.items():
-        if key.startswith("_"):
-            continue
-        assert key in deserialized
-        if key == "wrapped":
-            check_key_values(value, deserialized[key])
-
-    check_key_values(env, deserialized)
+    serde_and_check_key_values(env)
 
 
 def test_serialize_observation():
@@ -204,3 +261,23 @@ def test_serialize_json_overcooked():
     assert deserialized["n_actions"] == env.n_actions
     assert deserialized["name"] == env.name
     assert deserialized["extras_meanings"] == env.extras_meanings
+
+
+@pytest.mark.skipif(not marlenv.adapters.HAS_GYM, reason="Gymnasium is not installed")
+def test_json_serialize_gym():
+    env = marlenv.make("CartPole-v1")
+    serde_and_check_key_values(env)
+
+
+@pytest.mark.skipif(not marlenv.adapters.HAS_PETTINGZOO, reason="PettingZoo is not installed")
+def test_json_serialize_pettingzoo():
+    from pettingzoo.sisl import pursuit_v4
+
+    env = marlenv.adapters.PettingZoo(pursuit_v4.parallel_env())
+    serde_and_check_key_values(env)
+
+
+@pytest.mark.skipif(not marlenv.adapters.HAS_SMAC, reason="SMAC is not installed")
+def test_json_serialize_smac():
+    env = marlenv.adapters.SMAC("3m")
+    serde_and_check_key_values(env)

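For context on `serde_and_check_key_values`: orjson serializes dataclass instances natively, and `orjson.OPT_SERIALIZE_NUMPY` is what allows the numpy arrays held by the wrappers to be encoded. A tiny standalone round-trip sketch; the dataclass is illustrative, not from the package:

    from dataclasses import dataclass, field

    import numpy as np
    import orjson

    @dataclass
    class Example:
        # Illustrative object mixing plain values and a numpy array.
        n_agents: int = 4
        extras_meanings: list = field(default_factory=lambda: ["timestep", "urgent"])
        mask: np.ndarray = field(default_factory=lambda: np.ones(3, dtype=np.bool_))

    # Without OPT_SERIALIZE_NUMPY the ndarray field would raise a TypeError.
    data = orjson.loads(orjson.dumps(Example(), option=orjson.OPT_SERIALIZE_NUMPY))
    assert data["n_agents"] == 4
    assert data["mask"] == [True, True, True]
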
{multi_agent_rlenv-3.3.5 → multi_agent_rlenv-3.3.6}/tests/test_wrappers.py
@@ -1,7 +1,7 @@
 import numpy as np
 from typing import Any
 from marlenv import Builder, DiscreteMOMockEnv, DiscreteMockEnv, MARLEnv
-from marlenv.wrappers import
+from marlenv.wrappers import Centralized, AvailableActionsMask, TimeLimit, LastAction, DelayedReward
 import marlenv
 
 
@@ -181,7 +181,7 @@ def test_centralised_shape():
 
 def test_centralised_action():
     mock = DiscreteMockEnv(2)
-    env =
+    env = Centralized(mock)
     for action1 in range(mock.n_actions):
         for action2 in range(mock.n_actions):
             joint_action = action1 * mock.n_actions + action2
@@ -192,7 +192,7 @@ def test_centralised_action():
 
 def test_centralised_obs_and_state():
     wrapped = DiscreteMockEnv(2)
-    env =
+    env = Centralized(wrapped)
     assert env.observation_shape == (2 * wrapped.obs_size,)
     assert env.state_shape == (wrapped.agent_state_size * wrapped.n_agents,)
     obs, state = env.reset()
@@ -214,7 +214,7 @@ def test_centralised_available_actions():
     mask = np.zeros((N_AGENTS, mock.n_actions), dtype=np.bool_)
     mask[0, 0] = True
     mask[1, 0] = True
-    env =
+    env = Centralized(AvailableActionsMask(mock, mask))
     expected_joint_mask = np.zeros((1, mock.n_actions**N_AGENTS))
     expected_joint_mask[0, 0] = 1
     obs, _ = env.reset()

All remaining files listed above with +0 -0 are unchanged between 3.3.5 and 3.3.6.