multi-agent-rlenv 3.3.5__py3-none-any.whl → 3.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- marlenv/__init__.py +1 -1
- marlenv/adapters/__init__.py +1 -1
- marlenv/adapters/gym_adapter.py +14 -14
- marlenv/adapters/overcooked_adapter.py +18 -11
- marlenv/adapters/pettingzoo_adapter.py +6 -6
- marlenv/env_builder.py +1 -1
- marlenv/wrappers/__init__.py +2 -2
- marlenv/wrappers/centralised.py +1 -1
- {multi_agent_rlenv-3.3.5.dist-info → multi_agent_rlenv-3.3.6.dist-info}/METADATA +1 -1
- {multi_agent_rlenv-3.3.5.dist-info → multi_agent_rlenv-3.3.6.dist-info}/RECORD +12 -12
- {multi_agent_rlenv-3.3.5.dist-info → multi_agent_rlenv-3.3.6.dist-info}/WHEEL +0 -0
- {multi_agent_rlenv-3.3.5.dist-info → multi_agent_rlenv-3.3.6.dist-info}/licenses/LICENSE +0 -0
marlenv/__init__.py
CHANGED
|
@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
|
|
|
62
62
|
If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
|
|
63
63
|
"""
|
|
64
64
|
|
|
65
|
-
__version__ = "3.3.5"
|
|
65
|
+
__version__ = "3.3.6"
|
|
66
66
|
|
|
67
67
|
from . import models
|
|
68
68
|
from . import wrappers
|
marlenv/adapters/__init__.py
CHANGED
|
@@ -20,7 +20,7 @@ if find_spec("smac") is not None:
|
|
|
20
20
|
HAS_SMAC = True
|
|
21
21
|
|
|
22
22
|
HAS_OVERCOOKED = False
|
|
23
|
-
if find_spec("overcooked_ai_py.mdp") is not None:
|
|
23
|
+
if find_spec("overcooked_ai_py") is not None and find_spec("overcooked_ai_py.mdp") is not None:
|
|
24
24
|
import numpy
|
|
25
25
|
|
|
26
26
|
# Overcooked assumes a version of numpy <2.0 where np.Inf is available.
|
marlenv/adapters/gym_adapter.py
CHANGED
|
@@ -42,26 +42,26 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
|
|
|
42
42
|
case other:
|
|
43
43
|
raise NotImplementedError(f"Action space {other} not supported")
|
|
44
44
|
super().__init__(space, env.observation_space.shape, (1,))
|
|
45
|
-
self.
|
|
46
|
-
if self.
|
|
47
|
-
self.name = self.
|
|
45
|
+
self._gym_env = env
|
|
46
|
+
if self._gym_env.unwrapped.spec is not None:
|
|
47
|
+
self.name = self._gym_env.unwrapped.spec.id
|
|
48
48
|
else:
|
|
49
49
|
self.name = "gym-no-id"
|
|
50
|
-
self.
|
|
50
|
+
self._last_obs = None
|
|
51
51
|
|
|
52
52
|
def get_observation(self):
|
|
53
|
-
if self.
|
|
53
|
+
if self._last_obs is None:
|
|
54
54
|
raise ValueError("No observation available. Call reset() first.")
|
|
55
|
-
return self.
|
|
55
|
+
return self._last_obs
|
|
56
56
|
|
|
57
57
|
def step(self, actions):
|
|
58
|
-
obs, reward, done, truncated, info = self.
|
|
59
|
-
self.
|
|
58
|
+
obs, reward, done, truncated, info = self._gym_env.step(list(actions)[0])
|
|
59
|
+
self._last_obs = Observation(
|
|
60
60
|
np.array([obs], dtype=np.float32),
|
|
61
61
|
self.available_actions(),
|
|
62
62
|
)
|
|
63
63
|
return Step(
|
|
64
|
-
self.
|
|
64
|
+
self._last_obs,
|
|
65
65
|
self.get_state(),
|
|
66
66
|
np.array([reward]),
|
|
67
67
|
done,
|
|
@@ -73,18 +73,18 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
|
|
|
73
73
|
return State(np.zeros(1, dtype=np.float32))
|
|
74
74
|
|
|
75
75
|
def reset(self):
|
|
76
|
-
obs_data, _info = self.
|
|
77
|
-
self.
|
|
76
|
+
obs_data, _info = self._gym_env.reset()
|
|
77
|
+
self._last_obs = Observation(
|
|
78
78
|
np.array([obs_data], dtype=np.float32),
|
|
79
79
|
self.available_actions(),
|
|
80
80
|
)
|
|
81
|
-
return self.
|
|
81
|
+
return self._last_obs, self.get_state()
|
|
82
82
|
|
|
83
83
|
def get_image(self):
|
|
84
|
-
image = np.array(self.
|
|
84
|
+
image = np.array(self._gym_env.render())
|
|
85
85
|
if sys.platform in ("linux", "linux2"):
|
|
86
86
|
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
|
87
87
|
return image
|
|
88
88
|
|
|
89
89
|
def seed(self, seed_value: int):
|
|
90
|
-
self.
|
|
90
|
+
self._gym_env.reset(seed=seed_value)
|
|
marlenv/adapters/overcooked_adapter.py
CHANGED
|
@@ -23,8 +23,9 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
|
|
|
23
23
|
assert isinstance(oenv.mdp, OvercookedGridworld)
|
|
24
24
|
self._mdp = oenv.mdp
|
|
25
25
|
self._visualizer = StateVisualizer()
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
width, height, layers = tuple(self._mdp.lossless_state_encoding_shape)
|
|
27
|
+
# -1 because we extract the "urgent" layer to the extras
|
|
28
|
+
shape = (int(layers - 1), int(width), int(height))
|
|
28
29
|
super().__init__(
|
|
29
30
|
action_space=DiscreteActionSpace(
|
|
30
31
|
n_agents=self._mdp.num_players,
|
|
@@ -32,10 +33,10 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
|
|
|
32
33
|
action_names=[Action.ACTION_TO_CHAR[a] for a in Action.ALL_ACTIONS],
|
|
33
34
|
),
|
|
34
35
|
observation_shape=shape,
|
|
35
|
-
extras_shape=(
|
|
36
|
-
extras_meanings=["timestep"],
|
|
36
|
+
extras_shape=(2,),
|
|
37
|
+
extras_meanings=["timestep", "urgent"],
|
|
37
38
|
state_shape=shape,
|
|
38
|
-
state_extra_shape=(
|
|
39
|
+
state_extra_shape=(2,),
|
|
39
40
|
reward_space=ContinuousSpace.from_shape(1),
|
|
40
41
|
)
|
|
41
42
|
self.horizon = int(self._oenv.horizon)
|
|
@@ -53,19 +54,25 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
|
|
|
53
54
|
return self.state.timestep
|
|
54
55
|
|
|
55
56
|
def _state_data(self):
|
|
56
|
-
|
|
57
|
+
players_layers = self._mdp.lossless_state_encoding(self.state)
|
|
58
|
+
state = np.array(players_layers, dtype=np.float32)
|
|
57
59
|
# Use axes (agents, channels, height, width) instead of (agents, height, width, channels)
|
|
58
60
|
state = np.transpose(state, (0, 3, 1, 2))
|
|
59
|
-
|
|
61
|
+
# The last layer is for "urgency", put it in the extras
|
|
62
|
+
urgency = float(np.all(state[:, -1]))
|
|
63
|
+
state = state[:, :-1]
|
|
64
|
+
return state, urgency
|
|
60
65
|
|
|
61
66
|
def get_state(self):
|
|
62
|
-
|
|
67
|
+
data, is_urgent = self._state_data()
|
|
68
|
+
return State(data[0], np.array([self.time_step / self.horizon, is_urgent], dtype=np.float32))
|
|
63
69
|
|
|
64
70
|
def get_observation(self) -> Observation:
|
|
71
|
+
data, is_urgent = self._state_data()
|
|
65
72
|
return Observation(
|
|
66
|
-
data=
|
|
73
|
+
data=data,
|
|
67
74
|
available_actions=self.available_actions(),
|
|
68
|
-
extras=np.array([[self.time_step / self.horizon]] * self.n_agents, dtype=np.float32),
|
|
75
|
+
extras=np.array([[self.time_step / self.horizon, is_urgent]] * self.n_agents, dtype=np.float32),
|
|
69
76
|
)
|
|
70
77
|
|
|
71
78
|
def available_actions(self):
|
|
@@ -74,7 +81,7 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
|
|
|
74
81
|
for agent_num, agent_actions in enumerate(actions):
|
|
75
82
|
for action in agent_actions:
|
|
76
83
|
available_actions[agent_num, Action.ACTION_TO_INDEX[action]] = True
|
|
77
|
-
return np.array(available_actions)
|
|
84
|
+
return np.array(available_actions, dtype=np.bool)
|
|
78
85
|
|
|
79
86
|
def step(self, actions: Sequence[int] | npt.NDArray[np.int32 | np.int64]) -> Step:
|
|
80
87
|
actions = [Action.ALL_ACTIONS[a] for a in actions]
|
|
marlenv/adapters/pettingzoo_adapter.py
CHANGED
|
@@ -32,7 +32,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
|
|
|
32
32
|
obs_space = env.observation_space(env.possible_agents[0])
|
|
33
33
|
if obs_space.shape is None:
|
|
34
34
|
raise NotImplementedError("Only discrete observation spaces are supported")
|
|
35
|
-
self.
|
|
35
|
+
self._pz_env = env
|
|
36
36
|
env.reset()
|
|
37
37
|
super().__init__(space, obs_space.shape, self.get_state().shape)
|
|
38
38
|
self.agents = env.possible_agents
|
|
@@ -40,13 +40,13 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
|
|
|
40
40
|
|
|
41
41
|
def get_state(self):
|
|
42
42
|
try:
|
|
43
|
-
return self.
|
|
43
|
+
return self._pz_env.state()
|
|
44
44
|
except NotImplementedError:
|
|
45
45
|
return np.array([0])
|
|
46
46
|
|
|
47
47
|
def step(self, actions: npt.NDArray | Sequence):
|
|
48
48
|
action_dict = dict(zip(self.agents, actions))
|
|
49
|
-
obs, reward, term, trunc, info = self.
|
|
49
|
+
obs, reward, term, trunc, info = self._pz_env.step(action_dict)
|
|
50
50
|
obs_data = np.array([v for v in obs.values()])
|
|
51
51
|
reward = np.sum([r for r in reward.values()], keepdims=True)
|
|
52
52
|
self.last_observation = Observation(obs_data, self.available_actions())
|
|
@@ -54,7 +54,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
|
|
|
54
54
|
return Step(self.last_observation, state, reward, any(term.values()), any(trunc.values()), info)
|
|
55
55
|
|
|
56
56
|
def reset(self):
|
|
57
|
-
obs = self.
|
|
57
|
+
obs = self._pz_env.reset()[0]
|
|
58
58
|
obs_data = np.array([v for v in obs.values()])
|
|
59
59
|
self.last_observation = Observation(obs_data, self.available_actions(), self.get_state())
|
|
60
60
|
return self.last_observation
|
|
@@ -65,7 +65,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
|
|
|
65
65
|
return self.last_observation
|
|
66
66
|
|
|
67
67
|
def seed(self, seed_value: int):
|
|
68
|
-
self.
|
|
68
|
+
self._pz_env.reset(seed=seed_value)
|
|
69
69
|
|
|
70
70
|
def render(self, *_):
|
|
71
|
-
return self.
|
|
71
|
+
return self._pz_env.render()
|
marlenv/env_builder.py
CHANGED
|
@@ -127,7 +127,7 @@ class Builder(Generic[A, AS]):
|
|
|
127
127
|
from marlenv.models import DiscreteActionSpace
|
|
128
128
|
|
|
129
129
|
assert isinstance(self._env.action_space, DiscreteActionSpace)
|
|
130
|
-
self._env = wrappers.
|
|
130
|
+
self._env = wrappers.Centralized(self._env) # type: ignore
|
|
131
131
|
return self
|
|
132
132
|
|
|
133
133
|
def record(
|
marlenv/wrappers/__init__.py
CHANGED
|
@@ -7,7 +7,7 @@ from .paddings import PadObservations, PadExtras
|
|
|
7
7
|
from .penalty_wrapper import TimePenalty
|
|
8
8
|
from .available_actions_wrapper import AvailableActions
|
|
9
9
|
from .blind_wrapper import Blind
|
|
10
|
-
from .centralised import
|
|
10
|
+
from .centralised import Centralized
|
|
11
11
|
from .available_actions_mask import AvailableActionsMask
|
|
12
12
|
from .delayed_rewards import DelayedReward
|
|
13
13
|
|
|
@@ -24,6 +24,6 @@ __all__ = [
|
|
|
24
24
|
"TimePenalty",
|
|
25
25
|
"AvailableActions",
|
|
26
26
|
"Blind",
|
|
27
|
-
"
|
|
27
|
+
"Centralized",
|
|
28
28
|
"DelayedReward",
|
|
29
29
|
]
|
marlenv/wrappers/centralised.py
CHANGED
|
@@ -14,7 +14,7 @@ A = TypeVar("A", bound=npt.NDArray | Sequence[int] | Sequence[Sequence[float]])
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
@dataclass
|
|
17
|
-
class
|
|
17
|
+
class Centralized(RLEnvWrapper[A, DiscreteActionSpace]):
|
|
18
18
|
joint_action_space: ActionSpace
|
|
19
19
|
|
|
20
20
|
def __init__(self, env: MARLEnv[A, DiscreteActionSpace]):
|
|
{multi_agent_rlenv-3.3.5.dist-info → multi_agent_rlenv-3.3.6.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: multi-agent-rlenv
|
|
3
|
-
Version: 3.3.5
|
|
3
|
+
Version: 3.3.6
|
|
4
4
|
Summary: A strongly typed Multi-Agent Reinforcement Learning framework
|
|
5
5
|
Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
|
|
6
6
|
Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
|
|
{multi_agent_rlenv-3.3.5.dist-info → multi_agent_rlenv-3.3.6.dist-info}/RECORD
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
marlenv/__init__.py,sha256=
|
|
2
|
-
marlenv/env_builder.py,sha256=
|
|
1
|
+
marlenv/__init__.py,sha256=iEVXbl4mQmey7P2uFdeKEYWEmZ8QxNS_f52jNdw4nZs,3741
|
|
2
|
+
marlenv/env_builder.py,sha256=s_lQANqP3iNc8nmcr3CanRVsExnn9qh0ihh4lFr0c4c,5560
|
|
3
3
|
marlenv/env_pool.py,sha256=R3WIrnQ5Zvff4HR1ecfkDmuO2zl7v1ywQ0K2_nvWFzs,1070
|
|
4
4
|
marlenv/exceptions.py,sha256=gJUC_2rVAvOfK_ypVFc7Myh-pIfSU3To38VBVS_0rZA,1179
|
|
5
5
|
marlenv/mock_env.py,sha256=qB0fYFIfbopJf7Va8kCeVI5vsOy1-2JdEYe9gdV1Ruw,4761
|
|
6
6
|
marlenv/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
marlenv/adapters/__init__.py,sha256=
|
|
8
|
-
marlenv/adapters/gym_adapter.py,sha256=
|
|
9
|
-
marlenv/adapters/overcooked_adapter.py,sha256=
|
|
10
|
-
marlenv/adapters/pettingzoo_adapter.py,sha256=
|
|
7
|
+
marlenv/adapters/__init__.py,sha256=rWiqQOqTx3kVL5ZkPo3rkczrlQBBhQbU55zGI26SEeY,929
|
|
8
|
+
marlenv/adapters/gym_adapter.py,sha256=6CBEjANViTJBTUBmtVyrhJrzjBJxNs_4hmMnXXG2mkU,2906
|
|
9
|
+
marlenv/adapters/overcooked_adapter.py,sha256=JZhB50cQGWGjaHWuPwskUKr6YthEptpYC3cD7i9GVvk,6832
|
|
10
|
+
marlenv/adapters/pettingzoo_adapter.py,sha256=4F1au6uctsqRhGfcZOeDRH-8hmrFXnA5xH1Z1Pnek3s,2870
|
|
11
11
|
marlenv/adapters/pymarl_adapter.py,sha256=x__E90XpFbfSWhnBHtkcD6WYkmKki1LByNbUFoDBUcg,3416
|
|
12
12
|
marlenv/adapters/smac_adapter.py,sha256=fOfKo1hL4ioKtM5qQGcwtfdkdwUEACjAZqaGmkoQUcU,8373
|
|
13
13
|
marlenv/models/__init__.py,sha256=9M-rnj94nsdyO4zm_VEtyYBmde3iD2_eIY4bMB-IqCo,555
|
|
@@ -18,12 +18,12 @@ marlenv/models/spaces.py,sha256=pw8Sum_fHBkR-lyfTqUij4azMCNm8oBZrYZe4WVR7rA,7652
|
|
|
18
18
|
marlenv/models/state.py,sha256=958PXTHadi3gtRnhGgcGtqBnF44R11kdcx62NN2gwxA,1717
|
|
19
19
|
marlenv/models/step.py,sha256=LKGAV2Cu-k9Gz1hwrfvGx51l8axtQRqDE9WVL5r2A1Q,3037
|
|
20
20
|
marlenv/models/transition.py,sha256=2vvuhSSq911weCXio9nuyfsLVh_7ORSU_znOqpLLdLg,5107
|
|
21
|
-
marlenv/wrappers/__init__.py,sha256=
|
|
21
|
+
marlenv/wrappers/__init__.py,sha256=wl23NUYcl0vPJb2QLpe4Xj8ZocUIOarAZX8CgWqdSQE,808
|
|
22
22
|
marlenv/wrappers/agent_id_wrapper.py,sha256=oTIAYxKD1JtHfrZN43mf-3e8pxjd0nxm07vxs3BfrGY,1187
|
|
23
23
|
marlenv/wrappers/available_actions_mask.py,sha256=JoCJ9eqHlkY8wfY-oaceEi8yp1Efs1iK6IO2Ibf9oZA,1468
|
|
24
24
|
marlenv/wrappers/available_actions_wrapper.py,sha256=9UTwP3LXvncBITJeQnEqwiP_lj-ipULACkGs-2QbMrI,1026
|
|
25
25
|
marlenv/wrappers/blind_wrapper.py,sha256=YEayRf_dclhzx6LXsasZ-IM7C71kyPb1gV0pHYYfjig,857
|
|
26
|
-
marlenv/wrappers/centralised.py,sha256=
|
|
26
|
+
marlenv/wrappers/centralised.py,sha256=E2Saezwfw9uWX4R9jDgH3iQLW8c_h2Y1TdtmUi9nIvs,3226
|
|
27
27
|
marlenv/wrappers/delayed_rewards.py,sha256=6oGJe-L_gGI-pQMResbkjsMDvXpni2SQvnTQ6wsZqGo,1170
|
|
28
28
|
marlenv/wrappers/last_action_wrapper.py,sha256=u7a3Da5sg_gMrwZ3SE7PAwt2m9xSYYDKjngQyOmcJ74,2886
|
|
29
29
|
marlenv/wrappers/paddings.py,sha256=VQOF4zaP61R74tQ4XTTT-FkK2QSy31AukICnqCy6zB0,2119
|
|
@@ -31,7 +31,7 @@ marlenv/wrappers/penalty_wrapper.py,sha256=v4_H8OEN2-yujLzRb6P7W7KwmXHtjAFsxcdp3
|
|
|
31
31
|
marlenv/wrappers/rlenv_wrapper.py,sha256=C2XekgBIM4x3Wa2Mtsn7rihRD4ymC2hORI473Af0sfw,2962
|
|
32
32
|
marlenv/wrappers/time_limit.py,sha256=CDIMMJPMyIDHSFxUJaC7nb7Kd86-07NgZeFhrpZm82o,3985
|
|
33
33
|
marlenv/wrappers/video_recorder.py,sha256=d5AFu6qHqby9mOcBsYWYPxAPiK1vtnfMYdZ81AnCekI,2624
|
|
34
|
-
multi_agent_rlenv-3.3.
|
|
35
|
-
multi_agent_rlenv-3.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
36
|
-
multi_agent_rlenv-3.3.5.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
|
|
37
|
-
multi_agent_rlenv-3.3.5.dist-info/RECORD,,
|
|
34
|
+
multi_agent_rlenv-3.3.6.dist-info/METADATA,sha256=oHsLxFw-wlgzPyswB6r3QIWZWM_injRIFJuJczyZDTo,4897
|
|
35
|
+
multi_agent_rlenv-3.3.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
36
|
+
multi_agent_rlenv-3.3.6.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
|
|
37
|
+
multi_agent_rlenv-3.3.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|