multi-agent-rlenv 3.3.3-py3-none-any.whl → 3.3.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- marlenv/__init__.py +1 -1
- marlenv/adapters/overcooked_adapter.py +9 -5
- marlenv/models/env.py +2 -2
- {multi_agent_rlenv-3.3.3.dist-info → multi_agent_rlenv-3.3.5.dist-info}/METADATA +1 -1
- {multi_agent_rlenv-3.3.3.dist-info → multi_agent_rlenv-3.3.5.dist-info}/RECORD +7 -7
- {multi_agent_rlenv-3.3.3.dist-info → multi_agent_rlenv-3.3.5.dist-info}/WHEEL +0 -0
- {multi_agent_rlenv-3.3.3.dist-info → multi_agent_rlenv-3.3.5.dist-info}/licenses/LICENSE +0 -0
marlenv/__init__.py
CHANGED

@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
 If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
 """
 
-__version__ = "3.3.3"
+__version__ = "3.3.5"
 
 from . import models
 from . import wrappers
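The docstring context above describes the two extension points: subclass `MARLEnv` for a brand-new environment, or subclass `RLEnvWrapper` to wrap an existing one. As a rough illustration of the wrapper route, here is a hedged sketch; the import path, and the assumption that `RLEnvWrapper` takes the wrapped environment in its constructor and forwards every method to it by default, are inferred from the docstring and the RECORD entries rather than confirmed by this diff.

```python
from marlenv.wrappers import RLEnvWrapper  # import path assumed from marlenv/wrappers/rlenv_wrapper.py


class StepCounter(RLEnvWrapper):
    """Hypothetical wrapper that counts calls to step()."""

    def __init__(self, env):
        super().__init__(env)  # assumed constructor signature
        self.n_steps = 0

    def step(self, actions):
        self.n_steps += 1
        return super().step(actions)  # default behaviour: delegate to the wrapped env
```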
marlenv/adapters/overcooked_adapter.py
CHANGED

@@ -22,7 +22,7 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         self._oenv = oenv
         assert isinstance(oenv.mdp, OvercookedGridworld)
         self._mdp = oenv.mdp
-        self.
+        self._visualizer = StateVisualizer()
         shape = tuple(int(s) for s in self._mdp.get_lossless_state_encoding_shape())
         shape = (shape[2], shape[0], shape[1])
         super().__init__(

@@ -53,19 +53,19 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         return self.state.timestep
 
     def _state_data(self):
-        state = np.array(self._mdp.lossless_state_encoding(self.state))
+        state = np.array(self._mdp.lossless_state_encoding(self.state), dtype=np.float32)
         # Use axes (agents, channels, height, width) instead of (agents, height, width, channels)
         state = np.transpose(state, (0, 3, 1, 2))
         return state
 
     def get_state(self):
-        return State(self._state_data()[0], np.array([self.time_step / self.horizon]))
+        return State(self._state_data()[0], np.array([self.time_step / self.horizon], dtype=np.float32))
 
     def get_observation(self) -> Observation:
         return Observation(
             data=self._state_data(),
             available_actions=self.available_actions(),
-            extras=np.array([[self.time_step / self.horizon]] * self.n_agents),
+            extras=np.array([[self.time_step / self.horizon]] * self.n_agents, dtype=np.float32),
         )
 
     def available_actions(self):

@@ -88,6 +88,10 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
             info=info,
         )
 
+    def reset(self):
+        self._oenv.reset()
+        return self.get_observation(), self.get_state()
+
     def __deepcopy__(self, memo: dict):
         mdp = deepcopy(self._mdp)
         return Overcooked(OvercookedEnv.from_mdp(mdp, horizon=self.horizon))

@@ -111,7 +115,7 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         ]:
             rewards_dict[key] = value
 
-        image = self.
+        image = self._visualizer.render_state(
             state=self._oenv.state,
             grid=self._mdp.terrain_mtx,
             hud_data=StateVisualizer.default_hud_data(self._oenv.state, **rewards_dict),
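Aside from storing the `StateVisualizer` on `self._visualizer` and overriding `reset()` (which becomes mandatory with the `models/env.py` change below), the recurring edit in this file is the explicit `dtype=np.float32` on every array built from the lossless state encoding and the normalized timestep. A minimal, self-contained sketch, not taken from the package, of what the cast changes: without it NumPy infers the dtype from the input, so integer-valued encodings come out as int64 and the timestep ratio as float64.

```python
import numpy as np

# Stand-in for mdp.lossless_state_encoding(...): integer-valued feature layers.
encoding = [[0, 1], [1, 0]]

inferred = np.array(encoding)                    # dtype inferred from the data
explicit = np.array(encoding, dtype=np.float32)  # dtype forced at construction

print(inferred.dtype)  # int64 on most platforms
print(explicit.dtype)  # float32

# Same idea for the normalized timestep extras.
print(np.array([3 / 400]).dtype)                    # float64
print(np.array([3 / 400], dtype=np.float32).dtype)  # float32
```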
marlenv/models/env.py
CHANGED

@@ -127,7 +127,7 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
 
     def seed(self, seed_value: int):
         """Set the environment seed"""
-
+        return
 
     @abstractmethod
     def get_observation(self) -> Observation:

@@ -158,9 +158,9 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
         """Perform a random step in the environment."""
         return self.step(self.sample_action())
 
+    @abstractmethod
     def reset(self) -> tuple[Observation, State]:
         """Reset the environment and return the initial observation and state."""
-        return self.get_observation(), self.get_state()
 
     def render(self):
         """Render the environment in a window (or in console)"""
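The substantive change here is that `reset()` loses its default implementation and becomes abstract, so every concrete `MARLEnv` now has to provide one; the Overcooked adapter above adds exactly that. Previously, a subclass that never overrode `reset()` silently reused the base implementation, which returned the current observation and state without reinitializing anything; after this change the omission is caught when the class is instantiated. A minimal, standard-library-only sketch of the effect, with illustrative names that are not part of the package's API:

```python
from abc import ABC, abstractmethod


class Env(ABC):
    """Illustrative stand-in for MARLEnv."""

    @abstractmethod
    def reset(self):
        """Reset the environment and return the initial observation and state."""


class WithReset(Env):
    def reset(self):
        return "obs", "state"


class WithoutReset(Env):
    pass


WithReset().reset()  # works as before
# WithoutReset()     # TypeError: Can't instantiate abstract class WithoutReset with abstract method reset
```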
{multi_agent_rlenv-3.3.3.dist-info → multi_agent_rlenv-3.3.5.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: multi-agent-rlenv
-Version: 3.3.3
+Version: 3.3.5
 Summary: A strongly typed Multi-Agent Reinforcement Learning framework
 Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
 Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
{multi_agent_rlenv-3.3.3.dist-info → multi_agent_rlenv-3.3.5.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-marlenv/__init__.py,sha256=
+marlenv/__init__.py,sha256=XbptMcX24_x5Tk4G6ff6hjI-FkE9MKHLULtCrmvEKl8,3741
 marlenv/env_builder.py,sha256=_rdwcWRqnHP7i4M4Oje1Y2nrEBKH9EzTpqOuw_PNUyw,5560
 marlenv/env_pool.py,sha256=R3WIrnQ5Zvff4HR1ecfkDmuO2zl7v1ywQ0K2_nvWFzs,1070
 marlenv/exceptions.py,sha256=gJUC_2rVAvOfK_ypVFc7Myh-pIfSU3To38VBVS_0rZA,1179

@@ -6,12 +6,12 @@ marlenv/mock_env.py,sha256=qB0fYFIfbopJf7Va8kCeVI5vsOy1-2JdEYe9gdV1Ruw,4761
 marlenv/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 marlenv/adapters/__init__.py,sha256=NEmuHPWz4SGQcgF7QuIeA0QaXK141JoYco-7mqj9Ghk,883
 marlenv/adapters/gym_adapter.py,sha256=Vx6ZrYI7kiNlJODmqyjXu9WCdbCr6trcMNot0pvYD74,2864
-marlenv/adapters/overcooked_adapter.py,sha256=
+marlenv/adapters/overcooked_adapter.py,sha256=B6umkRG6POUQNkeHgdIVuYRv8WjIA-3S1EVP6uaKwGY,6458
 marlenv/adapters/pettingzoo_adapter.py,sha256=9rwSc_b7qV3ChtEIevOkJvtIp7WoY3CVnu6L9DxlMB4,2852
 marlenv/adapters/pymarl_adapter.py,sha256=x__E90XpFbfSWhnBHtkcD6WYkmKki1LByNbUFoDBUcg,3416
 marlenv/adapters/smac_adapter.py,sha256=fOfKo1hL4ioKtM5qQGcwtfdkdwUEACjAZqaGmkoQUcU,8373
 marlenv/models/__init__.py,sha256=9M-rnj94nsdyO4zm_VEtyYBmde3iD2_eIY4bMB-IqCo,555
-marlenv/models/env.py,sha256=
+marlenv/models/env.py,sha256=S0Yd75X4O7desCHrsdEdz045Aw1Zrtu-pkfz5obYQJI,7433
 marlenv/models/episode.py,sha256=ZGBx6lb2snrUhDgFEwHPV1dp-XvMA7k4quQVUNQxsP0,15140
 marlenv/models/observation.py,sha256=kAmh1hIoC2TGrZlGVzV0y4TXXCSrI7gcmG0raeoncYk,3153
 marlenv/models/spaces.py,sha256=pw8Sum_fHBkR-lyfTqUij4azMCNm8oBZrYZe4WVR7rA,7652

@@ -31,7 +31,7 @@ marlenv/wrappers/penalty_wrapper.py,sha256=v4_H8OEN2-yujLzRb6P7W7KwmXHtjAFsxcdp3
 marlenv/wrappers/rlenv_wrapper.py,sha256=C2XekgBIM4x3Wa2Mtsn7rihRD4ymC2hORI473Af0sfw,2962
 marlenv/wrappers/time_limit.py,sha256=CDIMMJPMyIDHSFxUJaC7nb7Kd86-07NgZeFhrpZm82o,3985
 marlenv/wrappers/video_recorder.py,sha256=d5AFu6qHqby9mOcBsYWYPxAPiK1vtnfMYdZ81AnCekI,2624
-multi_agent_rlenv-3.3.
-multi_agent_rlenv-3.3.
-multi_agent_rlenv-3.3.
-multi_agent_rlenv-3.3.
+multi_agent_rlenv-3.3.5.dist-info/METADATA,sha256=Yts2uzrt6q7VFqf6QNjwtYsuDlTHWDcQHKxKkFzptFg,4897
+multi_agent_rlenv-3.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+multi_agent_rlenv-3.3.5.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
+multi_agent_rlenv-3.3.5.dist-info/RECORD,,

{multi_agent_rlenv-3.3.3.dist-info → multi_agent_rlenv-3.3.5.dist-info}/WHEEL
File without changes

{multi_agent_rlenv-3.3.3.dist-info → multi_agent_rlenv-3.3.5.dist-info}/licenses/LICENSE
File without changes