multi-agent-rlenv 3.3.2__py3-none-any.whl → 3.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
marlenv/__init__.py CHANGED
@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
  If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
  """

- __version__ = "3.3.2"
+ __version__ = "3.3.5"

  from . import models
  from . import wrappers
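
The module docstring in the hunk above names the two extension points: subclass `MARLEnv` for a brand-new environment, or `RLEnvWrapper` to wrap an existing one. As a rough illustration of the first option, here is a minimal sketch that relies only on names visible elsewhere in this diff (`MARLEnv` in `marlenv/models/env.py`); the class name, the elided methods, and the import path are placeholders, not part of the package:

```python
from marlenv.models.env import MARLEnv  # module path taken from this diff; a top-level re-export may also exist


class MyEnv(MARLEnv):
    """Hypothetical environment; only the methods touched by this diff are sketched."""

    def get_observation(self):
        ...  # build and return an Observation for every agent

    def get_state(self):
        ...  # build and return the global State

    def reset(self):
        # In 3.3.5, `reset` is abstract (see marlenv/models/env.py below), so a
        # concrete environment must implement it. Returning the initial
        # observation/state pair mirrors the default body that 3.3.2 provided.
        return self.get_observation(), self.get_state()

    # The remaining abstract methods (step, available_actions, ...) are unchanged
    # in 3.3.5 and omitted here.
```
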
marlenv/adapters/overcooked_adapter.py CHANGED
@@ -22,7 +22,7 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
  self._oenv = oenv
  assert isinstance(oenv.mdp, OvercookedGridworld)
  self._mdp = oenv.mdp
- self.visualizer = StateVisualizer()
+ self._visualizer = StateVisualizer()
  shape = tuple(int(s) for s in self._mdp.get_lossless_state_encoding_shape())
  shape = (shape[2], shape[0], shape[1])
  super().__init__(
@@ -53,19 +53,19 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
  return self.state.timestep

  def _state_data(self):
- state = np.array(self._mdp.lossless_state_encoding(self.state))
+ state = np.array(self._mdp.lossless_state_encoding(self.state), dtype=np.float32)
  # Use axes (agents, channels, height, width) instead of (agents, height, width, channels)
  state = np.transpose(state, (0, 3, 1, 2))
  return state

  def get_state(self):
- return State(self._state_data()[0], np.array([self.time_step / self.horizon]))
+ return State(self._state_data()[0], np.array([self.time_step / self.horizon], dtype=np.float32))

  def get_observation(self) -> Observation:
  return Observation(
  data=self._state_data(),
  available_actions=self.available_actions(),
- extras=np.array([[self.time_step / self.horizon]] * self.n_agents),
+ extras=np.array([[self.time_step / self.horizon]] * self.n_agents, dtype=np.float32),
  )

  def available_actions(self):
@@ -88,6 +88,10 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
  info=info,
  )

+ def reset(self):
+ self._oenv.reset()
+ return self.get_observation(), self.get_state()
+
  def __deepcopy__(self, memo: dict):
  mdp = deepcopy(self._mdp)
  return Overcooked(OvercookedEnv.from_mdp(mdp, horizon=self.horizon))
@@ -96,6 +100,10 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
  return {"horizon": self.horizon, "mdp": self._mdp}

  def __setstate__(self, state: dict):
+ from overcooked_ai_py.mdp.overcooked_mdp import Recipe
+
+ mdp = state["mdp"]
+ Recipe.configure(mdp.recipe_config)
  self.__init__(OvercookedEnv.from_mdp(state["mdp"], horizon=state["horizon"]))

  def get_image(self):
@@ -107,7 +115,7 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
  ]:
  rewards_dict[key] = value

- image = self.visualizer.render_state(
+ image = self._visualizer.render_state(
  state=self._oenv.state,
  grid=self._mdp.terrain_mtx,
  hud_data=StateVisualizer.default_hud_data(self._oenv.state, **rewards_dict),
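
The `__getstate__`/`__setstate__` pair above makes the adapter picklable by storing only the MDP and the horizon; in 3.3.5, `__setstate__` additionally calls `Recipe.configure(mdp.recipe_config)` before rebuilding the environment, presumably so the `overcooked_ai_py` recipe configuration matches the unpickled MDP. A rough usage sketch follows; the layout name, horizon value, and the `from_layout_name` / `overcooked_env` import path are assumptions about `overcooked_ai_py`, not taken from this diff:

```python
import pickle
from copy import deepcopy

from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
from marlenv.adapters.overcooked_adapter import Overcooked

# Illustrative values only.
mdp = OvercookedGridworld.from_layout_name("cramped_room")
env = Overcooked(OvercookedEnv.from_mdp(mdp, horizon=400))

clone = deepcopy(env)                       # dispatches to __deepcopy__ above
restored = pickle.loads(pickle.dumps(env))  # dispatches to __getstate__/__setstate__,
                                            # which in 3.3.5 also calls Recipe.configure(...)
```
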
marlenv/models/env.py CHANGED
@@ -127,7 +127,7 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):

  def seed(self, seed_value: int):
  """Set the environment seed"""
- raise NotImplementedError("Method not implemented")
+ return

  @abstractmethod
  def get_observation(self) -> Observation:
@@ -158,9 +158,9 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
  """Perform a random step in the environment."""
  return self.step(self.sample_action())

+ @abstractmethod
  def reset(self) -> tuple[Observation, State]:
  """Reset the environment and return the initial observation and state."""
- return self.get_observation(), self.get_state()

  def render(self):
  """Render the environment in a window (or in console)"""
multi_agent_rlenv-3.3.2.dist-info/METADATA → multi_agent_rlenv-3.3.5.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: multi-agent-rlenv
- Version: 3.3.2
+ Version: 3.3.5
  Summary: A strongly typed Multi-Agent Reinforcement Learning framework
  Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
  Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
multi_agent_rlenv-3.3.2.dist-info/RECORD → multi_agent_rlenv-3.3.5.dist-info/RECORD
@@ -1,4 +1,4 @@
- marlenv/__init__.py,sha256=GFEcoE8jkA0vbuQyrVRpgsS-iLIelxjpwsB_6pGJGjs,3741
+ marlenv/__init__.py,sha256=XbptMcX24_x5Tk4G6ff6hjI-FkE9MKHLULtCrmvEKl8,3741
  marlenv/env_builder.py,sha256=_rdwcWRqnHP7i4M4Oje1Y2nrEBKH9EzTpqOuw_PNUyw,5560
  marlenv/env_pool.py,sha256=R3WIrnQ5Zvff4HR1ecfkDmuO2zl7v1ywQ0K2_nvWFzs,1070
  marlenv/exceptions.py,sha256=gJUC_2rVAvOfK_ypVFc7Myh-pIfSU3To38VBVS_0rZA,1179
@@ -6,12 +6,12 @@ marlenv/mock_env.py,sha256=qB0fYFIfbopJf7Va8kCeVI5vsOy1-2JdEYe9gdV1Ruw,4761
  marlenv/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  marlenv/adapters/__init__.py,sha256=NEmuHPWz4SGQcgF7QuIeA0QaXK141JoYco-7mqj9Ghk,883
  marlenv/adapters/gym_adapter.py,sha256=Vx6ZrYI7kiNlJODmqyjXu9WCdbCr6trcMNot0pvYD74,2864
- marlenv/adapters/overcooked_adapter.py,sha256=Yf1xmjSgDOOSzR3QNqjxQ_ROFEdKUe_C8BF2nLlW3Us,6162
+ marlenv/adapters/overcooked_adapter.py,sha256=B6umkRG6POUQNkeHgdIVuYRv8WjIA-3S1EVP6uaKwGY,6458
  marlenv/adapters/pettingzoo_adapter.py,sha256=9rwSc_b7qV3ChtEIevOkJvtIp7WoY3CVnu6L9DxlMB4,2852
  marlenv/adapters/pymarl_adapter.py,sha256=x__E90XpFbfSWhnBHtkcD6WYkmKki1LByNbUFoDBUcg,3416
  marlenv/adapters/smac_adapter.py,sha256=fOfKo1hL4ioKtM5qQGcwtfdkdwUEACjAZqaGmkoQUcU,8373
  marlenv/models/__init__.py,sha256=9M-rnj94nsdyO4zm_VEtyYBmde3iD2_eIY4bMB-IqCo,555
- marlenv/models/env.py,sha256=faezAKOIccBauOFeo9wu5sX32pFmP3AMmGyJzaTRJcM,7514
+ marlenv/models/env.py,sha256=S0Yd75X4O7desCHrsdEdz045Aw1Zrtu-pkfz5obYQJI,7433
  marlenv/models/episode.py,sha256=ZGBx6lb2snrUhDgFEwHPV1dp-XvMA7k4quQVUNQxsP0,15140
  marlenv/models/observation.py,sha256=kAmh1hIoC2TGrZlGVzV0y4TXXCSrI7gcmG0raeoncYk,3153
  marlenv/models/spaces.py,sha256=pw8Sum_fHBkR-lyfTqUij4azMCNm8oBZrYZe4WVR7rA,7652
@@ -31,7 +31,7 @@ marlenv/wrappers/penalty_wrapper.py,sha256=v4_H8OEN2-yujLzRb6P7W7KwmXHtjAFsxcdp3
  marlenv/wrappers/rlenv_wrapper.py,sha256=C2XekgBIM4x3Wa2Mtsn7rihRD4ymC2hORI473Af0sfw,2962
  marlenv/wrappers/time_limit.py,sha256=CDIMMJPMyIDHSFxUJaC7nb7Kd86-07NgZeFhrpZm82o,3985
  marlenv/wrappers/video_recorder.py,sha256=d5AFu6qHqby9mOcBsYWYPxAPiK1vtnfMYdZ81AnCekI,2624
- multi_agent_rlenv-3.3.2.dist-info/METADATA,sha256=s9O2h4QdJZ4Ytq1hRiBFJPsAtWV0bd1JLxDx4MChaI0,4897
- multi_agent_rlenv-3.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- multi_agent_rlenv-3.3.2.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
- multi_agent_rlenv-3.3.2.dist-info/RECORD,,
+ multi_agent_rlenv-3.3.5.dist-info/METADATA,sha256=Yts2uzrt6q7VFqf6QNjwtYsuDlTHWDcQHKxKkFzptFg,4897
+ multi_agent_rlenv-3.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ multi_agent_rlenv-3.3.5.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
+ multi_agent_rlenv-3.3.5.dist-info/RECORD,,