multi-agent-rlenv 3.3.3-py3-none-any.whl → 3.3.6-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
marlenv/__init__.py CHANGED
@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
 If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
 """
 
-__version__ = "3.3.3"
+__version__ = "3.3.6"
 
 from . import models
 from . import wrappers
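
The docstring above describes two extension points: subclass `MARLEnv` for a new environment, or `RLEnvWrapper` for a wrapper. Below is a minimal, hypothetical sketch of the first pattern; the constructor keywords and the `Observation`/`State`/`Step` call shapes follow what is visible in the adapter diffs further down, while the import paths and the `DiscreteActionSpace(n_agents=..., n_actions=...)` signature are assumptions rather than confirmed API.

    # Hypothetical sketch of a custom environment (not part of the package).
    import numpy as np
    from marlenv.models import MARLEnv, Observation, State, Step, DiscreteActionSpace  # paths assumed

    class TwoAgentNoop(MARLEnv):
        def __init__(self):
            # Keyword names mirror the Overcooked adapter's super().__init__ call below.
            super().__init__(
                action_space=DiscreteActionSpace(n_agents=2, n_actions=3),  # assumed signature
                observation_shape=(4,),
                state_shape=(1,),
            )

        def get_observation(self):
            # One observation row per agent; every action marked available.
            return Observation(np.zeros((2, 4), dtype=np.float32), np.ones((2, 3), dtype=bool))

        def get_state(self):
            return State(np.zeros(1, dtype=np.float32))

        def reset(self):
            return self.get_observation(), self.get_state()

        def step(self, actions):
            # No dynamics: same observation, zero reward, never terminates.
            return Step(self.get_observation(), self.get_state(), np.zeros(1, dtype=np.float32), False, False, {})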
marlenv/adapters/__init__.py CHANGED
@@ -20,7 +20,7 @@ if find_spec("smac") is not None:
     HAS_SMAC = True
 
 HAS_OVERCOOKED = False
-if find_spec("overcooked_ai_py.mdp") is not None:
+if find_spec("overcooked_ai_py") is not None and find_spec("overcooked_ai_py.mdp") is not None:
     import numpy
 
     # Overcooked assumes a version of numpy <2.0 where np.Inf is available.
marlenv/adapters/gym_adapter.py CHANGED
@@ -42,26 +42,26 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
             case other:
                 raise NotImplementedError(f"Action space {other} not supported")
         super().__init__(space, env.observation_space.shape, (1,))
-        self.env = env
-        if self.env.unwrapped.spec is not None:
-            self.name = self.env.unwrapped.spec.id
+        self._gym_env = env
+        if self._gym_env.unwrapped.spec is not None:
+            self.name = self._gym_env.unwrapped.spec.id
         else:
             self.name = "gym-no-id"
-        self.last_obs = None
+        self._last_obs = None
 
     def get_observation(self):
-        if self.last_obs is None:
+        if self._last_obs is None:
             raise ValueError("No observation available. Call reset() first.")
-        return self.last_obs
+        return self._last_obs
 
     def step(self, actions):
-        obs, reward, done, truncated, info = self.env.step(list(actions)[0])
-        self.last_obs = Observation(
+        obs, reward, done, truncated, info = self._gym_env.step(list(actions)[0])
+        self._last_obs = Observation(
             np.array([obs], dtype=np.float32),
             self.available_actions(),
         )
         return Step(
-            self.last_obs,
+            self._last_obs,
             self.get_state(),
             np.array([reward]),
             done,
@@ -73,18 +73,18 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
         return State(np.zeros(1, dtype=np.float32))
 
     def reset(self):
-        obs_data, _info = self.env.reset()
-        self.last_obs = Observation(
+        obs_data, _info = self._gym_env.reset()
+        self._last_obs = Observation(
             np.array([obs_data], dtype=np.float32),
             self.available_actions(),
         )
-        return self.last_obs, self.get_state()
+        return self._last_obs, self.get_state()
 
     def get_image(self):
-        image = np.array(self.env.render())
+        image = np.array(self._gym_env.render())
         if sys.platform in ("linux", "linux2"):
             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
         return image
 
     def seed(self, seed_value: int):
-        self.env.reset(seed=seed_value)
+        self._gym_env.reset(seed=seed_value)
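
The renamed attributes (`_gym_env`, `_last_obs`) stay private, so calling code only goes through `reset`, `step` and `seed`. A hedged usage sketch, assuming `Gym` is re-exported from `marlenv.adapters` and that gymnasium is installed:

    # Hypothetical usage of the Gym adapter shown above.
    import gymnasium
    from marlenv.adapters import Gym  # import path assumed from the wheel layout

    env = Gym(gymnasium.make("CartPole-v1"))
    obs, state = env.reset()  # (Observation, State), per reset() above
    step = env.step([0])      # one action per agent; the adapter forwards the single agent's action
    env.seed(42)              # delegates to the wrapped env's reset(seed=...)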
marlenv/adapters/overcooked_adapter.py CHANGED
@@ -22,9 +22,10 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         self._oenv = oenv
         assert isinstance(oenv.mdp, OvercookedGridworld)
         self._mdp = oenv.mdp
-        self.visualizer = StateVisualizer()
-        shape = tuple(int(s) for s in self._mdp.get_lossless_state_encoding_shape())
-        shape = (shape[2], shape[0], shape[1])
+        self._visualizer = StateVisualizer()
+        width, height, layers = tuple(self._mdp.lossless_state_encoding_shape)
+        # -1 because we extract the "urgent" layer to the extras
+        shape = (int(layers - 1), int(width), int(height))
         super().__init__(
             action_space=DiscreteActionSpace(
                 n_agents=self._mdp.num_players,
@@ -32,10 +33,10 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
                 action_names=[Action.ACTION_TO_CHAR[a] for a in Action.ALL_ACTIONS],
             ),
             observation_shape=shape,
-            extras_shape=(1,),
-            extras_meanings=["timestep"],
+            extras_shape=(2,),
+            extras_meanings=["timestep", "urgent"],
             state_shape=shape,
-            state_extra_shape=(1,),
+            state_extra_shape=(2,),
             reward_space=ContinuousSpace.from_shape(1),
         )
         self.horizon = int(self._oenv.horizon)
@@ -53,19 +54,25 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         return self.state.timestep
 
     def _state_data(self):
-        state = np.array(self._mdp.lossless_state_encoding(self.state))
+        players_layers = self._mdp.lossless_state_encoding(self.state)
+        state = np.array(players_layers, dtype=np.float32)
         # Use axes (agents, channels, height, width) instead of (agents, height, width, channels)
         state = np.transpose(state, (0, 3, 1, 2))
-        return state
+        # The last last layer is for "urgency", put it in the extras
+        urgency = float(np.all(state[:, -1]))
+        state = state[:, :-1]
+        return state, urgency
 
     def get_state(self):
-        return State(self._state_data()[0], np.array([self.time_step / self.horizon]))
+        data, is_urgent = self._state_data()
+        return State(data[0], np.array([self.time_step / self.horizon, is_urgent], dtype=np.float32))
 
     def get_observation(self) -> Observation:
+        data, is_urgent = self._state_data()
         return Observation(
-            data=self._state_data(),
+            data=data,
             available_actions=self.available_actions(),
-            extras=np.array([[self.time_step / self.horizon]] * self.n_agents),
+            extras=np.array([[self.time_step / self.horizon, is_urgent]] * self.n_agents, dtype=np.float32),
         )
 
     def available_actions(self):
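
With the "urgent" layer stripped from the observation and state tensors, each agent's extras vector now carries two values, `[timestep / horizon, urgent]`. A hedged sketch of what that looks like from the caller's side, assuming `Overcooked` is re-exported from `marlenv.adapters` and that overcooked_ai is installed:

    # Hypothetical shape check for the Overcooked adapter above.
    from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld
    from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
    from marlenv.adapters import Overcooked  # import path assumed

    mdp = OvercookedGridworld.from_layout_name("cramped_room")
    env = Overcooked(OvercookedEnv.from_mdp(mdp, horizon=400))  # same construction as __deepcopy__ below

    obs, state = env.reset()
    print(env.extras_shape)  # (2,), meanings ["timestep", "urgent"] per the constructor above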
@@ -74,7 +81,7 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         for agent_num, agent_actions in enumerate(actions):
             for action in agent_actions:
                 available_actions[agent_num, Action.ACTION_TO_INDEX[action]] = True
-        return np.array(available_actions)
+        return np.array(available_actions, dtype=np.bool)
 
     def step(self, actions: Sequence[int] | npt.NDArray[np.int32 | np.int64]) -> Step:
         actions = [Action.ALL_ACTIONS[a] for a in actions]
@@ -88,6 +95,10 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
             info=info,
         )
 
+    def reset(self):
+        self._oenv.reset()
+        return self.get_observation(), self.get_state()
+
     def __deepcopy__(self, memo: dict):
         mdp = deepcopy(self._mdp)
         return Overcooked(OvercookedEnv.from_mdp(mdp, horizon=self.horizon))
@@ -111,7 +122,7 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
         ]:
             rewards_dict[key] = value
 
-        image = self.visualizer.render_state(
+        image = self._visualizer.render_state(
             state=self._oenv.state,
             grid=self._mdp.terrain_mtx,
             hud_data=StateVisualizer.default_hud_data(self._oenv.state, **rewards_dict),
marlenv/adapters/pettingzoo_adapter.py CHANGED
@@ -32,7 +32,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
         obs_space = env.observation_space(env.possible_agents[0])
         if obs_space.shape is None:
             raise NotImplementedError("Only discrete observation spaces are supported")
-        self._env = env
+        self._pz_env = env
         env.reset()
         super().__init__(space, obs_space.shape, self.get_state().shape)
         self.agents = env.possible_agents
@@ -40,13 +40,13 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
 
     def get_state(self):
         try:
-            return self._env.state()
+            return self._pz_env.state()
         except NotImplementedError:
             return np.array([0])
 
     def step(self, actions: npt.NDArray | Sequence):
         action_dict = dict(zip(self.agents, actions))
-        obs, reward, term, trunc, info = self._env.step(action_dict)
+        obs, reward, term, trunc, info = self._pz_env.step(action_dict)
         obs_data = np.array([v for v in obs.values()])
         reward = np.sum([r for r in reward.values()], keepdims=True)
         self.last_observation = Observation(obs_data, self.available_actions())
@@ -54,7 +54,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
         return Step(self.last_observation, state, reward, any(term.values()), any(trunc.values()), info)
 
     def reset(self):
-        obs = self._env.reset()[0]
+        obs = self._pz_env.reset()[0]
         obs_data = np.array([v for v in obs.values()])
         self.last_observation = Observation(obs_data, self.available_actions(), self.get_state())
         return self.last_observation
@@ -65,7 +65,7 @@ class PettingZoo(MARLEnv[npt.NDArray, ActionSpace]):
         return self.last_observation
 
     def seed(self, seed_value: int):
-        self._env.reset(seed=seed_value)
+        self._pz_env.reset(seed=seed_value)
 
     def render(self, *_):
-        return self._env.render()
+        return self._pz_env.render()
marlenv/env_builder.py CHANGED
@@ -127,7 +127,7 @@ class Builder(Generic[A, AS]):
         from marlenv.models import DiscreteActionSpace
 
         assert isinstance(self._env.action_space, DiscreteActionSpace)
-        self._env = wrappers.Centralised(self._env)  # type: ignore
+        self._env = wrappers.Centralized(self._env)  # type: ignore
         return self
 
     def record(
marlenv/models/env.py CHANGED
@@ -127,7 +127,7 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
 
     def seed(self, seed_value: int):
         """Set the environment seed"""
-        raise NotImplementedError("Method not implemented")
+        return
 
     @abstractmethod
     def get_observation(self) -> Observation:
@@ -158,9 +158,9 @@ class MARLEnv(ABC, Generic[ActionType, ActionSpaceType]):
         """Perform a random step in the environment."""
         return self.step(self.sample_action())
 
+    @abstractmethod
     def reset(self) -> tuple[Observation, State]:
         """Reset the environment and return the initial observation and state."""
-        return self.get_observation(), self.get_state()
 
     def render(self):
         """Render the environment in a window (or in console)"""
marlenv/wrappers/__init__.py CHANGED
@@ -7,7 +7,7 @@ from .paddings import PadObservations, PadExtras
 from .penalty_wrapper import TimePenalty
 from .available_actions_wrapper import AvailableActions
 from .blind_wrapper import Blind
-from .centralised import Centralised
+from .centralised import Centralized
 from .available_actions_mask import AvailableActionsMask
 from .delayed_rewards import DelayedReward
 
@@ -24,6 +24,6 @@ __all__ = [
     "TimePenalty",
     "AvailableActions",
     "Blind",
-    "Centralised",
+    "Centralized",
     "DelayedReward",
 ]
marlenv/wrappers/centralised.py CHANGED
@@ -14,7 +14,7 @@ A = TypeVar("A", bound=npt.NDArray | Sequence[int] | Sequence[Sequence[float]])
 
 
 @dataclass
-class Centralised(RLEnvWrapper[A, DiscreteActionSpace]):
+class Centralized(RLEnvWrapper[A, DiscreteActionSpace]):
     joint_action_space: ActionSpace
 
     def __init__(self, env: MARLEnv[A, DiscreteActionSpace]):
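
`Centralized` (the renamed `Centralised`) wraps an environment whose action space is a `DiscreteActionSpace` and exposes a joint action space, which is how the `Builder` uses it in env_builder.py above. A hedged fragment, assuming the Gym adapter maps gymnasium's `Discrete` space to a `DiscreteActionSpace` (that matching arm is not shown in this diff):

    # Hypothetical usage of the renamed wrapper.
    import gymnasium
    from marlenv import wrappers
    from marlenv.adapters import Gym  # import path assumed

    env = Gym(gymnasium.make("CartPole-v1"))
    centralized = wrappers.Centralized(env)  # same call the Builder makes
    print(centralized.joint_action_space)    # field declared on the dataclass above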
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: multi-agent-rlenv
-Version: 3.3.3
+Version: 3.3.6
 Summary: A strongly typed Multi-Agent Reinforcement Learning framework
 Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
 Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
@@ -1,29 +1,29 @@
-marlenv/__init__.py,sha256=JSZFpurtqjZjg-3iAjw0xoBH6HxTrv2M4gLC_Vw7foY,3741
-marlenv/env_builder.py,sha256=_rdwcWRqnHP7i4M4Oje1Y2nrEBKH9EzTpqOuw_PNUyw,5560
+marlenv/__init__.py,sha256=iEVXbl4mQmey7P2uFdeKEYWEmZ8QxNS_f52jNdw4nZs,3741
+marlenv/env_builder.py,sha256=s_lQANqP3iNc8nmcr3CanRVsExnn9qh0ihh4lFr0c4c,5560
 marlenv/env_pool.py,sha256=R3WIrnQ5Zvff4HR1ecfkDmuO2zl7v1ywQ0K2_nvWFzs,1070
 marlenv/exceptions.py,sha256=gJUC_2rVAvOfK_ypVFc7Myh-pIfSU3To38VBVS_0rZA,1179
 marlenv/mock_env.py,sha256=qB0fYFIfbopJf7Va8kCeVI5vsOy1-2JdEYe9gdV1Ruw,4761
 marlenv/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-marlenv/adapters/__init__.py,sha256=NEmuHPWz4SGQcgF7QuIeA0QaXK141JoYco-7mqj9Ghk,883
-marlenv/adapters/gym_adapter.py,sha256=Vx6ZrYI7kiNlJODmqyjXu9WCdbCr6trcMNot0pvYD74,2864
-marlenv/adapters/overcooked_adapter.py,sha256=ASAOYHDCExy6AZVBwJ4p7i2-ncfFy5Yo1yUCHb66J1A,6297
-marlenv/adapters/pettingzoo_adapter.py,sha256=9rwSc_b7qV3ChtEIevOkJvtIp7WoY3CVnu6L9DxlMB4,2852
+marlenv/adapters/__init__.py,sha256=rWiqQOqTx3kVL5ZkPo3rkczrlQBBhQbU55zGI26SEeY,929
+marlenv/adapters/gym_adapter.py,sha256=6CBEjANViTJBTUBmtVyrhJrzjBJxNs_4hmMnXXG2mkU,2906
+marlenv/adapters/overcooked_adapter.py,sha256=JZhB50cQGWGjaHWuPwskUKr6YthEptpYC3cD7i9GVvk,6832
+marlenv/adapters/pettingzoo_adapter.py,sha256=4F1au6uctsqRhGfcZOeDRH-8hmrFXnA5xH1Z1Pnek3s,2870
 marlenv/adapters/pymarl_adapter.py,sha256=x__E90XpFbfSWhnBHtkcD6WYkmKki1LByNbUFoDBUcg,3416
 marlenv/adapters/smac_adapter.py,sha256=fOfKo1hL4ioKtM5qQGcwtfdkdwUEACjAZqaGmkoQUcU,8373
 marlenv/models/__init__.py,sha256=9M-rnj94nsdyO4zm_VEtyYBmde3iD2_eIY4bMB-IqCo,555
-marlenv/models/env.py,sha256=faezAKOIccBauOFeo9wu5sX32pFmP3AMmGyJzaTRJcM,7514
+marlenv/models/env.py,sha256=S0Yd75X4O7desCHrsdEdz045Aw1Zrtu-pkfz5obYQJI,7433
 marlenv/models/episode.py,sha256=ZGBx6lb2snrUhDgFEwHPV1dp-XvMA7k4quQVUNQxsP0,15140
 marlenv/models/observation.py,sha256=kAmh1hIoC2TGrZlGVzV0y4TXXCSrI7gcmG0raeoncYk,3153
 marlenv/models/spaces.py,sha256=pw8Sum_fHBkR-lyfTqUij4azMCNm8oBZrYZe4WVR7rA,7652
 marlenv/models/state.py,sha256=958PXTHadi3gtRnhGgcGtqBnF44R11kdcx62NN2gwxA,1717
 marlenv/models/step.py,sha256=LKGAV2Cu-k9Gz1hwrfvGx51l8axtQRqDE9WVL5r2A1Q,3037
 marlenv/models/transition.py,sha256=2vvuhSSq911weCXio9nuyfsLVh_7ORSU_znOqpLLdLg,5107
-marlenv/wrappers/__init__.py,sha256=P7YCK1KYJvE6BAlH--nOW9PSlrohhuw-1wlfgCTOl9U,808
+marlenv/wrappers/__init__.py,sha256=wl23NUYcl0vPJb2QLpe4Xj8ZocUIOarAZX8CgWqdSQE,808
 marlenv/wrappers/agent_id_wrapper.py,sha256=oTIAYxKD1JtHfrZN43mf-3e8pxjd0nxm07vxs3BfrGY,1187
 marlenv/wrappers/available_actions_mask.py,sha256=JoCJ9eqHlkY8wfY-oaceEi8yp1Efs1iK6IO2Ibf9oZA,1468
 marlenv/wrappers/available_actions_wrapper.py,sha256=9UTwP3LXvncBITJeQnEqwiP_lj-ipULACkGs-2QbMrI,1026
 marlenv/wrappers/blind_wrapper.py,sha256=YEayRf_dclhzx6LXsasZ-IM7C71kyPb1gV0pHYYfjig,857
-marlenv/wrappers/centralised.py,sha256=J4hOMRT2fit936LifANNJtP7UbBEb_xIyF4VL9-fZGw,3226
+marlenv/wrappers/centralised.py,sha256=E2Saezwfw9uWX4R9jDgH3iQLW8c_h2Y1TdtmUi9nIvs,3226
 marlenv/wrappers/delayed_rewards.py,sha256=6oGJe-L_gGI-pQMResbkjsMDvXpni2SQvnTQ6wsZqGo,1170
 marlenv/wrappers/last_action_wrapper.py,sha256=u7a3Da5sg_gMrwZ3SE7PAwt2m9xSYYDKjngQyOmcJ74,2886
 marlenv/wrappers/paddings.py,sha256=VQOF4zaP61R74tQ4XTTT-FkK2QSy31AukICnqCy6zB0,2119
@@ -31,7 +31,7 @@ marlenv/wrappers/penalty_wrapper.py,sha256=v4_H8OEN2-yujLzRb6P7W7KwmXHtjAFsxcdp3
 marlenv/wrappers/rlenv_wrapper.py,sha256=C2XekgBIM4x3Wa2Mtsn7rihRD4ymC2hORI473Af0sfw,2962
 marlenv/wrappers/time_limit.py,sha256=CDIMMJPMyIDHSFxUJaC7nb7Kd86-07NgZeFhrpZm82o,3985
 marlenv/wrappers/video_recorder.py,sha256=d5AFu6qHqby9mOcBsYWYPxAPiK1vtnfMYdZ81AnCekI,2624
-multi_agent_rlenv-3.3.3.dist-info/METADATA,sha256=XC6OPrNx7othVjaeuNSXsinjCoChSiNmtbUTWGQdX_g,4897
-multi_agent_rlenv-3.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-multi_agent_rlenv-3.3.3.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
-multi_agent_rlenv-3.3.3.dist-info/RECORD,,
+multi_agent_rlenv-3.3.6.dist-info/METADATA,sha256=oHsLxFw-wlgzPyswB6r3QIWZWM_injRIFJuJczyZDTo,4897
+multi_agent_rlenv-3.3.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+multi_agent_rlenv-3.3.6.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
+multi_agent_rlenv-3.3.6.dist-info/RECORD,,