multi-agent-rlenv 3.6.3__py3-none-any.whl → 3.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
marlenv/__init__.py CHANGED
@@ -65,9 +65,9 @@ If you want to create a new environment, you can simply create a class that inhe
 from importlib.metadata import version, PackageNotFoundError
 
 try:
-    __version__ = version("overcooked")
+    __version__ = version("multi-agent-rlenv")
 except PackageNotFoundError:
-    __version__ = "0.0.0"  # practical fallback in dev/CI
+    __version__ = "0.0.0"  # fallback for CI
 
 
 from . import models
@@ -44,8 +44,8 @@ class Gym(MARLEnv[Space]):
             raise ValueError("No observation available. Call reset() first.")
         return self._last_obs
 
-    def step(self, actions):
-        obs, reward, done, truncated, info = self._gym_env.step(list(actions)[0])
+    def step(self, action):
+        obs, reward, done, truncated, info = self._gym_env.step(list(action)[0])
         self._last_obs = Observation(
             np.array([obs], dtype=np.float32),
             self.available_actions(),
@@ -74,7 +74,7 @@ class Gym(MARLEnv[Space]):
         image = np.array(self._gym_env.render())
         if sys.platform in ("linux", "linux2"):
             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-        return image
+        return np.array(image, dtype=np.uint8)
 
     def seed(self, seed_value: int):
         self._gym_env.reset(seed=seed_value)
@@ -33,39 +33,39 @@ class PettingZoo(MARLEnv[Space]):
         if obs_space.shape is None:
             raise NotImplementedError("Only discrete observation spaces are supported")
         self._pz_env = env
-        env.reset()
-        super().__init__(n_agents, space, obs_space.shape, self.get_state().shape)
+        self.n_agents = n_agents
+        self.n_actions = space.shape[-1]
+        self.last_observation, state = self.reset()
+        super().__init__(n_agents, space, obs_space.shape, state.shape)
         self.agents = env.possible_agents
-        self.last_observation = None
 
     def get_state(self):
         try:
-            return self._pz_env.state()
+            return State(self._pz_env.state())
         except NotImplementedError:
-            return np.array([0])
+            assert self.last_observation is not None, "Cannot get the state unless there is a previous observation"
+            return State(self.last_observation.data)
 
-    def step(self, actions: npt.NDArray | Sequence):
-        action_dict = dict(zip(self.agents, actions))
+    def step(self, action: npt.NDArray | Sequence):
+        action_dict = dict(zip(self.agents, action))
         obs, reward, term, trunc, info = self._pz_env.step(action_dict)
         obs_data = np.array([v for v in obs.values()])
         reward = np.sum([r for r in reward.values()], keepdims=True)
         self.last_observation = Observation(obs_data, self.available_actions())
-        state = State(self.get_state())
+        state = self.get_state()
         return Step(self.last_observation, state, reward, any(term.values()), any(trunc.values()), info)
 
     def reset(self):
         obs = self._pz_env.reset()[0]
         obs_data = np.array([v for v in obs.values()])
-        self.last_observation = Observation(obs_data, self.available_actions(), self.get_state())
-        return self.last_observation
+        self.last_observation = Observation(obs_data, self.available_actions())
+        return self.last_observation, self.get_state()
 
     def get_observation(self):
-        if self.last_observation is None:
-            raise ValueError("No observation available. Call reset() first.")
         return self.last_observation
 
     def seed(self, seed_value: int):
         self._pz_env.reset(seed=seed_value)
 
-    def render(self, *_):
-        return self._pz_env.render()
+    def render(self):
+        self._pz_env.render()
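
With 3.7.x the adapters converge on a common contract: reset() returns an (Observation, State) pair and step(action) takes one joint action and returns a Step. The sketch below is editorial (not part of the package diff) and shows a generic random rollout against that contract; the available_actions attribute on Observation is assumed from the constructor calls above.

import numpy as np

def random_rollout(env, n_steps: int = 100):
    # reset() now yields both the first observation and the first state
    obs, state = env.reset()
    for _ in range(n_steps):
        # One action per agent, sampled from the boolean availability mask
        # (assumed shape: (n_agents, n_actions)).
        action = [int(np.random.choice(np.nonzero(mask)[0])) for mask in obs.available_actions]
        step = env.step(action)
        obs, state = step.obs, step.state
        if step.is_terminal:
            obs, state = env.reset()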
@@ -3,7 +3,7 @@ from typing import overload
 
 import numpy as np
 import numpy.typing as npt
-from smac.env import StarCraft2Env
+from smac.env import StarCraft2Env  # pyright: ignore[reportMissingImports]
 
 from marlenv.models import MARLEnv, Observation, State, Step, MultiDiscreteSpace, DiscreteSpace
 
@@ -169,17 +169,18 @@ class SMAC(MARLEnv[MultiDiscreteSpace]):
 
     def reset(self):
         obs, state = self._env.reset()
-        obs = Observation(np.array(obs), self.available_actions(), state)
-        return obs
+        obs = Observation(np.array(obs), self.available_actions())
+        state = State(state)
+        return obs, state
 
     def get_observation(self):
-        return self._env.get_obs()
+        return Observation(np.array(self._env.get_obs()), self.available_actions())
 
     def get_state(self):
         return State(self._env.get_state())
 
-    def step(self, actions):
-        reward, done, info = self._env.step(actions)
+    def step(self, action):
+        reward, done, info = self._env.step(action)
         obs = Observation(
             self._env.get_obs(),  # type: ignore
             self.available_actions(),
@@ -199,7 +200,9 @@ class SMAC(MARLEnv[MultiDiscreteSpace]):
         return np.array(self._env.get_avail_actions()) == 1
 
     def get_image(self):
-        return self._env.render(mode="rgb_array")
+        img = self._env.render(mode="rgb_array")
+        assert img is not None
+        return img
 
     def seed(self, seed_value: int):
         self._env = StarCraft2Env(map_name=self._env.map_name, seed=seed_value)
@@ -1,13 +1,10 @@
 from marlenv.adapters import SMAC
 from .deepsea import DeepSea
+from .matrix_game import MatrixGame
+from .coordinated_grid import CoordinatedGrid
 
 
-__all__ = [
-    "SMAC",
-    "DeepSea",
-    "lle",
-    "overcooked",
-]
+__all__ = ["SMAC", "DeepSea", "lle", "overcooked", "MatrixGame", "connect_n", "CoordinatedGrid"]
 
 
 def lle():
@@ -20,3 +17,9 @@ def overcooked():
     from overcooked import Overcooked  # pyright: ignore[reportMissingImports]
 
     return Overcooked
+
+
+def connect_n():
+    from .connectn import ConnectN
+
+    return ConnectN
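
The catalog keeps optional dependencies lazy: lle(), overcooked() and the new connect_n() import and return the environment class only when called. A hedged usage sketch, assuming the subpackage is importable as marlenv.catalog:

from marlenv import catalog  # assumed import path for the catalog shown above

ConnectN = catalog.connect_n()          # triggers the import of marlenv.catalog.connectn
env = ConnectN(width=7, height=6, n=4)  # defaults shown in connectn/env.py below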
@@ -0,0 +1,11 @@
+"""
+Connect-N game environment.
+
+Inspiration from: https://github.com/Gualor/connect4-montecarlo
+"""
+
+from .board import GameBoard
+from .env import ConnectN
+
+
+__all__ = ["ConnectN", "GameBoard"]
@@ -0,0 +1,186 @@
+from enum import IntEnum
+
+import numpy as np
+
+
+class StepResult(IntEnum):
+    NOTHING = 0
+    TIE = 1
+    WIN = 2
+
+
+class GameBoard:
+    """Connect4 game board class."""
+
+    def __init__(self, width: int, height: int, n: int):
+        assert width >= n or height >= height, "Impossible to win with this combination of width, height and n"
+        self.turn = 1
+        self.board = np.zeros(shape=(height, width), dtype=np.float32)
+        self.width = width
+        self.height = height
+        self.n_to_align = n
+        self.n_items_in_column = np.zeros(width, dtype=np.int32)
+
+        self.str_row = "+" + "-" * (self.width * 4 - 1) + "+"
+        self.numbers = "|" + " ".join([f" {i} " for i in range(self.width)]) + "|"
+
+    def valid_moves(self):
+        """Get list of valid moves (i.e. not full columns)."""
+        return self.n_items_in_column < self.height
+
+    def clear(self):
+        self.board = np.zeros(shape=(self.height, self.width), dtype=np.float32)
+        self.n_items_in_column = np.zeros(self.width, dtype=np.int32)
+        self.turn = 0
+
+    def show(self):
+        """Print out game board on console."""
+        print(self.str_row)
+        for j in range(self.height - 1, -1, -1):
+            for i in range(self.width):
+                match self.board[j, i]:
+                    case 1:
+                        print("| X", end=" ")
+                    case -1:
+                        print("| O", end=" ")
+                    case _:
+                        print("| ", end=" ")
+            print("|")
+        print(self.str_row)
+        print(self.numbers)
+        print(self.str_row)
+
+    def check_win(self, move_played: tuple[int, int]) -> bool:
+        if self.check_rows(move_played):
+            return True
+        if self.check_cols(move_played):
+            return True
+        if self.check_diags(move_played):
+            return True
+        return False
+
+    def check_tie(self) -> bool:
+        """
+        Check whether the game is a tie (i.e. the board is full).
+
+        Note that it does not check for a win, so it should be called after check_win.
+        """
+        # If the last row is full, the game is a tie
+        return bool(np.all(self.board[-1] != 0))
+
+    def check_rows(self, move_played: tuple[int, int]) -> bool:
+        row, col = move_played
+        start_index = max(0, col - self.n_to_align + 1)
+        end_index = min(self.width - self.n_to_align, col) + 1
+        for start in range(start_index, end_index):
+            slice = self.board[row, start : start + self.n_to_align]
+            if np.all(slice == self.turn):
+                return True
+        return False
+
+    def check_cols(self, move_played: tuple[int, int]) -> bool:
+        row, col = move_played
+        start_index = max(0, row - self.n_to_align + 1)
+        end_index = min(self.height - self.n_to_align, row) + 1
+        for start in range(start_index, end_index):
+            slice = self.board[start : start + self.n_to_align, col]
+            if np.all(slice == self.turn):
+                return True
+        return False
+
+    def check_diags(self, move_played: tuple[int, int]) -> bool:
+        row, col = move_played
+        # count the adjacent items in the / diagonal
+        n_adjacent = 0
+        # Top right
+        row_i, col_i = row + 1, col + 1
+        while row_i < self.height and col_i < self.width and self.board[row_i, col_i] == self.turn:
+            n_adjacent += 1
+            row_i += 1
+            col_i += 1
+        # Bottom left
+        row_i, col_i = row - 1, col - 1
+        while row_i >= 0 and col_i >= 0 and self.board[row_i, col_i] == self.turn:
+            n_adjacent += 1
+            row_i -= 1
+            col_i -= 1
+        if n_adjacent >= self.n_to_align - 1:
+            return True
+
+        # Count adjacent items in the \ diagonal
+        n_adjacent = 0
+        # Top left
+        row_i, col_i = row + 1, col - 1
+        while row_i < self.height and col_i >= 0 and self.board[row_i, col_i] == self.turn:
+            n_adjacent += 1
+            row_i += 1
+            col_i -= 1
+        # Bottom right
+        row_i, col_i = row - 1, col + 1
+        while row_i >= 0 and col_i < self.width and self.board[row_i, col_i] == self.turn:
+            n_adjacent += 1
+            row_i -= 1
+            col_i += 1
+
+        return n_adjacent >= self.n_to_align - 1
+
+    def play(self, column: int) -> StepResult:
+        """Apply move to board.
+
+        Args:
+            column (int): Selected column index (between 0 and the number of cols - 1).
+
+        Returns:
+            bool: whether the player has won.
+        """
+        row_index = self.n_items_in_column[column]
+        if row_index >= self.height:
+            raise ValueError(f"Column {column} is full, use `valid_moves` to check valid moves.")
+        self.n_items_in_column[column] += 1
+        self.board[row_index, column] = self.turn
+        if self.check_win((row_index, column)):
+            result = StepResult.WIN
+        elif self.check_tie():
+            result = StepResult.TIE
+        else:
+            result = StepResult.NOTHING
+        self.switch_turn()
+        return result
+
+    def switch_turn(self) -> None:
+        """Switch turn between players."""
+        self.turn = -self.turn
+
+
+def test_win():
+    board = GameBoard(4, 1, 2)
+    assert board.play(0) == StepResult.NOTHING
+    assert board.play(2) == StepResult.NOTHING
+    assert board.play(1) == StepResult.WIN
+
+
+def test_tie():
+    board = GameBoard(4, 1, 2)
+    assert board.play(0) == StepResult.NOTHING
+    assert board.play(1) == StepResult.NOTHING
+    assert board.play(2) == StepResult.NOTHING
+    assert board.play(3) == StepResult.TIE
+
+
+def test_win_diag():
+    board = GameBoard(2, 2, 2)
+    assert board.play(0) == StepResult.NOTHING
+    assert board.play(1) == StepResult.NOTHING
+    assert board.play(1) == StepResult.WIN
+
+    board.clear()
+    assert board.play(1) == StepResult.NOTHING
+    assert board.play(1) == StepResult.NOTHING
+    assert board.play(0) == StepResult.WIN
+
+
+if __name__ == "__main__":
+    test_win()
+    test_tie()
+    test_win_diag()
+    print("All tests passed!")
@@ -0,0 +1,51 @@
+from typing import Sequence
+import numpy as np
+import numpy.typing as npt
+from marlenv import MARLEnv, MultiDiscreteSpace, Step, State, Observation, DiscreteSpace
+
+from .board import GameBoard, StepResult
+
+
+class ConnectN(MARLEnv[MultiDiscreteSpace]):
+    def __init__(self, width: int = 7, height: int = 6, n: int = 4):
+        self.board = GameBoard(width, height, n)
+        action_space = DiscreteSpace(self.board.width).repeat(1)
+        observation_shape = (self.board.height, self.board.width)
+        state_shape = observation_shape
+        super().__init__(1, action_space, observation_shape, state_shape)
+
+    def reset(self):
+        self.board.clear()
+        return self.get_observation(), self.get_state()
+
+    def step(self, action: Sequence[int] | npt.NDArray[np.uint32]):
+        match self.board.play(action[0]):
+            case StepResult.NOTHING:
+                done = False
+                reward = 0
+            case StepResult.WIN:
+                done = True
+                reward = 1
+            case StepResult.TIE:
+                done = True
+                reward = 0
+        return Step(self.get_observation(), self.get_state(), reward, done, False)
+
+    def available_actions(self):
+        """Full columns are not available."""
+        return np.expand_dims(self.board.valid_moves(), axis=0)
+
+    def get_observation(self):
+        return Observation(self.board.board.copy(), self.available_actions())
+
+    def get_state(self):
+        return State(self.board.board.copy(), np.array([self.board.turn]))
+
+    def set_state(self, state: State):
+        self.board.board = state.data.copy()  # type: ignore Currently a type error because of the unchecked shape
+        self.board.turn = int(state.extras[0])
+        n_completed = np.count_nonzero(self.board.board, axis=0)
+        self.board.n_items_in_column = n_completed
+
+    def render(self):
+        self.board.show()
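
ConnectN exposes the board as a single-"agent" MARLEnv whose turn alternation happens inside GameBoard; the reward is 1 on the move that wins and 0 otherwise. A hedged self-play sketch (editorial, import path inferred from the RECORD):

import numpy as np
from marlenv.catalog.connectn import ConnectN

env = ConnectN(width=7, height=6, n=4)
obs, state = env.reset()
done = False
while not done:
    mask = env.available_actions()[0]                  # full columns are masked out
    column = int(np.random.choice(np.nonzero(mask)[0]))
    step = env.step([column])                          # reward 1 on the winning move
    obs, done = step.obs, step.is_terminal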
@@ -0,0 +1,139 @@
+import numpy as np
+import itertools
+from marlenv import MARLEnv, DiscreteSpace, Observation, State, Step
+
+
+N_ROWS = 11
+N_COLS = 12
+
+
+class CoordinatedGrid(MARLEnv):
+    """
+    Coordinated grid world environment used in the EMC paper to test the effectiveness of the proposed method.
+    https://proceedings.neurips.cc/paper_files/paper/2021/file/1e8ca836c962598551882e689265c1c5-Paper.pdf
+    """
+
+    def __init__(
+        self,
+        episode_limit=30,
+        time_penalty=2,
+    ):
+        super().__init__(
+            n_agents=2,
+            action_space=DiscreteSpace(5, ["SOUTH", "NORTH", "WEST", "EAST", "STAY"]).repeat(2),
+            observation_shape=(N_ROWS + N_COLS,),
+            state_shape=(N_ROWS + N_COLS,) * 2,
+        )
+        self._episode_steps = 0
+        self.episode_limit = episode_limit
+        self.center = N_COLS // 2
+        ###larger gridworld
+        visible_row = [i for i in range(N_ROWS // 2 - 2, N_ROWS // 2 + 3)]
+        visible_col = [i for i in range(N_COLS // 2 - 3, N_COLS // 2 + 3)]
+        self.vision_index = [[i, j] for i, j in list(itertools.product(visible_row, visible_col))]
+        self.agents_location = [[0, 0], [N_ROWS - 1, N_COLS - 1]]
+        self.time_penalty = time_penalty
+
+    def reset(self):
+        self.agents_location = [[0, 0], [N_ROWS - 1, N_COLS - 1]]
+        self._episode_steps = 0
+        return self.get_observation(), self.get_state()
+
+    def get_observation(self):
+        obs_1 = [[0 for _ in range(N_ROWS)], [0 for _ in range(N_COLS)]]
+        # obs_2 = obs_1.copy()
+        import copy
+
+        obs_2 = copy.deepcopy(obs_1)
+
+        obs_1[0][self.agents_location[0][0]] = 1
+        obs_1[1][self.agents_location[0][1]] = 1
+        obs_1 = obs_1[0] + obs_1[1]
+
+        obs_2[0][self.agents_location[1][0]] = 1
+        obs_2[1][self.agents_location[1][1]] = 1
+        obs_2 = obs_2[0] + obs_2[1]
+
+        if self.agents_location[0] in self.vision_index and self.agents_location[1] in self.vision_index:
+            temp = obs_1.copy()
+            obs_1 += obs_2.copy()
+            obs_2 += temp.copy()
+        elif self.agents_location[0] in self.vision_index:
+            obs_2 += obs_1.copy()
+            obs_1 += [0 for _ in range(N_ROWS + N_COLS)]
+        elif self.agents_location[1] in self.vision_index:
+            obs_1 += obs_2.copy()
+            obs_2 += [0 for _ in range(N_ROWS + N_COLS)]
+        else:
+            obs_2 += [0 for _ in range(N_ROWS + N_COLS)]
+            obs_1 += [0 for _ in range(N_ROWS + N_COLS)]
+
+        obs_data = np.array([obs_1, obs_2])
+        return Observation(obs_data, self.available_actions())
+
+    def get_state(self):
+        obs = self.get_observation()
+        state_data = obs.data.reshape(-1)
+        return State(state_data)
+
+    def available_actions(self):
+        avail_actions = np.full((self.n_agents, self.n_actions), True)
+        for agent_num, (y, x) in enumerate(self.agents_location):
+            if x == 0:
+                avail_actions[agent_num, 0] = 0
+            elif x == N_ROWS - 1:
+                avail_actions[agent_num, 1] = 0
+            if y == 0:
+                avail_actions[agent_num, 2] = 0
+            # Check for center line (depends on the agent number)
+            elif y == self.center + agent_num - 1:
+                avail_actions[agent_num, 3] = 0
+        return avail_actions
+
+    def step(self, action):
+        for idx, action in enumerate(action):
+            match action:
+                case 0:
+                    self.agents_location[idx][0] -= 1
+                case 1:
+                    self.agents_location[idx][0] += 1
+                case 2:
+                    self.agents_location[idx][1] -= 1
+                case 3:
+                    self.agents_location[idx][1] += 1
+                case 4:
+                    pass
+                case _:
+                    raise ValueError(f"Invalid action {action} for agent {idx}!")
+
+        self._episode_steps += 1
+        terminated = self._episode_steps >= self.episode_limit
+        env_info = {"battle_won": False}
+        n_arrived = self.n_agents_arrived()
+        if n_arrived == 1:
+            reward = -self.time_penalty
+        elif n_arrived == 2:
+            reward = 10
+            terminated = True
+            env_info = {"battle_won": True}
+        else:
+            reward = 0
+        return Step(self.get_observation(), self.get_state(), reward, terminated, terminated, env_info)
+
+    def n_agents_arrived(self):
+        n = 0
+        if self.agents_location[0] == [N_ROWS // 2, self.center - 1]:
+            n += 1
+        if self.agents_location[1] == [N_ROWS // 2, self.center]:
+            n += 1
+        return n
+
+    def render(self):
+        print("Agents location: ", self.agents_location)
+        for row in range(N_ROWS):
+            for col in range(N_COLS):
+                if [row, col] in self.agents_location:
+                    print("X", end=" ")
+                else:
+                    print(".", end=" ")
+            print()
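
CoordinatedGrid reproduces the 11x12 coordination task from the EMC paper: both agents must reach two adjacent goal cells in the middle of the grid within episode_limit steps; while only one agent is on its goal the joint reward is -time_penalty per step, and when both arrive the reward is +10 and the episode ends. A minimal editorial sketch:

from marlenv.catalog import CoordinatedGrid

env = CoordinatedGrid(episode_limit=30, time_penalty=2)
obs, state = env.reset()
step = env.step([4, 4])   # action 4 is "STAY" in the 5-action space declared above
env.render()              # prints the grid with both agents marked as "X"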
@@ -45,7 +45,7 @@ class DeepSea(MARLEnv[MultiDiscreteSpace]):
         self._col = 0
         return self.get_observation(), self.get_state()
 
-    def step(self, action: Sequence[int]):
+    def step(self, action: Sequence[int] | np.ndarray):
         self._row += 1
         if action[0] == LEFT:
             self._col -= 1
@@ -0,0 +1,52 @@
+import numpy as np
+from marlenv import MARLEnv, Observation, DiscreteSpace, State, Step
+
+
+class MatrixGame(MARLEnv):
+    """Single step matrix game used in QTRAN, Qatten and QPLEX papers."""
+
+    N_AGENTS = 2
+    UNIT_DIM = 1
+    OBS_SHAPE = (1,)
+    STATE_SIZE = UNIT_DIM * N_AGENTS
+
+    QPLEX_PAYOFF_MATRIX = [
+        [8.0, -12.0, -12.0],
+        [-12.0, 0.0, 0.0],
+        [-12.0, 0.0, 0.0],
+    ]
+
+    def __init__(self, payoff_matrix: list[list[float]]):
+        action_names = [chr(ord("A") + i) for i in range(len(payoff_matrix[0]))]
+        super().__init__(
+            2,
+            action_space=DiscreteSpace(len(payoff_matrix[0]), action_names).repeat(2),
+            observation_shape=MatrixGame.OBS_SHAPE,
+            state_shape=(MatrixGame.STATE_SIZE,),
+        )
+        self.current_step = 0
+        self.payoffs = payoff_matrix
+
+    def reset(self):
+        self.current_step = 0
+        return self.get_observation(), self.get_state()
+
+    def get_observation(self):
+        return Observation(
+            np.array([[self.current_step]] * MatrixGame.N_AGENTS, np.float32),
+            self.available_actions(),
+        )
+
+    def step(self, action):
+        action = list(action)
+        self.current_step += 1
+        return Step(self.get_observation(), self.get_state(), self.payoffs[action[0]][action[1]], True)
+
+    def render(self):
+        return
+
+    def get_state(self):
+        return State(np.zeros((MatrixGame.STATE_SIZE,), np.float32))
+
+    def seed(self, seed_value):
+        return
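
MatrixGame is a one-shot cooperative game: each agent picks an index, the joint payoff is read from the matrix and the episode terminates immediately. An editorial sketch using the bundled QPLEX matrix:

from marlenv.catalog import MatrixGame

env = MatrixGame(MatrixGame.QPLEX_PAYOFF_MATRIX)
obs, state = env.reset()
step = env.step([0, 0])   # joint action (A, A) -> payoff 8.0, episode ends
print(step.reward)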
@@ -0,0 +1,93 @@
+from enum import IntEnum
+import cv2
+import marlenv
+import numpy as np
+import numpy.typing as npt
+from typing import Sequence
+from marlenv import Observation, State, DiscreteSpace, Step
+
+PAYOFF_INITIAL = [[0, 0], [0, 0]]
+PAYOFF_2A = [[7, 7], [7, 7]]
+PAYOFF_2B = [[0, 1], [1, 8]]
+
+
+class TwoStepsState(IntEnum):
+    INITIAL = 0
+    STATE_2A = 1
+    STATE_2B = 2
+    END = 3
+
+    def one_hot(self):
+        res = np.zeros((4,), dtype=np.float32)
+        res[self.value] = 1
+        return res
+
+    @staticmethod
+    def from_one_hot(x: np.ndarray):
+        for s in TwoStepsState:
+            if x[s.value] == 1:
+                return s
+        raise ValueError()
+
+
+class TwoStepsGame(marlenv.MARLEnv):
+    """
+    Two-steps game used in QMix paper (https://arxiv.org/pdf/1803.11485.pdf, section 5)
+    to demonstrate its superior representationability compared to VDN.
+    """
+
+    def __init__(self):
+        self.state = TwoStepsState.INITIAL
+        self._identity = np.identity(2, dtype=np.float32)
+        super().__init__(
+            2,
+            DiscreteSpace(2).repeat(2),
+            observation_shape=(self.state.one_hot().shape[0] + 2,),
+            state_shape=self.state.one_hot().shape,
+        )
+
+    def reset(self):
+        self.state = TwoStepsState.INITIAL
+        return self.observation(), self.get_state()
+
+    def step(self, action: npt.NDArray[np.int32] | Sequence):
+        match self.state:
+            case TwoStepsState.INITIAL:
+                # In the initial step, only agent 0's actions have an influence on the state
+                payoffs = PAYOFF_INITIAL
+                if action[0] == 0:
+                    self.state = TwoStepsState.STATE_2A
+                elif action[0] == 1:
+                    self.state = TwoStepsState.STATE_2B
+                else:
+                    raise ValueError(f"Invalid action: {action[0]}")
+            case TwoStepsState.STATE_2A:
+                payoffs = PAYOFF_2A
+                self.state = TwoStepsState.END
+            case TwoStepsState.STATE_2B:
+                payoffs = PAYOFF_2B
+                self.state = TwoStepsState.END
+            case TwoStepsState.END:
+                raise ValueError("Episode is already over")
+        reward = payoffs[action[0]][action[1]]
+        done = self.state == TwoStepsState.END
+        return Step(self.observation(), self.get_state(), reward, done, False)
+
+    def get_state(self):
+        return State(self.state.one_hot())
+
+    def observation(self):
+        obs_data = np.array([self.state.one_hot(), self.state.one_hot()])
+        extras = self._identity
+        return Observation(obs_data, self.available_actions(), extras)
+
+    def render(self):
+        print(self.state)
+
+    def get_image(self):
+        state = self.state.one_hot()
+        img = cv2.cvtColor(state, cv2.COLOR_GRAY2BGR)
+        return np.array(img, dtype=np.uint8)
+
+    def set_state(self, state: State):
+        self.state = TwoStepsState.from_one_hot(state.data)
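
TwoStepsGame follows the QMIX didactic example: agent 0's first action selects the payoff matrix (2A or 2B), and the second joint action collects the corresponding payoff and ends the episode. Editorial sketch; the module is listed in the RECORD but not re-exported from marlenv.catalog above, so it is imported by its path:

from marlenv.catalog.two_steps import TwoStepsGame

env = TwoStepsGame()
obs, state = env.reset()
step = env.step([0, 0])   # agent 0 picks 0 -> move to STATE_2A, reward 0
step = env.step([1, 1])   # payoff matrix 2A pays 7 for every joint action
print(step.reward, step.is_terminal)   # 7 True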
marlenv/env_pool.py CHANGED
@@ -20,10 +20,10 @@ class EnvPool(RLEnvWrapper[ActionSpaceType]):
             assert env.has_same_inouts(self.envs[0]), "All environments must have the same inputs and outputs"
         super().__init__(self.envs[0])
 
-    def seed(self, seed: int):
-        random.seed(seed)
+    def seed(self, seed_value: int):
+        random.seed(seed_value)
         for env in self.envs:
-            env.seed(seed)
+            env.seed(seed_value)
 
     def reset(self):
         self.wrapped = random.choice(self.envs)
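
The seed() rename keeps EnvPool consistent with the other environments' seed(seed_value) signature; it seeds Python's random module (used to pick an environment on each reset()) and every pooled environment. A tentative sketch, assuming EnvPool is built from a list of environments as the self.envs attribute suggests:

from marlenv import EnvPool  # assumed export; the class lives in marlenv/env_pool.py

pool = EnvPool([make_env() for _ in range(4)])  # `make_env` is a placeholder factory
pool.seed(42)                                   # seeds `random` and each pooled env
obs, state = pool.reset()                       # reset() switches to a randomly chosen env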
marlenv/mock_env.py CHANGED
@@ -73,9 +73,9 @@ class DiscreteMockEnv(MARLEnv[MultiDiscreteSpace]):
     def render(self, mode: str = "human"):
         return
 
-    def step(self, actions):
+    def step(self, action):
         self.t += 1
-        self.actions_history.append(actions)
+        self.actions_history.append(action)
         return Step(
             self.get_observation(),
             self.get_state(),
marlenv/models/spaces.py CHANGED
@@ -8,7 +8,7 @@ import numpy.typing as npt
 
 
 @dataclass
-class Space(ABC):
+class Space[T](ABC):
     shape: tuple[int, ...]
     size: int
     labels: list[str]
@@ -21,7 +21,7 @@ class Space(ABC):
         self.labels = labels
 
     @abstractmethod
-    def sample(self, mask: Optional[npt.NDArray[np.bool_]] = None) -> npt.NDArray[np.float32]:
+    def sample(self, mask: npt.NDArray[np.bool] | None = None) -> T:
         """Sample a value from the space."""
 
     def __eq__(self, value: object) -> bool:
@@ -44,7 +44,7 @@ class Space(ABC):
 
 
 @dataclass
-class DiscreteSpace(Space):
+class DiscreteSpace(Space[int]):
     size: int
     """Number of categories"""
 
@@ -53,7 +53,7 @@ class DiscreteSpace(Space):
         self.size = size
         self.space = np.arange(size)
 
-    def sample(self, mask: Optional[npt.NDArray[np.bool]] = None):
+    def sample(self, mask: npt.NDArray[np.bool] | None = None):
         space = self.space.copy()
         if mask is not None:
             space = space[mask]
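
Space is now generic in its sample type, so DiscreteSpace.sample() is typed as int while the multi-discrete and continuous variants return arrays, and masks use NumPy 2's np.bool alias instead of Optional[np.bool_]. A short editorial sketch of the masked sampling shown above:

import numpy as np
from marlenv.models.spaces import DiscreteSpace

space = DiscreteSpace(5)
value = space.sample()                                                    # one of 0..4
masked = space.sample(mask=np.array([True, False, True, False, False]))  # only 0 or 2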
@@ -87,7 +87,7 @@ class DiscreteSpace(Space):
 
 
 @dataclass
-class MultiDiscreteSpace(Space):
+class MultiDiscreteSpace(Space[npt.NDArray[np.int32]]):
     n_dims: int
     spaces: tuple[DiscreteSpace, ...]
 
@@ -123,7 +123,7 @@ class MultiDiscreteSpace(Space):
 
 
 @dataclass
-class ContinuousSpace(Space):
+class ContinuousSpace(Space[npt.NDArray[np.float32]]):
     """A continuous space (box) in R^n."""
 
     low: npt.NDArray[np.float32]
@@ -192,7 +192,7 @@ class ContinuousSpace(Space):
         action = np.array(action)
         return np.clip(action, self.low, self.high)
 
-    def sample(self) -> npt.NDArray[np.float32]:
+    def sample(self, *args, **kwargs):
         r = np.random.random(self.shape) * (self.high - self.low) + self.low
         return r.astype(np.float32)
 
marlenv/utils/schedule.py CHANGED
@@ -145,17 +145,15 @@ class Schedule:
     @staticmethod
     def from_json(data: dict[str, Any]):
         """Create a Schedule from a JSON-like dictionary."""
-        classname = data.get("name")
-        if classname == "LinearSchedule":
-            return LinearSchedule(data["start_value"], data["end_value"], data["n_steps"])
-        elif classname == "ExpSchedule":
-            return ExpSchedule(data["start_value"], data["end_value"], data["n_steps"])
-        elif classname == "ConstantSchedule":
-            return ConstantSchedule(data["value"])
-        elif classname == "ArbitrarySchedule":
+        candidates = [LinearSchedule, ExpSchedule, ConstantSchedule]
+        data = data.copy()
+        classname = data.pop("name")
+        for cls in candidates:
+            if cls.__name__ == classname:
+                return cls(**data)
+        if classname == "ArbitrarySchedule":
             raise NotImplementedError("ArbitrarySchedule cannot be deserialized from JSON")
-        else:
-            raise ValueError(f"Unknown schedule type: {classname}")
+        raise ValueError(f"Unknown schedule type: {classname}")
 
 
 @dataclass(eq=False)
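
The rewritten from_json looks the constructor up by class name and forwards the remaining keys as keyword arguments, so every serializable schedule round-trips through one code path. A hedged sketch, assuming a linear schedule serializes with the field names used by the old code above:

from marlenv.utils.schedule import Schedule  # module path per the RECORD

sched = Schedule.from_json(
    {"name": "LinearSchedule", "start_value": 1.0, "end_value": 0.05, "n_steps": 50_000}
)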
@@ -18,8 +18,8 @@ class AgentId(RLEnvWrapper[AS]):
         super().__init__(env, extra_shape=(env.n_agents + env.extras_shape[0],), extra_meanings=meanings)
         self._identity = np.identity(env.n_agents, dtype=np.float32)
 
-    def step(self, actions):
-        step = super().step(actions)
+    def step(self, action):
+        step = super().step(action)
         step.obs.add_extra(self._identity)
         return step
 
@@ -18,8 +18,8 @@ class Blind(RLEnvWrapper[AS]):
         super().__init__(env)
         self.p = float(p)
 
-    def step(self, actions):
-        step = super().step(actions)
+    def step(self, action):
+        step = super().step(action)
         if random.random() < self.p:
             step.obs.data = np.zeros_like(step.obs.data)
         return step
@@ -42,9 +42,9 @@ class Centralized(RLEnvWrapper[MultiDiscreteSpace]):
         action_names = [str(a) for a in product(*agent_actions)]
         return DiscreteSpace(env.n_actions**env.n_agents, action_names).repeat(1)
 
-    def step(self, actions: npt.NDArray | Sequence):
-        action = actions[0]
-        individual_actions = self._individual_actions(action)
+    def step(self, action: npt.NDArray | Sequence):
+        action1 = action[0]
+        individual_actions = self._individual_actions(action1)
         individual_actions = np.array(individual_actions)
         step = self.wrapped.step(individual_actions)  # type: ignore
         step.obs = self._joint_observation(step.obs)
@@ -27,8 +27,8 @@ class DelayedReward(RLEnvWrapper[AS]):
         self.reward_queue.append(np.zeros(self.reward_space.shape, dtype=np.float32))
         return super().reset()
 
-    def step(self, actions):
-        step = super().step(actions)
+    def step(self, action):
+        step = super().step(action)
         self.reward_queue.append(step.reward)
         # If the step is terminal, we sum all the remaining rewards
         if step.is_terminal:
@@ -33,13 +33,13 @@ class LastAction(RLEnvWrapper[AS]):
         state.add_extra(self.last_one_hot_actions.flatten())
         return obs, state
 
-    def step(self, actions):
-        step = super().step(actions)
+    def step(self, action):
+        step = super().step(action)
         match self.wrapped.action_space:
             case ContinuousSpace():
-                self.last_actions = actions
+                self.last_actions = action
             case DiscreteSpace() | MultiDiscreteSpace():
-                self.last_one_hot_actions = self.compute_one_hot_actions(actions)
+                self.last_one_hot_actions = self.compute_one_hot_actions(action)
             case other:
                 raise NotImplementedError(f"Action space {other} not supported")
         step.obs.add_extra(self.last_one_hot_actions)
@@ -24,8 +24,8 @@ class PadExtras(RLEnvWrapper[AS]):
         )
         self.n = n_added
 
-    def step(self, actions):
-        step = super().step(actions)
+    def step(self, action):
+        step = super().step(action)
         step.obs = self._add_extras(step.obs)
         return step
 
@@ -48,8 +48,8 @@ class PadObservations(RLEnvWrapper[AS]):
         super().__init__(env, observation_shape=(env.observation_shape[0] + n_added,))
         self.n = n_added
 
-    def step(self, actions):
-        step = super().step(actions)
+    def step(self, action):
+        step = super().step(action)
         step.obs = self._add_obs(step.obs)
         return step
 
@@ -39,9 +39,9 @@ class PotentialShaping(RLEnvWrapper[A], ABC):
         self._current_potential = self.compute_potential()
         return self.add_extras(obs), state
 
-    def step(self, actions):
+    def step(self, action):
         prev_potential = self._current_potential
-        step = super().step(actions)
+        step = super().step(action)
 
         self._current_potential = self.compute_potential()
         shaped_reward = self.gamma * self._current_potential - prev_potential
@@ -62,8 +62,8 @@ class RLEnvWrapper(MARLEnv[AS]):
     def agent_state_size(self):
        return self.wrapped.agent_state_size
 
-    def step(self, actions: np.ndarray | Sequence):
-        return self.wrapped.step(actions)
+    def step(self, action: np.ndarray | Sequence):
+        return self.wrapped.step(action)
 
     def reset(self):
         return self.wrapped.reset()
@@ -0,0 +1,35 @@
+from typing import Sequence
+from marlenv import RLEnvWrapper, MARLEnv, Space, State
+import numpy as np
+from dataclasses import dataclass
+
+
+@dataclass
+class StateCounter[S: Space](RLEnvWrapper[S]):
+    def __init__(self, wrapped: MARLEnv[S]):
+        super().__init__(wrapped)
+        self._per_agent = [set[int]() for _ in range(self.n_agents)]
+        self._joint = set[int]()
+
+    def _register(self, state: State):
+        self._joint.add(hash(state))
+        eh = hash(state.extras.tobytes())
+        for i in range(self.n_agents):
+            agent_data = state.data[i * self.agent_state_size : (i + 1) * self.agent_state_size]
+            h = hash((agent_data.tobytes(), eh))
+            self._per_agent[i].add(h)
+
+    def step(self, action: np.ndarray | Sequence):
+        step = super().step(action)
+        self._register(step.state)
+        if step.is_terminal:
+            step.info = step.info | {
+                "joint-count": len(self._joint),
+                **{f"agent-{i}-count": len(agent_set) for i, agent_set in enumerate(self._per_agent)},
+            }
+        return step
+
+    def reset(self):
+        obs, state = super().reset()
+        self._register(state)
+        return obs, state
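
StateCounter hashes each visited state, both jointly and per agent (splitting state.data into agent_state_size chunks), and reports the distinct counts in step.info on terminal steps. A small editorial sketch; base_env and joint_action are placeholders for an existing environment and a valid action:

from marlenv.wrappers import StateCounter  # assumed re-export; the module is marlenv/wrappers/state_counter.py

env = StateCounter(base_env)
obs, state = env.reset()
step = env.step(joint_action)
if step.is_terminal:
    print(step.info["joint-count"], step.info["agent-0-count"])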
@@ -64,9 +64,9 @@ class TimeLimit(RLEnvWrapper[AS]):
         self.add_time_extra(obs, state)
         return obs, state
 
-    def step(self, actions):
+    def step(self, action):
         self._current_step += 1
-        step = super().step(actions)
+        step = super().step(action)
         if self.add_extra:
             self.add_time_extra(step.obs, step.state)
         # If we reach the time limit
@@ -44,10 +44,10 @@ class VideoRecorder(RLEnvWrapper[AS]):
             case other:
                 raise ValueError(f"Unsupported file video encoding: {other}")
 
-    def step(self, actions):
+    def step(self, action):
         if self._recorder is None:
             raise RuntimeError("VideoRecorder not initialized")
-        step = super().step(actions)
+        step = super().step(action)
         img = self.get_image()
         self._recorder.write(img)
         if step.is_terminal:
@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: multi-agent-rlenv
-Version: 3.6.3
+Version: 3.7.1
 Summary: A strongly typed Multi-Agent Reinforcement Learning framework
 Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
 Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
 License-File: LICENSE
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Requires-Python: <4,>=3.10
+Requires-Python: <4,>=3.12
 Requires-Dist: numpy>=2.0.0
 Requires-Dist: opencv-python>=4.0
 Requires-Dist: typing-extensions>=4.0
@@ -0,0 +1,51 @@
+marlenv/__init__.py,sha256=mxpDjgGSc5eq67w1PIKat0UWkAg9A7VjchWwtzzsvW8,3881
+marlenv/env_builder.py,sha256=RUMFvW7dAJtHMLm8-oPVpjBefDtNliZtjlHci97Xj-Q,3874
+marlenv/env_pool.py,sha256=mJhJUROX9k2A2njwnUOBl2EAuhotksQMugH_Zydg1IU,951
+marlenv/exceptions.py,sha256=gJUC_2rVAvOfK_ypVFc7Myh-pIfSU3To38VBVS_0rZA,1179
+marlenv/mock_env.py,sha256=rvl4QAn046HM79IMMiAj1Aoy3_GBSNBBR1_9fHPutR8,4682
+marlenv/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+marlenv/adapters/__init__.py,sha256=JsKWaeXvUjWEg3JC9TOtyjtZlTI9AAkLyab-jDa5yzM,783
+marlenv/adapters/gym_adapter.py,sha256=DXQ1czcvRoL9hTwcVzfMyXArZeVIHP1gAKqZJO87y7Y,3065
+marlenv/adapters/pettingzoo_adapter.py,sha256=A3dcwsQa7jlWc14TybXpBknl0FPK5YK9w-6tzMBHlDI,2913
+marlenv/adapters/pymarl_adapter.py,sha256=2s7EY31s1hrml3q-BBaXo_eDMXTjkebozZPvzsgrb9c,3353
+marlenv/adapters/smac_adapter.py,sha256=nGWNRpn1F6ZFIoTcU0IJGApc_1GHaoBOVsoNljJ-PAg,8509
+marlenv/catalog/__init__.py,sha256=UCJGbmVzNtKvO3fZQWxR_EigGpXhAyIMevyXxghB2F8,535
+marlenv/catalog/coordinated_grid.py,sha256=Kq5UzG9rr5gYRO0QWFCmKmO56JIzgIR19an9_pvypJU,4997
+marlenv/catalog/deepsea.py,sha256=yTyvskWZiAZem11L8cZwHedBIDQ4EAxE2IaUKrjKL2U,2413
+marlenv/catalog/matrix_game.py,sha256=zkErnh6ZIa1kBryYMVLw-jeMCd2AJ-BlP2yROxpbb0w,1519
+marlenv/catalog/two_steps.py,sha256=lI-q4-Q8283QZTjY0wk7OfXWB6Ln-lquYUjHyT4URi4,2970
+marlenv/catalog/connectn/__init__.py,sha256=BKfM0ZofMK6zqGURi2bzILyNFfYjfbZpKTs5ikKiJAk,195
+marlenv/catalog/connectn/board.py,sha256=GVcFA1OJgLUmQoTIfOO9M7nL9dFv-4T3tGrVsP15zyg,6124
+marlenv/catalog/connectn/env.py,sha256=Ot5vfAbzS6eRe3-nLW_AkhEH7F1WVvv4_odoxZU7HNg,1905
+marlenv/models/__init__.py,sha256=uihmRs71Gg5z7Bvau_xtaQVg7xEtX8sTzi74bIHL5P0,443
+marlenv/models/env.py,sha256=BG1iVHxGD_p827mF0ewyOBn6wU2gtFsHLW1b4UtW-V0,7841
+marlenv/models/episode.py,sha256=zsyxsW4LIioPKyY4DZKn64A31e5ZvlwOf3HIGuRUzhs,13531
+marlenv/models/observation.py,sha256=RhvKvmys4bu3UwwVsvu7fJ7TMKt2QkKnBD1e0hw2r7s,3528
+marlenv/models/spaces.py,sha256=d_aIPWwPdaOWZeNRUUdzSiDxs9XQb9itPnrE_EyhhfQ,7810
+marlenv/models/state.py,sha256=LbP--JxBzRwMFpEAaZyxCX13xKQ27xPE2fabohaq9YI,2058
+marlenv/models/step.py,sha256=00PhD_ccdCIYAY1SVJdJU91weU0Y_tNIJwK16TN_53I,3056
+marlenv/models/transition.py,sha256=UkJVRNxZoyRkjE7YmKtUf_4xA7cOEh20O60dTldbvys,5070
+marlenv/utils/__init__.py,sha256=ky5mz_T7EF65YNaEN1UDCUYZVlz7hFyKResgIJlE_1Q,462
+marlenv/utils/cached_property_collector.py,sha256=IOjbr61f0DqLhcidXKrl7MhN1BOEGiTzCANIKQCxaF0,600
+marlenv/utils/import_placeholders.py,sha256=QN7gsfbFgSP2Lh-7YBC1RH-SNjbFacvRFmBgNs4Eb90,972
+marlenv/utils/schedule.py,sha256=4S0V0RyYHuReVafeHnpfvSLf3oF0buAzD09qMFfexa0,9133
+marlenv/wrappers/__init__.py,sha256=Z4_M-mxRNKQeu52tkmQ4B2m3-zrsmjfXXL5NsWQ4vu4,952
+marlenv/wrappers/action_randomizer.py,sha256=A1kejqGOTA0sc_RQL0EOd6sMSbcIdiV5zlscjKUlzdY,474
+marlenv/wrappers/agent_id_wrapper.py,sha256=i2EhZtWTt4xcu3lPINizg_OS0ISx3DW8lBhKqUxFt2c,1124
+marlenv/wrappers/available_actions_mask.py,sha256=OMyt2KntsR8JA2RuRgvwdzqzPe-_H-KKkbUUJfe_mks,1404
+marlenv/wrappers/available_actions_wrapper.py,sha256=_HRl9zsjJgSrLgVuT-BjpnnfrfM8ic6wBUWlg67uCx4,926
+marlenv/wrappers/blind_wrapper.py,sha256=fEZH4zb8XhC01-G5Oll_rjBdHpvQF-Ax6g6KZgin8Dk,763
+marlenv/wrappers/centralised.py,sha256=HYgdvFrOD8ETXAMLYD_YwdQ02mSGBaEswDZutQFIuvk,3131
+marlenv/wrappers/delayed_rewards.py,sha256=wGj_a0IQ-2OsgMbuBEaiUKbyJLd5dZGcnlxtKTwCjIU,1106
+marlenv/wrappers/last_action_wrapper.py,sha256=Hq4SffR4dhiYmt_LjKhyez56o1Zkiw57617oo8_p4ps,2599
+marlenv/wrappers/paddings.py,sha256=DlYYnPbRPTBcpYuuUQPTlU0ZT4j26WmUIrJP-B5RF44,2020
+marlenv/wrappers/penalty_wrapper.py,sha256=3YBoUV6ETksZ8tFEOq1WYXvPs3ejMAehE6-QA8e4JOE,864
+marlenv/wrappers/potential_shaping.py,sha256=9-CtYe1uGcPTfbc-jx5jHPbgjZ2DpwuyNjmOyhaUvKQ,1659
+marlenv/wrappers/rlenv_wrapper.py,sha256=iFSQsDMkUUbQJKEO8l6SosNi-eOUVSh4pIJVu7aM8Qo,2991
+marlenv/wrappers/state_counter.py,sha256=QmEMb55vOnK-VJuvKsDIIBgcNRsHuovqgpK2pcCY7sA,1211
+marlenv/wrappers/time_limit.py,sha256=HctKeiepPQ2NAIa208SnvknioSkRIuUQ4X-Xhf_XTs0,3974
+marlenv/wrappers/video_recorder.py,sha256=mtWcqaYNCu-zjVXvpa8DJe3_062tpK_TChOu-Xyxs3s,2533
+multi_agent_rlenv-3.7.1.dist-info/METADATA,sha256=1yiAoMwqkzgpKby8KV09M-vVLRgJS5ZKZMqZCfHn80A,5751
+multi_agent_rlenv-3.7.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+multi_agent_rlenv-3.7.1.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
+multi_agent_rlenv-3.7.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.27.0
+Generator: hatchling 1.28.0
 Root-Is-Purelib: true
 Tag: py3-none-any
@@ -1,44 +0,0 @@
-marlenv/__init__.py,sha256=MJgaW73zWYJKTNMWE8V3hTvrcMk-WEX3RaG-K_oIDD8,3886
-marlenv/env_builder.py,sha256=RUMFvW7dAJtHMLm8-oPVpjBefDtNliZtjlHci97Xj-Q,3874
-marlenv/env_pool.py,sha256=nCEBkGQU62fcvCAANyAqY8gCFjYlVnSCg-V3Fhx00yc,933
-marlenv/exceptions.py,sha256=gJUC_2rVAvOfK_ypVFc7Myh-pIfSU3To38VBVS_0rZA,1179
-marlenv/mock_env.py,sha256=kKvTdZl4_xSTTI9V6otZ1P709sfPYrqZSbbZaTip9iI,4684
-marlenv/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-marlenv/adapters/__init__.py,sha256=JsKWaeXvUjWEg3JC9TOtyjtZlTI9AAkLyab-jDa5yzM,783
-marlenv/adapters/gym_adapter.py,sha256=5HZF3g0QD4n7K4GQoMis4q0zj97uFTLdzdxMYHzM_UE,3041
-marlenv/adapters/pettingzoo_adapter.py,sha256=w9Ta-X4L_6ZXdDGmREOdcU0vpLR8lGP__s49DyK3dk8,2852
-marlenv/adapters/pymarl_adapter.py,sha256=2s7EY31s1hrml3q-BBaXo_eDMXTjkebozZPvzsgrb9c,3353
-marlenv/adapters/smac_adapter.py,sha256=8uWC7YKsaSXeTS8AUhpGOKvrWMbVEQT2-pml5BaFUB0,8343
-marlenv/catalog/__init__.py,sha256=dlT_xXdjV9gdXc518xRXC4Np1TSpAMV5Wdwk4M2br8c,351
-marlenv/catalog/deepsea.py,sha256=kwmTkp_iUf1ltpYIcZoZVJKriWSAQa8z6pv7qy4jFDE,2400
-marlenv/models/__init__.py,sha256=uihmRs71Gg5z7Bvau_xtaQVg7xEtX8sTzi74bIHL5P0,443
-marlenv/models/env.py,sha256=BG1iVHxGD_p827mF0ewyOBn6wU2gtFsHLW1b4UtW-V0,7841
-marlenv/models/episode.py,sha256=zsyxsW4LIioPKyY4DZKn64A31e5ZvlwOf3HIGuRUzhs,13531
-marlenv/models/observation.py,sha256=RhvKvmys4bu3UwwVsvu7fJ7TMKt2QkKnBD1e0hw2r7s,3528
-marlenv/models/spaces.py,sha256=1aPmTcoOTU9nlwlcN7aswNrORwghOYAGqCLAMpk39SA,7793
-marlenv/models/state.py,sha256=LbP--JxBzRwMFpEAaZyxCX13xKQ27xPE2fabohaq9YI,2058
-marlenv/models/step.py,sha256=00PhD_ccdCIYAY1SVJdJU91weU0Y_tNIJwK16TN_53I,3056
-marlenv/models/transition.py,sha256=UkJVRNxZoyRkjE7YmKtUf_4xA7cOEh20O60dTldbvys,5070
-marlenv/utils/__init__.py,sha256=ky5mz_T7EF65YNaEN1UDCUYZVlz7hFyKResgIJlE_1Q,462
-marlenv/utils/cached_property_collector.py,sha256=IOjbr61f0DqLhcidXKrl7MhN1BOEGiTzCANIKQCxaF0,600
-marlenv/utils/import_placeholders.py,sha256=QN7gsfbFgSP2Lh-7YBC1RH-SNjbFacvRFmBgNs4Eb90,972
-marlenv/utils/schedule.py,sha256=BdjefYgAtGlh1wWGHENid4WNnPOU78kkNiRvR5A9GEA,9308
-marlenv/wrappers/__init__.py,sha256=Z4_M-mxRNKQeu52tkmQ4B2m3-zrsmjfXXL5NsWQ4vu4,952
-marlenv/wrappers/action_randomizer.py,sha256=A1kejqGOTA0sc_RQL0EOd6sMSbcIdiV5zlscjKUlzdY,474
-marlenv/wrappers/agent_id_wrapper.py,sha256=9qHV3LMQ4AjcDCSuvQhz5h9hUf7Xtrdi2sIxmNZk5NA,1126
-marlenv/wrappers/available_actions_mask.py,sha256=OMyt2KntsR8JA2RuRgvwdzqzPe-_H-KKkbUUJfe_mks,1404
-marlenv/wrappers/available_actions_wrapper.py,sha256=_HRl9zsjJgSrLgVuT-BjpnnfrfM8ic6wBUWlg67uCx4,926
-marlenv/wrappers/blind_wrapper.py,sha256=In4zhygJqWf5ibwq80kf5NtE1fRtHGw-IpAPvBUs1HU,765
-marlenv/wrappers/centralised.py,sha256=1QWxjzLecC3vlf0G3EVC58_v1uR2shluzUa_9HrcN9o,3131
-marlenv/wrappers/delayed_rewards.py,sha256=P8az9rYmu67OzL1ZEFqfTQcCxRI_AXKXrKUBQ3pURl8,1108
-marlenv/wrappers/last_action_wrapper.py,sha256=QVepSLcWExqACwKvAM0G2LALapSoWdd7YHmah2LZ3vE,2603
-marlenv/wrappers/paddings.py,sha256=0aAi7RP1yL8I5mR4Oxzl9-itKys88mgsPjqe7q-frbk,2024
-marlenv/wrappers/penalty_wrapper.py,sha256=3YBoUV6ETksZ8tFEOq1WYXvPs3ejMAehE6-QA8e4JOE,864
-marlenv/wrappers/potential_shaping.py,sha256=T_QvnmWReCgpyoInxRw2UXbmdvcBD5U-vV1ledLG7y8,1661
-marlenv/wrappers/rlenv_wrapper.py,sha256=S6G1VjFklTEzU6bj0AXrTDXnsTQJARq8VB4uUH6AXe4,2993
-marlenv/wrappers/time_limit.py,sha256=GxbxcbfFyuVg14ylQU2C_cjmV9q4uDAt5wepfgX_PyM,3976
-marlenv/wrappers/video_recorder.py,sha256=ucBQSNRPqDr-2mYxrTCqlrWcxSWtSJ7XlRC9-LdukBM,2535
-multi_agent_rlenv-3.6.3.dist-info/METADATA,sha256=jfW3dd1O5u8VurE32m6YTKeDXHU6iIDxF04G8sSisWM,5751
-multi_agent_rlenv-3.6.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-multi_agent_rlenv-3.6.3.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
-multi_agent_rlenv-3.6.3.dist-info/RECORD,,