multi-agent-rlenv 3.7.0__py3-none-any.whl → 3.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- marlenv/adapters/__init__.py +17 -14
- marlenv/adapters/pettingzoo_adapter.py +1 -1
- marlenv/adapters/smac_adapter.py +2 -15
- marlenv/catalog/__init__.py +15 -7
- marlenv/catalog/connectn/__init__.py +11 -0
- marlenv/catalog/connectn/board.py +186 -0
- marlenv/catalog/connectn/env.py +51 -0
- marlenv/catalog/coordinated_grid.py +139 -0
- marlenv/catalog/matrix_game.py +52 -0
- marlenv/models/observation.py +1 -1
- marlenv/models/state.py +1 -1
- marlenv/utils/import_placeholders.py +2 -0
- marlenv/wrappers/state_counter.py +35 -0
- {multi_agent_rlenv-3.7.0.dist-info → multi_agent_rlenv-3.7.2.dist-info}/METADATA +1 -1
- {multi_agent_rlenv-3.7.0.dist-info → multi_agent_rlenv-3.7.2.dist-info}/RECORD +17 -11
- {multi_agent_rlenv-3.7.0.dist-info → multi_agent_rlenv-3.7.2.dist-info}/WHEEL +0 -0
- {multi_agent_rlenv-3.7.0.dist-info → multi_agent_rlenv-3.7.2.dist-info}/licenses/LICENSE +0 -0
marlenv/adapters/__init__.py
CHANGED
|
@@ -1,25 +1,28 @@
|
|
|
1
|
-
from importlib.util import find_spec
|
|
2
1
|
from .pymarl_adapter import PymarlAdapter
|
|
3
|
-
from marlenv.utils import
|
|
2
|
+
from marlenv.utils import dummy_function
|
|
4
3
|
|
|
5
|
-
|
|
6
|
-
if HAS_GYM:
|
|
4
|
+
try:
|
|
7
5
|
from .gym_adapter import Gym, make
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
|
|
7
|
+
HAS_GYM = True
|
|
8
|
+
except ImportError:
|
|
9
|
+
HAS_GYM = False
|
|
10
10
|
make = dummy_function("gymnasium")
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
if HAS_PETTINGZOO:
|
|
12
|
+
try:
|
|
14
13
|
from .pettingzoo_adapter import PettingZoo
|
|
15
|
-
else:
|
|
16
|
-
PettingZoo = dummy_type("pettingzoo")
|
|
17
14
|
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
HAS_PETTINGZOO = True
|
|
16
|
+
except ImportError:
|
|
17
|
+
HAS_PETTINGZOO = False
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
20
|
from .smac_adapter import SMAC
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
|
|
22
|
+
HAS_SMAC = True
|
|
23
|
+
except ImportError:
|
|
24
|
+
HAS_SMAC = False
|
|
25
|
+
|
|
23
26
|
|
|
24
27
|
__all__ = [
|
|
25
28
|
"PymarlAdapter",
|
|
@@ -3,7 +3,7 @@ from typing import Sequence
|
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import numpy.typing as npt
|
|
6
|
-
from gymnasium import spaces
|
|
6
|
+
from gymnasium import spaces
|
|
7
7
|
from pettingzoo import ParallelEnv
|
|
8
8
|
|
|
9
9
|
from marlenv.models import MARLEnv, Observation, State, Step, DiscreteSpace, ContinuousSpace, Space
|
marlenv/adapters/smac_adapter.py
CHANGED
|
@@ -3,7 +3,7 @@ from typing import overload
|
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import numpy.typing as npt
|
|
6
|
-
from smac.env import StarCraft2Env #
|
|
6
|
+
from smac.env import StarCraft2Env # pyright: ignore[reportMissingImports]
|
|
7
7
|
|
|
8
8
|
from marlenv.models import MARLEnv, Observation, State, Step, MultiDiscreteSpace, DiscreteSpace
|
|
9
9
|
|
|
@@ -181,20 +181,7 @@ class SMAC(MARLEnv[MultiDiscreteSpace]):
|
|
|
181
181
|
|
|
182
182
|
def step(self, action):
|
|
183
183
|
reward, done, info = self._env.step(action)
|
|
184
|
-
|
|
185
|
-
self._env.get_obs(), # type: ignore
|
|
186
|
-
self.available_actions(),
|
|
187
|
-
)
|
|
188
|
-
state = self.get_state()
|
|
189
|
-
step = Step(
|
|
190
|
-
obs,
|
|
191
|
-
state,
|
|
192
|
-
reward,
|
|
193
|
-
done,
|
|
194
|
-
False,
|
|
195
|
-
info,
|
|
196
|
-
)
|
|
197
|
-
return step
|
|
184
|
+
return Step(self.get_observation(), self.get_state(), reward, done, False, info)
|
|
198
185
|
|
|
199
186
|
def available_actions(self) -> npt.NDArray[np.bool]:
|
|
200
187
|
return np.array(self._env.get_avail_actions()) == 1
|
marlenv/catalog/__init__.py
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
from marlenv.adapters import SMAC
|
|
2
1
|
from .deepsea import DeepSea
|
|
2
|
+
from .matrix_game import MatrixGame
|
|
3
|
+
from .coordinated_grid import CoordinatedGrid
|
|
3
4
|
|
|
4
5
|
|
|
5
|
-
__all__ = [
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
6
|
+
__all__ = ["smac", "DeepSea", "lle", "overcooked", "MatrixGame", "connect_n", "CoordinatedGrid"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def smac():
    """Return the `SMAC` adapter class.

    The adapter is imported inside the function, so importing this module does not itself
    import `marlenv.adapters.SMAC`.
    """
    from marlenv.adapters import SMAC as adapter_cls

    return adapter_cls
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
def lle():
|
|
@@ -20,3 +22,9 @@ def overcooked():
|
|
|
20
22
|
from overcooked import Overcooked # pyright: ignore[reportMissingImports]
|
|
21
23
|
|
|
22
24
|
return Overcooked
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def connect_n():
    """Return the `ConnectN` environment class.

    The `connectn` sub-package is imported inside the function, i.e. only when this accessor
    is called.
    """
    from .connectn import ConnectN as env_cls

    return env_cls
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
from enum import IntEnum
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class StepResult(IntEnum):
    """Outcome of a single move played on a `GameBoard`."""

    # Neither a win nor a tie was detected: the game goes on.
    NOTHING = 0
    # The board is full and the last move did not win.
    TIE = 1
    # The move that was just played completed an alignment.
    WIN = 2
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class GameBoard:
    """Connect-N game board.

    Cells of `board` hold `1` for the first player, `-1` for the second player and `0` when
    empty. Row 0 is the bottom of the board: tokens stack upwards within a column.
    """

    def __init__(self, width: int, height: int, n: int):
        """Create an empty board on which the first player (`turn == 1`) moves first.

        Args:
            width (int): Number of columns.
            height (int): Number of rows.
            n (int): Number of tokens to align in order to win.

        Raises:
            ValueError: If neither the width nor the height can hold `n` aligned tokens.
        """
        # At least one dimension must be able to hold a full alignment.
        if width < n and height < n:
            raise ValueError("Impossible to win with this combination of width, height and n")
        self.width = width
        self.height = height
        self.n_to_align = n
        self.turn = 1
        self.board = np.zeros(shape=(height, width), dtype=np.float32)
        self.n_items_in_column = np.zeros(width, dtype=np.int32)

        # Pre-rendered horizontal rule and column-index footer used by `show`.
        self.str_row = "+" + "-" * (self.width * 4 - 1) + "+"
        self.numbers = "|" + " ".join([f" {i} " for i in range(self.width)]) + "|"

    def valid_moves(self):
        """Return a boolean mask over the columns: True where the column is not full."""
        return self.n_items_in_column < self.height

    def clear(self):
        """Empty the board and give the first move back to the first player."""
        self.board = np.zeros(shape=(self.height, self.width), dtype=np.float32)
        self.n_items_in_column = np.zeros(self.width, dtype=np.int32)
        # Must match the value set in `__init__`: with a turn of 0, `play` would write "empty"
        # cells and `switch_turn` could never leave 0.
        self.turn = 1

    def show(self):
        """Print out game board on console, top row first."""
        symbols = {1.0: "X", -1.0: "O"}
        print(self.str_row)
        for row in reversed(range(self.height)):
            cells = "".join(f"| {symbols.get(float(value), ' ')} " for value in self.board[row])
            print(cells + "|")
            print(self.str_row)
        print(self.numbers)
        print(self.str_row)

    def check_win(self, move_played: tuple[int, int]) -> bool:
        """Tell whether the token at `move_played` (row, column) completes an alignment.

        Must be called before the turn is switched: cells are compared against `self.turn`.
        """
        return self.check_rows(move_played) or self.check_cols(move_played) or self.check_diags(move_played)

    def check_tie(self) -> bool:
        """
        Check whether the game is a tie (i.e. the board is full).

        Note that it does not check for a win, so it should be called after check_win.
        """
        # Columns fill from row 0 upwards, so a full last row means a full board.
        return bool(np.all(self.board[-1] != 0))

    def check_rows(self, move_played: tuple[int, int]) -> bool:
        """Check every horizontal window of `n_to_align` cells that contains the played cell."""
        row, col = move_played
        first_start = max(0, col - self.n_to_align + 1)
        last_start = min(self.width - self.n_to_align, col)
        for start in range(first_start, last_start + 1):
            window = self.board[row, start : start + self.n_to_align]
            if np.all(window == self.turn):
                return True
        return False

    def check_cols(self, move_played: tuple[int, int]) -> bool:
        """Check every vertical window of `n_to_align` cells that contains the played cell."""
        row, col = move_played
        first_start = max(0, row - self.n_to_align + 1)
        last_start = min(self.height - self.n_to_align, row)
        for start in range(first_start, last_start + 1):
            window = self.board[start : start + self.n_to_align, col]
            if np.all(window == self.turn):
                return True
        return False

    def _count_aligned(self, row: int, col: int, d_row: int, d_col: int) -> int:
        """Count consecutive current-player tokens next to (row, col), walking along (d_row, d_col)."""
        count = 0
        r, c = row + d_row, col + d_col
        while 0 <= r < self.height and 0 <= c < self.width and self.board[r, c] == self.turn:
            count += 1
            r += d_row
            c += d_col
        return count

    def check_diags(self, move_played: tuple[int, int]) -> bool:
        """Check both diagonals going through the played cell."""
        row, col = move_played
        # d_col == 1 is the / diagonal, d_col == -1 is the \ diagonal.
        for d_col in (1, -1):
            upwards = self._count_aligned(row, col, 1, d_col)
            downwards = self._count_aligned(row, col, -1, -d_col)
            # The played cell itself counts as one aligned token.
            if upwards + downwards >= self.n_to_align - 1:
                return True
        return False

    def play(self, column: int) -> "StepResult":
        """Apply move to board and hand the turn over to the other player.

        Args:
            column (int): Selected column index (between 0 and the number of cols - 1).

        Returns:
            StepResult: `WIN` if the move completes an alignment, `TIE` if it fills the board
            without winning, `NOTHING` otherwise.

        Raises:
            ValueError: If the selected column is already full.
        """
        row_index = int(self.n_items_in_column[column])
        if row_index >= self.height:
            raise ValueError(f"Column {column} is full, use `valid_moves` to check valid moves.")
        self.n_items_in_column[column] += 1
        self.board[row_index, column] = self.turn
        # The win check must come first: a winning move can also fill the board.
        if self.check_win((row_index, column)):
            result = StepResult.WIN
        elif self.check_tie():
            result = StepResult.TIE
        else:
            result = StepResult.NOTHING
        self.switch_turn()
        return result

    def switch_turn(self) -> None:
        """Switch turn between players."""
        self.turn = -self.turn
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def test_win():
    """On a 4x1 board with n=2, the first player wins by playing next to its own token."""
    board = GameBoard(4, 1, 2)
    scenario = (
        (0, StepResult.NOTHING),
        (2, StepResult.NOTHING),
        (1, StepResult.WIN),
    )
    for column, expected in scenario:
        assert board.play(column) == expected
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def test_tie():
    """On a 4x1 board with n=2, strictly alternating tokens fill the row without a winner."""
    board = GameBoard(4, 1, 2)
    last_column = 3
    for column in range(last_column):
        assert board.play(column) == StepResult.NOTHING
    assert board.play(last_column) == StepResult.TIE
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def test_win_diag():
    """Play two winning sequences on a 2x2 board with n=2, clearing the board in between."""
    board = GameBoard(2, 2, 2)
    first_game = ((0, StepResult.NOTHING), (1, StepResult.NOTHING), (1, StepResult.WIN))
    for column, expected in first_game:
        assert board.play(column) == expected

    board.clear()
    second_game = ((1, StepResult.NOTHING), (1, StepResult.NOTHING), (0, StepResult.WIN))
    for column, expected in second_game:
        assert board.play(column) == expected
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
if __name__ == "__main__":
    # Run the self-checks in order; a failed assertion aborts before the final message.
    for check in (test_win, test_tie, test_win_diag):
        check()
    print("All tests passed!")
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from typing import Sequence
|
|
2
|
+
import numpy as np
|
|
3
|
+
import numpy.typing as npt
|
|
4
|
+
from marlenv import MARLEnv, MultiDiscreteSpace, Step, State, Observation, DiscreteSpace
|
|
5
|
+
|
|
6
|
+
from .board import GameBoard, StepResult
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ConnectN(MARLEnv[MultiDiscreteSpace]):
    """Connect-N environment backed by a `GameBoard`.

    The environment is declared with a single agent whose action is the column to play; the
    board itself alternates between the two players after every move.
    """

    def __init__(self, width: int = 7, height: int = 6, n: int = 4):
        board = GameBoard(width, height, n)
        self.board = board
        # Observation and state are both the raw grid.
        grid_shape = (board.height, board.width)
        super().__init__(1, DiscreteSpace(board.width).repeat(1), grid_shape, grid_shape)

    def reset(self):
        """Clear the board and return the initial observation and state."""
        self.board.clear()
        return self.get_observation(), self.get_state()

    def step(self, action: Sequence[int] | npt.NDArray[np.uint32]):
        """Play the column given by `action[0]`.

        The reward is 1 when the move wins and 0 otherwise; the episode is done on a win or a tie.
        """
        outcome = self.board.play(action[0])
        done = outcome != StepResult.NOTHING
        reward = 1 if outcome == StepResult.WIN else 0
        return Step(self.get_observation(), self.get_state(), reward, done, False)

    def available_actions(self):
        """Full columns are not available."""
        mask = self.board.valid_moves()
        return np.expand_dims(mask, axis=0)

    def get_observation(self):
        """Return a copy of the grid together with the available actions."""
        grid = self.board.board.copy()
        return Observation(grid, self.available_actions())

    def get_state(self):
        """Return a copy of the grid, with the player to move stored in the extras."""
        grid = self.board.board.copy()
        return State(grid, np.array([self.board.turn]))

    def set_state(self, state: State):
        """Restore the grid and the player to move, then recompute how full each column is."""
        grid = state.data.copy()
        self.board.board = grid  # type: ignore Currently a type error because of the unchecked shape
        self.board.turn = int(state.extras[0])
        self.board.n_items_in_column = np.count_nonzero(grid, axis=0)

    def render(self):
        """Print the board on the console."""
        self.board.show()
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import itertools
|
|
3
|
+
from marlenv import MARLEnv, DiscreteSpace, Observation, State, Step
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
N_ROWS = 11
|
|
7
|
+
N_COLS = 12
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CoordinatedGrid(MARLEnv):
|
|
11
|
+
"""
|
|
12
|
+
Coordinated grid world environment used in the EMC paper to test the effectiveness of the proposed method.
|
|
13
|
+
https://proceedings.neurips.cc/paper_files/paper/2021/file/1e8ca836c962598551882e689265c1c5-Paper.pdf
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
episode_limit=30,
|
|
19
|
+
time_penalty=2,
|
|
20
|
+
):
|
|
21
|
+
super().__init__(
|
|
22
|
+
n_agents=2,
|
|
23
|
+
action_space=DiscreteSpace(5, ["SOUTH", "NORTH", "WEST", "EAST", "STAY"]).repeat(2),
|
|
24
|
+
observation_shape=(N_ROWS + N_COLS,),
|
|
25
|
+
state_shape=(N_ROWS + N_COLS,) * 2,
|
|
26
|
+
)
|
|
27
|
+
self._episode_steps = 0
|
|
28
|
+
self.episode_limit = episode_limit
|
|
29
|
+
self.center = N_COLS // 2
|
|
30
|
+
###larger gridworld
|
|
31
|
+
visible_row = [i for i in range(N_ROWS // 2 - 2, N_ROWS // 2 + 3)]
|
|
32
|
+
visible_col = [i for i in range(N_COLS // 2 - 3, N_COLS // 2 + 3)]
|
|
33
|
+
self.vision_index = [[i, j] for i, j in list(itertools.product(visible_row, visible_col))]
|
|
34
|
+
self.agents_location = [[0, 0], [N_ROWS - 1, N_COLS - 1]]
|
|
35
|
+
self.time_penalty = time_penalty
|
|
36
|
+
|
|
37
|
+
def reset(self):
|
|
38
|
+
self.agents_location = [[0, 0], [N_ROWS - 1, N_COLS - 1]]
|
|
39
|
+
self._episode_steps = 0
|
|
40
|
+
return self.get_observation(), self.get_state()
|
|
41
|
+
|
|
42
|
+
def get_observation(self):
|
|
43
|
+
obs_1 = [[0 for _ in range(N_ROWS)], [0 for _ in range(N_COLS)]]
|
|
44
|
+
# obs_2 = obs_1.copy()
|
|
45
|
+
import copy
|
|
46
|
+
|
|
47
|
+
obs_2 = copy.deepcopy(obs_1)
|
|
48
|
+
|
|
49
|
+
obs_1[0][self.agents_location[0][0]] = 1
|
|
50
|
+
obs_1[1][self.agents_location[0][1]] = 1
|
|
51
|
+
obs_1 = obs_1[0] + obs_1[1]
|
|
52
|
+
|
|
53
|
+
obs_2[0][self.agents_location[1][0]] = 1
|
|
54
|
+
obs_2[1][self.agents_location[1][1]] = 1
|
|
55
|
+
obs_2 = obs_2[0] + obs_2[1]
|
|
56
|
+
|
|
57
|
+
if self.agents_location[0] in self.vision_index and self.agents_location[1] in self.vision_index:
|
|
58
|
+
temp = obs_1.copy()
|
|
59
|
+
obs_1 += obs_2.copy()
|
|
60
|
+
obs_2 += temp.copy()
|
|
61
|
+
elif self.agents_location[0] in self.vision_index:
|
|
62
|
+
obs_2 += obs_1.copy()
|
|
63
|
+
obs_1 += [0 for _ in range(N_ROWS + N_COLS)]
|
|
64
|
+
elif self.agents_location[1] in self.vision_index:
|
|
65
|
+
obs_1 += obs_2.copy()
|
|
66
|
+
obs_2 += [0 for _ in range(N_ROWS + N_COLS)]
|
|
67
|
+
else:
|
|
68
|
+
obs_2 += [0 for _ in range(N_ROWS + N_COLS)]
|
|
69
|
+
obs_1 += [0 for _ in range(N_ROWS + N_COLS)]
|
|
70
|
+
|
|
71
|
+
obs_data = np.array([obs_1, obs_2])
|
|
72
|
+
return Observation(obs_data, self.available_actions())
|
|
73
|
+
|
|
74
|
+
def get_state(self):
|
|
75
|
+
obs = self.get_observation()
|
|
76
|
+
state_data = obs.data.reshape(-1)
|
|
77
|
+
return State(state_data)
|
|
78
|
+
|
|
79
|
+
def available_actions(self):
|
|
80
|
+
avail_actions = np.full((self.n_agents, self.n_actions), True)
|
|
81
|
+
for agent_num, (y, x) in enumerate(self.agents_location):
|
|
82
|
+
if x == 0:
|
|
83
|
+
avail_actions[agent_num, 0] = 0
|
|
84
|
+
elif x == N_ROWS - 1:
|
|
85
|
+
avail_actions[agent_num, 1] = 0
|
|
86
|
+
if y == 0:
|
|
87
|
+
avail_actions[agent_num, 2] = 0
|
|
88
|
+
# Check for center line (depends on the agent number)
|
|
89
|
+
elif y == self.center + agent_num - 1:
|
|
90
|
+
avail_actions[agent_num, 3] = 0
|
|
91
|
+
return avail_actions
|
|
92
|
+
|
|
93
|
+
def step(self, action):
|
|
94
|
+
for idx, action in enumerate(action):
|
|
95
|
+
match action:
|
|
96
|
+
case 0:
|
|
97
|
+
self.agents_location[idx][0] -= 1
|
|
98
|
+
case 1:
|
|
99
|
+
self.agents_location[idx][0] += 1
|
|
100
|
+
case 2:
|
|
101
|
+
self.agents_location[idx][1] -= 1
|
|
102
|
+
case 3:
|
|
103
|
+
self.agents_location[idx][1] += 1
|
|
104
|
+
case 4:
|
|
105
|
+
pass
|
|
106
|
+
case _:
|
|
107
|
+
raise ValueError(f"Invalid action {action} for agent {idx}!")
|
|
108
|
+
|
|
109
|
+
self._episode_steps += 1
|
|
110
|
+
terminated = self._episode_steps >= self.episode_limit
|
|
111
|
+
env_info = {"battle_won": False}
|
|
112
|
+
n_arrived = self.n_agents_arrived()
|
|
113
|
+
if n_arrived == 1:
|
|
114
|
+
reward = -self.time_penalty
|
|
115
|
+
elif n_arrived == 2:
|
|
116
|
+
reward = 10
|
|
117
|
+
terminated = True
|
|
118
|
+
env_info = {"battle_won": True}
|
|
119
|
+
else:
|
|
120
|
+
reward = 0
|
|
121
|
+
return Step(self.get_observation(), self.get_state(), reward, terminated, terminated, env_info)
|
|
122
|
+
|
|
123
|
+
def n_agents_arrived(self):
|
|
124
|
+
n = 0
|
|
125
|
+
if self.agents_location[0] == [N_ROWS // 2, self.center - 1]:
|
|
126
|
+
n += 1
|
|
127
|
+
if self.agents_location[1] == [N_ROWS // 2, self.center]:
|
|
128
|
+
n += 1
|
|
129
|
+
return n
|
|
130
|
+
|
|
131
|
+
def render(self):
|
|
132
|
+
print("Agents location: ", self.agents_location)
|
|
133
|
+
for row in range(N_ROWS):
|
|
134
|
+
for col in range(N_COLS):
|
|
135
|
+
if [row, col] in self.agents_location:
|
|
136
|
+
print("X", end=" ")
|
|
137
|
+
else:
|
|
138
|
+
print(".", end=" ")
|
|
139
|
+
print()
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from marlenv import MARLEnv, Observation, DiscreteSpace, State, Step
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class MatrixGame(MARLEnv):
    """Single step matrix game used in QTRAN, Qatten and QPLEX papers."""

    N_AGENTS = 2
    UNIT_DIM = 1
    OBS_SHAPE = (1,)
    STATE_SIZE = UNIT_DIM * N_AGENTS

    QPLEX_PAYOFF_MATRIX = [
        [8.0, -12.0, -12.0],
        [-12.0, 0.0, 0.0],
        [-12.0, 0.0, 0.0],
    ]

    def __init__(self, payoff_matrix: list[list[float]]):
        """
        Args:
            payoff_matrix: Reward table indexed as `payoff_matrix[action of agent 0][action of agent 1]`.
        """
        # Both agents get as many actions as the first row has entries, named "A", "B", ...
        n_actions = len(payoff_matrix[0])
        first_letter = ord("A")
        action_names = [chr(code) for code in range(first_letter, first_letter + n_actions)]
        super().__init__(
            2,
            action_space=DiscreteSpace(n_actions, action_names).repeat(2),
            observation_shape=MatrixGame.OBS_SHAPE,
            state_shape=(MatrixGame.STATE_SIZE,),
        )
        self.current_step = 0
        self.payoffs = payoff_matrix

    def reset(self):
        """Restart the step counter and return the initial observation and state."""
        self.current_step = 0
        return self.get_observation(), self.get_state()

    def get_observation(self):
        """Every agent observes the current step index only."""
        data = np.full((MatrixGame.N_AGENTS, 1), self.current_step, dtype=np.float32)
        return Observation(data, self.available_actions())

    def step(self, action):
        """Play the joint action; the episode always ends after this single step."""
        joint_action = list(action)
        self.current_step += 1
        observation = self.get_observation()
        state = self.get_state()
        payoff = self.payoffs[joint_action[0]][joint_action[1]]
        return Step(observation, state, payoff, True)

    def render(self):
        """Nothing to render."""
        return None

    def get_state(self):
        """The state carries no information: it is always a zero vector."""
        return State(np.zeros(MatrixGame.STATE_SIZE, dtype=np.float32))

    def seed(self, seed_value):
        """No-op: `seed_value` is ignored."""
        return None
|
marlenv/models/observation.py
CHANGED
|
@@ -92,7 +92,7 @@ class Observation:
|
|
|
92
92
|
"""
|
|
93
93
|
Convert the observation to a tuple of tensors of shape (1, n_agents, <dim>).
|
|
94
94
|
"""
|
|
95
|
-
import torch
|
|
95
|
+
import torch # pyright: ignore[reportMissingImports]
|
|
96
96
|
|
|
97
97
|
data = torch.from_numpy(self.data).unsqueeze(0).to(device, non_blocking=True)
|
|
98
98
|
extras = torch.from_numpy(self.extras).unsqueeze(0).to(device, non_blocking=True)
|
marlenv/models/state.py
CHANGED
|
@@ -55,7 +55,7 @@ class State(Generic[StateType]):
|
|
|
55
55
|
|
|
56
56
|
def as_tensors(self, device=None):
|
|
57
57
|
"""Convert the state to a tuple of tensors of shape (1, <dim>)."""
|
|
58
|
-
import torch
|
|
58
|
+
import torch # pyright: ignore[reportMissingImports]
|
|
59
59
|
|
|
60
60
|
data = torch.from_numpy(self.data).unsqueeze(0).to(device, non_blocking=True)
|
|
61
61
|
extras = torch.from_numpy(self.extras).unsqueeze(0).to(device, non_blocking=True)
|
|
@@ -3,6 +3,8 @@ from types import SimpleNamespace
|
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
def _raise_error(module_name: str, package_name: Optional[str] = None):
    """Raise an `ImportError` explaining which optional dependency is missing.

    Args:
        module_name: Name of the module that could not be imported.
        package_name: Name of the package to install; defaults to `module_name`.

    Raises:
        ImportError: Always.
    """
    package = module_name if package_name is None else package_name
    message = f"The optional dependency `{module_name}` is not installed.\nInstall the `{package}` package (e.g. pip install {package})."
    raise ImportError(message)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from typing import Sequence
|
|
2
|
+
from marlenv import RLEnvWrapper, MARLEnv, Space, State
|
|
3
|
+
import numpy as np
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
|
|
8
|
+
class StateCounter[S: Space](RLEnvWrapper[S]):
|
|
9
|
+
def __init__(self, wrapped: MARLEnv[S]):
|
|
10
|
+
super().__init__(wrapped)
|
|
11
|
+
self._per_agent = [set[int]() for _ in range(self.n_agents)]
|
|
12
|
+
self._joint = set[int]()
|
|
13
|
+
|
|
14
|
+
def _register(self, state: State):
|
|
15
|
+
self._joint.add(hash(state))
|
|
16
|
+
eh = hash(state.extras.tobytes())
|
|
17
|
+
for i in range(self.n_agents):
|
|
18
|
+
agent_data = state.data[i * self.agent_state_size : (i + 1) * self.agent_state_size]
|
|
19
|
+
h = hash((agent_data.tobytes(), eh))
|
|
20
|
+
self._per_agent[i].add(h)
|
|
21
|
+
|
|
22
|
+
def step(self, action: np.ndarray | Sequence):
|
|
23
|
+
step = super().step(action)
|
|
24
|
+
self._register(step.state)
|
|
25
|
+
if step.is_terminal:
|
|
26
|
+
step.info = step.info | {
|
|
27
|
+
"joint-count": len(self._joint),
|
|
28
|
+
**{f"agent-{i}-count": len(agent_set) for i, agent_set in enumerate(self._per_agent)},
|
|
29
|
+
}
|
|
30
|
+
return step
|
|
31
|
+
|
|
32
|
+
def reset(self):
|
|
33
|
+
obs, state = super().reset()
|
|
34
|
+
self._register(state)
|
|
35
|
+
return obs, state
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: multi-agent-rlenv
|
|
3
|
-
Version: 3.7.
|
|
3
|
+
Version: 3.7.2
|
|
4
4
|
Summary: A strongly typed Multi-Agent Reinforcement Learning framework
|
|
5
5
|
Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
|
|
6
6
|
Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
|
|
@@ -4,25 +4,30 @@ marlenv/env_pool.py,sha256=mJhJUROX9k2A2njwnUOBl2EAuhotksQMugH_Zydg1IU,951
|
|
|
4
4
|
marlenv/exceptions.py,sha256=gJUC_2rVAvOfK_ypVFc7Myh-pIfSU3To38VBVS_0rZA,1179
|
|
5
5
|
marlenv/mock_env.py,sha256=rvl4QAn046HM79IMMiAj1Aoy3_GBSNBBR1_9fHPutR8,4682
|
|
6
6
|
marlenv/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
marlenv/adapters/__init__.py,sha256=
|
|
7
|
+
marlenv/adapters/__init__.py,sha256=G-PwyGDymdAMFWtCpnlHkHQuSE40Q8bv_-yQ7gVcbbM,600
|
|
8
8
|
marlenv/adapters/gym_adapter.py,sha256=DXQ1czcvRoL9hTwcVzfMyXArZeVIHP1gAKqZJO87y7Y,3065
|
|
9
|
-
marlenv/adapters/pettingzoo_adapter.py,sha256=
|
|
9
|
+
marlenv/adapters/pettingzoo_adapter.py,sha256=UzSUdP4EUJOt49AB7H45ToA8rUkGmPQgrJKegvK86og,2877
|
|
10
10
|
marlenv/adapters/pymarl_adapter.py,sha256=2s7EY31s1hrml3q-BBaXo_eDMXTjkebozZPvzsgrb9c,3353
|
|
11
|
-
marlenv/adapters/smac_adapter.py,sha256=
|
|
12
|
-
marlenv/catalog/__init__.py,sha256=
|
|
11
|
+
marlenv/adapters/smac_adapter.py,sha256=OIR0_do9KavLlZ2f1YQNJwhl_yLCa6SVvCrp78hwU20,8279
|
|
12
|
+
marlenv/catalog/__init__.py,sha256=l9_lvqpV2wKKMYDrStbW93WGEBDhGw6KjgbZsOcLKx0,570
|
|
13
|
+
marlenv/catalog/coordinated_grid.py,sha256=Kq5UzG9rr5gYRO0QWFCmKmO56JIzgIR19an9_pvypJU,4997
|
|
13
14
|
marlenv/catalog/deepsea.py,sha256=yTyvskWZiAZem11L8cZwHedBIDQ4EAxE2IaUKrjKL2U,2413
|
|
15
|
+
marlenv/catalog/matrix_game.py,sha256=zkErnh6ZIa1kBryYMVLw-jeMCd2AJ-BlP2yROxpbb0w,1519
|
|
14
16
|
marlenv/catalog/two_steps.py,sha256=lI-q4-Q8283QZTjY0wk7OfXWB6Ln-lquYUjHyT4URi4,2970
|
|
17
|
+
marlenv/catalog/connectn/__init__.py,sha256=BKfM0ZofMK6zqGURi2bzILyNFfYjfbZpKTs5ikKiJAk,195
|
|
18
|
+
marlenv/catalog/connectn/board.py,sha256=GVcFA1OJgLUmQoTIfOO9M7nL9dFv-4T3tGrVsP15zyg,6124
|
|
19
|
+
marlenv/catalog/connectn/env.py,sha256=Ot5vfAbzS6eRe3-nLW_AkhEH7F1WVvv4_odoxZU7HNg,1905
|
|
15
20
|
marlenv/models/__init__.py,sha256=uihmRs71Gg5z7Bvau_xtaQVg7xEtX8sTzi74bIHL5P0,443
|
|
16
21
|
marlenv/models/env.py,sha256=BG1iVHxGD_p827mF0ewyOBn6wU2gtFsHLW1b4UtW-V0,7841
|
|
17
22
|
marlenv/models/episode.py,sha256=zsyxsW4LIioPKyY4DZKn64A31e5ZvlwOf3HIGuRUzhs,13531
|
|
18
|
-
marlenv/models/observation.py,sha256=
|
|
23
|
+
marlenv/models/observation.py,sha256=6uY2h0zHBm6g1ECzD8jZLXuSzuuX-U60QW0E_b4qPuc,3569
|
|
19
24
|
marlenv/models/spaces.py,sha256=d_aIPWwPdaOWZeNRUUdzSiDxs9XQb9itPnrE_EyhhfQ,7810
|
|
20
|
-
marlenv/models/state.py,sha256=
|
|
25
|
+
marlenv/models/state.py,sha256=JvCXwf0l7L2UMHkvYp-WM_aDegJ-hePpQI2yiUw6X_g,2099
|
|
21
26
|
marlenv/models/step.py,sha256=00PhD_ccdCIYAY1SVJdJU91weU0Y_tNIJwK16TN_53I,3056
|
|
22
27
|
marlenv/models/transition.py,sha256=UkJVRNxZoyRkjE7YmKtUf_4xA7cOEh20O60dTldbvys,5070
|
|
23
28
|
marlenv/utils/__init__.py,sha256=ky5mz_T7EF65YNaEN1UDCUYZVlz7hFyKResgIJlE_1Q,462
|
|
24
29
|
marlenv/utils/cached_property_collector.py,sha256=IOjbr61f0DqLhcidXKrl7MhN1BOEGiTzCANIKQCxaF0,600
|
|
25
|
-
marlenv/utils/import_placeholders.py,sha256=
|
|
30
|
+
marlenv/utils/import_placeholders.py,sha256=nNcOGHSsBVmcUXRALJD7YBg5WzX6vTQGTfsl8LnMQgA,1036
|
|
26
31
|
marlenv/utils/schedule.py,sha256=4S0V0RyYHuReVafeHnpfvSLf3oF0buAzD09qMFfexa0,9133
|
|
27
32
|
marlenv/wrappers/__init__.py,sha256=Z4_M-mxRNKQeu52tkmQ4B2m3-zrsmjfXXL5NsWQ4vu4,952
|
|
28
33
|
marlenv/wrappers/action_randomizer.py,sha256=A1kejqGOTA0sc_RQL0EOd6sMSbcIdiV5zlscjKUlzdY,474
|
|
@@ -37,9 +42,10 @@ marlenv/wrappers/paddings.py,sha256=DlYYnPbRPTBcpYuuUQPTlU0ZT4j26WmUIrJP-B5RF44,
|
|
|
37
42
|
marlenv/wrappers/penalty_wrapper.py,sha256=3YBoUV6ETksZ8tFEOq1WYXvPs3ejMAehE6-QA8e4JOE,864
|
|
38
43
|
marlenv/wrappers/potential_shaping.py,sha256=9-CtYe1uGcPTfbc-jx5jHPbgjZ2DpwuyNjmOyhaUvKQ,1659
|
|
39
44
|
marlenv/wrappers/rlenv_wrapper.py,sha256=iFSQsDMkUUbQJKEO8l6SosNi-eOUVSh4pIJVu7aM8Qo,2991
|
|
45
|
+
marlenv/wrappers/state_counter.py,sha256=QmEMb55vOnK-VJuvKsDIIBgcNRsHuovqgpK2pcCY7sA,1211
|
|
40
46
|
marlenv/wrappers/time_limit.py,sha256=HctKeiepPQ2NAIa208SnvknioSkRIuUQ4X-Xhf_XTs0,3974
|
|
41
47
|
marlenv/wrappers/video_recorder.py,sha256=mtWcqaYNCu-zjVXvpa8DJe3_062tpK_TChOu-Xyxs3s,2533
|
|
42
|
-
multi_agent_rlenv-3.7.
|
|
43
|
-
multi_agent_rlenv-3.7.
|
|
44
|
-
multi_agent_rlenv-3.7.
|
|
45
|
-
multi_agent_rlenv-3.7.
|
|
48
|
+
multi_agent_rlenv-3.7.2.dist-info/METADATA,sha256=loGAqI0-dvtGJEM6txWjaP00TImesivli2RI0Pd2OK0,5751
|
|
49
|
+
multi_agent_rlenv-3.7.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
50
|
+
multi_agent_rlenv-3.7.2.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
|
|
51
|
+
multi_agent_rlenv-3.7.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|