multi-agent-rlenv 3.3.6__tar.gz → 3.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/PKG-INFO +1 -1
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/__init__.py +1 -1
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/adapters/overcooked_adapter.py +34 -3
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/tests/test_adapters.py +33 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/.github/workflows/ci.yaml +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/.github/workflows/docs.yaml +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/.gitignore +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/LICENSE +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/README.md +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/pyproject.toml +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/adapters/__init__.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/adapters/gym_adapter.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/adapters/pettingzoo_adapter.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/adapters/pymarl_adapter.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/adapters/smac_adapter.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/env_builder.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/env_pool.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/exceptions.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/mock_env.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/models/__init__.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/models/env.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/models/episode.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/models/observation.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/models/spaces.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/models/state.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/models/step.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/models/transition.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/py.typed +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/__init__.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/agent_id_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/available_actions_mask.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/available_actions_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/blind_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/centralised.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/delayed_rewards.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/last_action_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/paddings.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/penalty_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/rlenv_wrapper.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/time_limit.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/video_recorder.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/tests/__init__.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/tests/test_episode.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/tests/test_models.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/tests/test_pool.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/tests/test_serialization.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/tests/test_spaces.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/tests/test_wrappers.py +0 -0
- {multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/tests/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: multi-agent-rlenv
|
|
3
|
-
Version: 3.3.6
|
|
3
|
+
Version: 3.3.7
|
|
4
4
|
Summary: A strongly typed Multi-Agent Reinforcement Learning framework
|
|
5
5
|
Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
|
|
6
6
|
Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
|
|
@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
|
|
|
62
62
|
If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
|
|
63
63
|
"""
|
|
64
64
|
|
|
65
|
-
__version__ = "3.3.6"
|
|
65
|
+
__version__ = "3.3.7"
|
|
66
66
|
|
|
67
67
|
from . import models
|
|
68
68
|
from . import wrappers
|
{multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/adapters/overcooked_adapter.py
RENAMED
|
@@ -2,6 +2,7 @@ import sys
|
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from typing import Literal, Sequence
|
|
4
4
|
from copy import deepcopy
|
|
5
|
+
from time import time
|
|
5
6
|
|
|
6
7
|
import cv2
|
|
7
8
|
import numpy as np
|
|
@@ -17,8 +18,10 @@ from overcooked_ai_py.visualization.state_visualizer import StateVisualizer
|
|
|
17
18
|
@dataclass
|
|
18
19
|
class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
|
|
19
20
|
horizon: int
|
|
21
|
+
reward_shaping: bool
|
|
20
22
|
|
|
21
|
-
def __init__(self, oenv: OvercookedEnv):
|
|
23
|
+
def __init__(self, oenv: OvercookedEnv, reward_shaping: bool = True):
|
|
24
|
+
self.reward_shaping = reward_shaping
|
|
22
25
|
self._oenv = oenv
|
|
23
26
|
assert isinstance(oenv.mdp, OvercookedGridworld)
|
|
24
27
|
self._mdp = oenv.mdp
|
|
@@ -86,10 +89,12 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
|
|
|
86
89
|
def step(self, actions: Sequence[int] | npt.NDArray[np.int32 | np.int64]) -> Step:
|
|
87
90
|
actions = [Action.ALL_ACTIONS[a] for a in actions]
|
|
88
91
|
_, reward, done, info = self._oenv.step(actions, display_phi=True)
|
|
92
|
+
if self.reward_shaping:
|
|
93
|
+
reward += sum(info["shaped_r_by_agent"])
|
|
89
94
|
return Step(
|
|
90
95
|
obs=self.get_observation(),
|
|
91
96
|
state=self.get_state(),
|
|
92
|
-
reward=np.array([reward]),
|
|
97
|
+
reward=np.array([reward], dtype=np.float32),
|
|
93
98
|
done=done,
|
|
94
99
|
truncated=False,
|
|
95
100
|
info=info,
|
|
@@ -185,6 +190,32 @@ class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
|
|
|
185
190
|
"you_shall_not_pass",
|
|
186
191
|
],
|
|
187
192
|
horizon: int = 400,
|
|
193
|
+
reward_shaping: bool = True,
|
|
188
194
|
):
|
|
189
195
|
mdp = OvercookedGridworld.from_layout_name(layout)
|
|
190
|
-
return Overcooked(OvercookedEnv.from_mdp(mdp, horizon=horizon))
|
|
196
|
+
return Overcooked(OvercookedEnv.from_mdp(mdp, horizon=horizon), reward_shaping=reward_shaping)
|
|
197
|
+
|
|
198
|
+
@staticmethod
|
|
199
|
+
def from_grid(
|
|
200
|
+
grid: Sequence[Sequence[Literal["S", "P", "X", "O", "D", "T", "1", "2", " "] | str]],
|
|
201
|
+
horizon: int = 400,
|
|
202
|
+
reward_shaping: bool = True,
|
|
203
|
+
):
|
|
204
|
+
"""
|
|
205
|
+
Create an Overcooked environment from a grid layout where
|
|
206
|
+
- S is a serving location
|
|
207
|
+
- P is a cooking pot
|
|
208
|
+
- X is a counter
|
|
209
|
+
- O is an onion dispenser
|
|
210
|
+
- D is a dish dispenser
|
|
211
|
+
- T is a tomato dispenser
|
|
212
|
+
- 1 is a player 1 starting location
|
|
213
|
+
- 2 is a player 2 starting location
|
|
214
|
+
- ' ' is a walkable space
|
|
215
|
+
"""
|
|
216
|
+
# It is necessary to add an explicit layout name because Overcooked saves some files under this
|
|
217
|
+
# name. By default the name is a concatenation of the grid elements, which may include characters
|
|
218
|
+
# such as white spaces, pipes ('|') and square brackets ('[' and ']') that are invalid Windows file paths.
|
|
219
|
+
layout_name = str(time())
|
|
220
|
+
mdp = OvercookedGridworld.from_grid(grid, base_layout_params={"layout_name": layout_name})
|
|
221
|
+
return Overcooked(OvercookedEnv.from_mdp(mdp, horizon=horizon), reward_shaping=reward_shaping)
|
|
@@ -183,6 +183,39 @@ def test_overcooked_obs_state():
|
|
|
183
183
|
assert step.done
|
|
184
184
|
|
|
185
185
|
|
|
186
|
+
@pytest.mark.skipif(skip_overcooked, reason="Overcooked is not installed")
|
|
187
|
+
def test_overcooked_shaping():
|
|
188
|
+
from marlenv.adapters import Overcooked
|
|
189
|
+
|
|
190
|
+
UP = 0
|
|
191
|
+
DOWN = 1
|
|
192
|
+
RIGHT = 2
|
|
193
|
+
LEFT = 3
|
|
194
|
+
STAY = 4
|
|
195
|
+
INTERACT = 5
|
|
196
|
+
grid = [
|
|
197
|
+
["X", "X", "X", "D", "X"],
|
|
198
|
+
["X", "O", "S", "2", "X"],
|
|
199
|
+
["X", "1", "P", " ", "X"],
|
|
200
|
+
["X", "T", "S", " ", "X"],
|
|
201
|
+
["X", "X", "X", "X", "X"],
|
|
202
|
+
]
|
|
203
|
+
|
|
204
|
+
env = Overcooked.from_grid(grid, reward_shaping=True)
|
|
205
|
+
env.reset()
|
|
206
|
+
actions_rewards = [
|
|
207
|
+
([UP, STAY], False),
|
|
208
|
+
([INTERACT, STAY], False),
|
|
209
|
+
([RIGHT, STAY], False),
|
|
210
|
+
([INTERACT, STAY], True),
|
|
211
|
+
]
|
|
212
|
+
|
|
213
|
+
for action, expected_reward in actions_rewards:
|
|
214
|
+
step = env.step(action)
|
|
215
|
+
if expected_reward:
|
|
216
|
+
assert step.reward.item() > 0
|
|
217
|
+
|
|
218
|
+
|
|
186
219
|
def test_pymarl():
|
|
187
220
|
LIMIT = 20
|
|
188
221
|
N_AGENTS = 2
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/adapters/pettingzoo_adapter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/agent_id_wrapper.py
RENAMED
|
File without changes
|
{multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/available_actions_mask.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{multi_agent_rlenv-3.3.6 → multi_agent_rlenv-3.3.7}/src/marlenv/wrappers/last_action_wrapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|