multi-agent-rlenv 3.2.2.tar.gz → 3.3.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/.github/workflows/ci.yaml +5 -5
- multi_agent_rlenv-3.3.1/.github/workflows/docs.yaml +58 -0
- multi_agent_rlenv-3.2.2/README.md → multi_agent_rlenv-3.3.1/PKG-INFO +59 -2
- multi_agent_rlenv-3.2.2/PKG-INFO → multi_agent_rlenv-3.3.1/README.md +24 -17
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/pyproject.toml +22 -2
- multi_agent_rlenv-3.3.1/src/marlenv/__init__.py +111 -0
- multi_agent_rlenv-3.3.1/src/marlenv/adapters/__init__.py +42 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/adapters/gym_adapter.py +6 -1
- multi_agent_rlenv-3.3.1/src/marlenv/adapters/overcooked_adapter.py +164 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/env_builder.py +31 -49
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/env_pool.py +0 -1
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/mock_env.py +15 -5
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/env.py +46 -11
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/observation.py +6 -1
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/spaces.py +49 -10
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/__init__.py +2 -0
- multi_agent_rlenv-3.3.1/src/marlenv/wrappers/delayed_rewards.py +36 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_adapters.py +56 -24
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_serialization.py +7 -1
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_spaces.py +4 -4
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_wrappers.py +32 -2
- multi_agent_rlenv-3.2.2/src/marlenv/__init__.py +0 -62
- multi_agent_rlenv-3.2.2/src/marlenv/adapters/__init__.py +0 -24
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/.gitignore +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/LICENSE +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/adapters/pettingzoo_adapter.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/adapters/pymarl_adapter.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/adapters/smac_adapter.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/exceptions.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/__init__.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/episode.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/state.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/step.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/transition.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/py.typed +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/agent_id_wrapper.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/available_actions_mask.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/available_actions_wrapper.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/blind_wrapper.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/centralised.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/last_action_wrapper.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/paddings.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/penalty_wrapper.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/rlenv_wrapper.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/time_limit.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/video_recorder.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/__init__.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_episode.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_models.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_pool.py +0 -0
- {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/utils.py +0 -0
```diff
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -21,8 +21,8 @@ jobs:
       matrix:
         os:
           - ubuntu-latest
-          - windows-latest
           - macOS-latest
+          - windows-latest
         target:
           - x86_64
           - aarch64
@@ -43,16 +43,16 @@ jobs:
       - name: Install uv
         uses: yezz123/setup-uv@v4
         with:
-          uv-version: 0.
+          uv-version: 0.6.4
       - name: Install dependencies and run pytest
         run: |
-          uv sync
+          uv sync --extra overcooked --extra gym --extra pettingzoo
           uv run pytest

   build:
     name: 📦 Build package
     if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
-    needs: test
+    needs: [test]
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -63,7 +63,7 @@ jobs:
       - name: Install UV
         uses: yezz123/setup-uv@v4
         with:
-          uv-version: 0.
+          uv-version: 0.6.4
       - name: Build wheels
         run: |
           uv venv
```
```diff
--- /dev/null
+++ b/.github/workflows/docs.yaml
@@ -0,0 +1,58 @@
+# Simple workflow for deploying static content to GitHub Pages
+name: Deploy static content to Pages
+
+on:
+  # Runs on pushes targeting the default branch
+  push:
+    branches: ["main", "master"]
+    # Only deploy the documentation on new version tags
+    tags:
+      - 'v[0-9]+.[0-9]+.[0-9]+'
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  # Single deploy job since we're just deploying
+  deploy:
+    name: 📚 Build docs
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.12
+      - name: Install UV
+        uses: yezz123/setup-uv@v4
+        with:
+          uv-version: 0.6.4
+      - name: Install dependencies
+        run: |
+          uv sync
+          uv run pdoc python/marlenv -o ./docs --docformat=google
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          # Upload entire repository
+          path: './docs'
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
```
````diff
--- a/README.md
+++ b/PKG-INFO
@@ -1,8 +1,65 @@
-
+Metadata-Version: 2.4
+Name: multi-agent-rlenv
+Version: 3.3.1
+Summary: A strongly typed Multi-Agent Reinforcement Learning framework
+Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
+Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
+License-File: LICENSE
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Requires-Python: <4,>=3.10
+Requires-Dist: numpy>=2.0.0
+Requires-Dist: opencv-python>=4.0
+Requires-Dist: typing-extensions>=4.0
+Provides-Extra: all
+Requires-Dist: gymnasium>0.29.1; extra == 'all'
+Requires-Dist: overcooked-ai; extra == 'all'
+Requires-Dist: pettingzoo>=1.20; extra == 'all'
+Requires-Dist: pymunk>=6.0; extra == 'all'
+Requires-Dist: pysc2; extra == 'all'
+Requires-Dist: scipy>=1.10; extra == 'all'
+Requires-Dist: smac; extra == 'all'
+Provides-Extra: gym
+Requires-Dist: gymnasium>=0.29.1; extra == 'gym'
+Provides-Extra: overcooked
+Requires-Dist: overcooked-ai>=1.1.0; extra == 'overcooked'
+Requires-Dist: scipy>=1.10; extra == 'overcooked'
+Provides-Extra: pettingzoo
+Requires-Dist: pettingzoo>=1.20; extra == 'pettingzoo'
+Requires-Dist: pymunk>=6.0; extra == 'pettingzoo'
+Requires-Dist: scipy>=1.10; extra == 'pettingzoo'
+Provides-Extra: smac
+Requires-Dist: pysc2; extra == 'smac'
+Requires-Dist: smac; extra == 'smac'
+Description-Content-Type: text/markdown
+
+# `marlenv` - A unified framework for muti-agent reinforcement learning
+**Documentation: [https://yamoling.github.io/multi-agent-rlenv](https://yamoling.github.io/multi-agent-rlenv)**
+
 The objective of `marlenv` is to provide a common (typed) interface for many different reinforcement learning environments.

 As such, `marlenv` provides high level abstractions of RL concepts such as `Observation`s or `Transition`s that are commonly represented as mere (confusing) lists or tuples.

+## Installation
+Install with you preferred package manager (`uv`, `pip`, `poetry`, ...):
+```bash
+$ pip install marlenv[all] # Enable all features
+$ pip install marlenv # Basic installation
+```
+
+There are multiple optional dependencies if you want to support specific libraries and environments. Available options are:
+- `smac` for StarCraft II environments
+- `gym` for OpenAI Gym environments
+- `pettingzoo` for PettingZoo environments
+- `overcooked` for Overcooked environments
+
+Install them with:
+```bash
+$ pip install marlenv[smac] # Install SMAC
+$ pip install marlenv[gym,smac] # Install Gym & smac support
+```
+
+
 ## Using `marlenv` with existing libraries
 `marlenv` unifies multiple popular libraries under a single interface. Namely, `marlenv` supports `smac`, `gymnasium` and `pettingzoo`.

@@ -32,7 +89,7 @@ from marlenv import RLEnv, DiscreteActionSpace, Observation
 N_AGENTS = 3
 N_ACTIONS = 5

-class CustomEnv(
+class CustomEnv(MARLEnv[DiscreteActionSpace]):
     def __init__(self, width: int, height: int):
         super().__init__(
             action_space=DiscreteActionSpace(N_AGENTS, N_ACTIONS),
````
````diff
--- a/PKG-INFO
+++ b/README.md
@@ -1,23 +1,30 @@
-
-
-
-Summary: A strongly typed Multi-Agent Reinforcement Learning framework
-Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
-Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
-License-File: LICENSE
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Requires-Python: <4,>=3.10
-Requires-Dist: gymnasium>=0.29.1
-Requires-Dist: numpy>=2.0.0
-Requires-Dist: opencv-python>=4.10.0.84
-Description-Content-Type: text/markdown
-
-# `marlenv` - A unified interface for muti-agent reinforcement learning
+# `marlenv` - A unified framework for muti-agent reinforcement learning
+**Documentation: [https://yamoling.github.io/multi-agent-rlenv](https://yamoling.github.io/multi-agent-rlenv)**
+
 The objective of `marlenv` is to provide a common (typed) interface for many different reinforcement learning environments.

 As such, `marlenv` provides high level abstractions of RL concepts such as `Observation`s or `Transition`s that are commonly represented as mere (confusing) lists or tuples.

+## Installation
+Install with you preferred package manager (`uv`, `pip`, `poetry`, ...):
+```bash
+$ pip install marlenv[all] # Enable all features
+$ pip install marlenv # Basic installation
+```
+
+There are multiple optional dependencies if you want to support specific libraries and environments. Available options are:
+- `smac` for StarCraft II environments
+- `gym` for OpenAI Gym environments
+- `pettingzoo` for PettingZoo environments
+- `overcooked` for Overcooked environments
+
+Install them with:
+```bash
+$ pip install marlenv[smac] # Install SMAC
+$ pip install marlenv[gym,smac] # Install Gym & smac support
+```
+
+
 ## Using `marlenv` with existing libraries
 `marlenv` unifies multiple popular libraries under a single interface. Namely, `marlenv` supports `smac`, `gymnasium` and `pettingzoo`.

@@ -47,7 +54,7 @@ from marlenv import RLEnv, DiscreteActionSpace, Observation
 N_AGENTS = 3
 N_ACTIONS = 5

-class CustomEnv(
+class CustomEnv(MARLEnv[DiscreteActionSpace]):
     def __init__(self, width: int, height: int):
         super().__init__(
             action_space=DiscreteActionSpace(N_AGENTS, N_ACTIONS),
````
```diff
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,13 +7,28 @@ authors = [
 ]
 readme = "README.md"
 requires-python = ">=3.10, <4"
-dependencies = ["numpy>=2.0.0", "opencv-python>=4.10.0.84", "gymnasium>=0.29.1"]
 urls = { "repository" = "https://github.com/yamoling/multi-agent-rlenv" }
 classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
 ]

+dependencies = ["numpy>=2.0.0", "opencv-python>=4.0", "typing_extensions>=4.0"]
+
+[project.optional-dependencies]
+gym = ["gymnasium>=0.29.1"]
+smac = ["smac", "pysc2"]
+pettingzoo = ["pettingzoo>=1.20", "pymunk>=6.0", "scipy>=1.10"]
+overcooked = ["overcooked-ai>=1.1.0", "scipy>=1.10"]
+all = [
+    "gymnasium>0.29.1",
+    "pettingzoo>=1.20",
+    "overcooked-ai",
+    "smac",
+    "pysc2",
+    "pymunk>=6.0",
+    "scipy>=1.10",
+]

 [build-system]
 requires = ["hatchling"]
@@ -35,5 +50,10 @@ pythonpath = "src"
 # Ignore deprecation warnings caused by SMAC
 filterwarnings = "ignore::DeprecationWarning"

+[tool.uv.sources]
+smac = { git = "https://github.com/oxwhirl/smac.git" }
+pysc2 = { git = "https://github.com/google-deepmind/pysc2.git" }
+
+
 [dependency-groups]
-dev = ["orjson>=3.10.12", "pytest>=8.3.2"]
+dev = ["orjson>=3.10.12", "pdoc>=15.0.1", "pytest>=8.3.2"]
```
````diff
--- /dev/null
+++ b/src/marlenv/__init__.py
@@ -0,0 +1,111 @@
+"""
+`marlenv` is a strongly typed library for multi-agent and multi-objective reinforcement learning.
+
+It aims to provide a simple and consistent interface for reinforcement learning environments by providing abstraction models such as `Observation`s or `Episode`s. `marlenv` provides adapters for popular libraries such as `gym` or `pettingzoo` and provides utility wrappers to add functionalities such as video recording or limiting the number of steps.
+
+Almost every class is a dataclassto enable seemless serialiation with the `orjson` library.
+
+# Existing environments
+The `MARLEnv` class represents a multi-agent RL environment and is at the center of this library, and `marlenv` provides an adapted implementation of multiple common MARL environments (gym, pettingzoo, smac and overcooked) in `marlenv.adapters`. Note that these adapters will only work if you have the corresponding library installed.
+
+```python
+from marlenv.adapters import Gym, PettingZoo, SMAC, Overcooked
+import marlenv
+
+env1 = Gym("CartPole-v1")
+env2 = marlenv.make("CartPole-v1")
+env3 = PettingZoo("prospector_v4")
+env4 = SMAC("3m")
+env5 = Overcooked.from_layout("cramped_room")
+```
+
+# Wrappers & Builder
+To facilitate the create of an environment with common wrappers, `marlenv` provides a `Builder` class that can be used to chain the creation of multiple wrappers.
+
+```python
+from marlenv import make, Builder
+
+env = <your env>
+env = Builder(env).agent_id().time_limit(50).record("videos").build()
+```
+
+# Using the library
+A typical environment loop would look like this:
+
+```python
+from marlenv import DiscreteMockEnv, Builder, Episode
+
+env = Builder(DicreteMockEnv()).agent_id().build()
+obs, state = env.reset()
+terminated = False
+episode = Episode.new(obs, state)
+while not episode.is_finished:
+    action = env.sample_action()  # a valid random action
+    step = env.step(action)  # Step data `step.obs`, `step.reward`, ...
+    episode.add(step, action)  # Progressively build the episode
+```
+
+# Extras
+To cope with complex observation spaces, `marlenv` distinguishes the "main" observation data from the "extra" observation data. A typical example would be the observation of a gridworld environment with a time limit. In that case, the main observation has shape (height, width), i.e. the content of the grid, but the current time is an extra observation data of shape (1, ).
+
+```python
+env = GridWorldEnv()
+print(env.observation_shape)  # (height, width)
+print(env.extras_shape)  # (0, )
+
+env = Builder(env).time_limit(25).build()
+print(env.observation_shape)  # (height, width)
+print(env.extras_shape)  # (1, )
+```
+
+# Creating a new environment
+If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
+"""
+
+__version__ = "3.3.1"
+
+from . import models
+from . import wrappers
+from . import adapters
+from .models import spaces
+
+
+from .env_builder import make, Builder
+from .models import (
+    MARLEnv,
+    State,
+    Step,
+    Observation,
+    Episode,
+    Transition,
+    DiscreteSpace,
+    ContinuousSpace,
+    ActionSpace,
+    DiscreteActionSpace,
+    ContinuousActionSpace,
+)
+from .wrappers import RLEnvWrapper
+from .mock_env import DiscreteMockEnv, DiscreteMOMockEnv
+
+__all__ = [
+    "models",
+    "wrappers",
+    "adapters",
+    "spaces",
+    "make",
+    "Builder",
+    "MARLEnv",
+    "Step",
+    "State",
+    "Observation",
+    "Episode",
+    "Transition",
+    "ActionSpace",
+    "DiscreteSpace",
+    "ContinuousSpace",
+    "DiscreteActionSpace",
+    "ContinuousActionSpace",
+    "DiscreteMockEnv",
+    "DiscreteMOMockEnv",
+    "RLEnvWrapper",
+]
````
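The module docstring above states that almost every class is a dataclass so that it serializes seamlessly with `orjson`, but the diff itself shows no serialization example. Below is a minimal, hypothetical sketch of what that could look like; it assumes `Episode` is a plain dataclass holding numpy arrays and scalar fields, and reuses the mock-env loop from the docstring.

```python
from dataclasses import asdict

import orjson
from marlenv import Builder, DiscreteMockEnv, Episode

# Run one short episode on the mock environment (loop taken from the docstring above).
env = Builder(DiscreteMockEnv()).agent_id().time_limit(10).build()
obs, state = env.reset()
episode = Episode.new(obs, state)
while not episode.is_finished:
    action = env.sample_action()
    episode.add(env.step(action), action)

# Assumption: Episode is a dataclass of numpy arrays and plain fields, so it can be
# converted with dataclasses.asdict() and serialized using orjson's numpy support.
payload = orjson.dumps(asdict(episode), option=orjson.OPT_SERIALIZE_NUMPY)
print(f"serialized episode: {len(payload)} bytes")
```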
```diff
--- /dev/null
+++ b/src/marlenv/adapters/__init__.py
@@ -0,0 +1,42 @@
+from importlib.util import find_spec
+from .pymarl_adapter import PymarlAdapter
+
+HAS_GYM = False
+if find_spec("gymnasium") is not None:
+    from .gym_adapter import Gym
+
+    HAS_GYM = True
+
+HAS_PETTINGZOO = False
+if find_spec("pettingzoo") is not None:
+    from .pettingzoo_adapter import PettingZoo
+
+    HAS_PETTINGZOO = True
+
+HAS_SMAC = False
+if find_spec("smac") is not None:
+    from .smac_adapter import SMAC
+
+    HAS_SMAC = True
+
+HAS_OVERCOOKED = False
+if find_spec("overcooked_ai_py.mdp") is not None:
+    import numpy
+
+    # Overcooked assumes a version of numpy <2.0 where np.Inf is available.
+    setattr(numpy, "Inf", numpy.inf)
+    from .overcooked_adapter import Overcooked
+
+    HAS_OVERCOOKED = True
+
+__all__ = [
+    "PymarlAdapter",
+    "Gym",
+    "PettingZoo",
+    "SMAC",
+    "Overcooked",
+    "HAS_GYM",
+    "HAS_PETTINGZOO",
+    "HAS_SMAC",
+    "HAS_OVERCOOKED",
+]
```
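The new `adapters/__init__.py` imports each adapter only when its backing library is installed and exposes the `HAS_GYM`, `HAS_PETTINGZOO`, `HAS_SMAC` and `HAS_OVERCOOKED` flags. A small, hypothetical sketch of how downstream code might guard on those flags; the `Gym("CartPole-v1")` constructor and the `(obs, state)` return of `reset()` are taken from the examples elsewhere in this diff.

```python
from marlenv import adapters

# Only touch the Gym adapter when gymnasium is actually installed;
# otherwise the name `adapters.Gym` is never defined by the module above.
if adapters.HAS_GYM:
    env = adapters.Gym("CartPole-v1")
    obs, state = env.reset()
    print("gym env wrapped as a MARLEnv with", env.n_agents, "agent(s)")
else:
    print("gymnasium not installed; install marlenv[gym] to enable the Gym adapter")
```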
```diff
--- a/src/marlenv/adapters/gym_adapter.py
+++ b/src/marlenv/adapters/gym_adapter.py
@@ -1,3 +1,5 @@
+import sys
+import cv2
 from dataclasses import dataclass
 from typing import Sequence

@@ -79,7 +81,10 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
         return self.last_obs, self.get_state()

     def get_image(self):
-
+        image = np.array(self.env.render())
+        if sys.platform in ("linux", "linux2"):
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+        return image

     def seed(self, seed_value: int):
         self.env.reset(seed=seed_value)
```
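The reworked `get_image` returns the rendered frame as a numpy array and, on Linux, converts it to BGR channel order, which is what OpenCV consumers expect. A hypothetical sketch of using that frame, assuming the wrapped gymnasium environment actually renders to an RGB array (e.g. it was created with `render_mode="rgb_array"`):

```python
import cv2
from marlenv.adapters import Gym

# Assumption: the underlying gymnasium environment returns an RGB array from render().
env = Gym("CartPole-v1")
env.reset()
frame = env.get_image()
# cv2.imwrite interprets the array as BGR, matching the conversion done in get_image on Linux.
cv2.imwrite("frame.png", frame)
print("saved frame with shape", frame.shape)
```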
```diff
--- /dev/null
+++ b/src/marlenv/adapters/overcooked_adapter.py
@@ -0,0 +1,164 @@
+import sys
+from dataclasses import dataclass
+from typing import Literal, Sequence
+
+import cv2
+import numpy as np
+import numpy.typing as npt
+import pygame
+from marlenv.models import ContinuousSpace, DiscreteActionSpace, MARLEnv, Observation, State, Step
+
+from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
+from overcooked_ai_py.mdp.overcooked_mdp import Action, OvercookedGridworld, OvercookedState
+from overcooked_ai_py.visualization.state_visualizer import StateVisualizer
+
+
+@dataclass
+class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
+    horizon: int
+
+    def __init__(self, oenv: OvercookedEnv):
+        self._oenv = oenv
+        assert isinstance(oenv.mdp, OvercookedGridworld)
+        self._mdp = oenv.mdp
+        self.visualizer = StateVisualizer()
+        shape = tuple(int(s) for s in self._mdp.get_lossless_state_encoding_shape())
+        shape = (shape[2], shape[0], shape[1])
+        super().__init__(
+            action_space=DiscreteActionSpace(
+                n_agents=self._mdp.num_players,
+                n_actions=Action.NUM_ACTIONS,
+                action_names=[Action.ACTION_TO_CHAR[a] for a in Action.ALL_ACTIONS],
+            ),
+            observation_shape=shape,
+            extras_shape=(1,),
+            extras_meanings=["timestep"],
+            state_shape=shape,
+            state_extra_shape=(1,),
+            reward_space=ContinuousSpace.from_shape(1),
+        )
+        self.horizon = int(self._oenv.horizon)
+
+    @property
+    def state(self) -> OvercookedState:
+        """Current state of the environment"""
+        return self._oenv.state
+
+    def set_state(self, state: State):
+        raise NotImplementedError("Not yet implemented")
+
+    @property
+    def time_step(self):
+        return self.state.timestep
+
+    def _state_data(self):
+        state = np.array(self._mdp.lossless_state_encoding(self.state))
+        # Use axes (agents, channels, height, width) instead of (agents, height, width, channels)
+        state = np.transpose(state, (0, 3, 1, 2))
+        return state
+
+    def get_state(self):
+        return State(self._state_data()[0], np.array([self.time_step / self.horizon]))
+
+    def get_observation(self) -> Observation:
+        return Observation(
+            data=self._state_data(),
+            available_actions=self.available_actions(),
+            extras=np.array([[self.time_step / self.horizon]] * self.n_agents),
+        )
+
+    def available_actions(self):
+        available_actions = np.full((self.n_agents, self.n_actions), False)
+        actions = self._mdp.get_actions(self._oenv.state)
+        for agent_num, agent_actions in enumerate(actions):
+            for action in agent_actions:
+                available_actions[agent_num, Action.ACTION_TO_INDEX[action]] = True
+        return np.array(available_actions)
+
+    def step(self, actions: Sequence[int] | npt.NDArray[np.int32 | np.int64]) -> Step:
+        actions = [Action.ALL_ACTIONS[a] for a in actions]
+        _, reward, done, info = self._oenv.step(actions, display_phi=True)
+        return Step(
+            obs=self.get_observation(),
+            state=self.get_state(),
+            reward=np.array([reward]),
+            done=done,
+            truncated=False,
+            info=info,
+        )
+
+    def get_image(self):
+        rewards_dict = {}  # dictionary of details you want rendered in the UI
+        for key, value in self._oenv.game_stats.items():
+            if key in [
+                "cumulative_shaped_rewards_by_agent",
+                "cumulative_sparse_rewards_by_agent",
+            ]:
+                rewards_dict[key] = value
+
+        image = self.visualizer.render_state(
+            state=self._oenv.state,
+            grid=self._mdp.terrain_mtx,
+            hud_data=StateVisualizer.default_hud_data(self._oenv.state, **rewards_dict),
+        )
+
+        image = pygame.surfarray.array3d(image)
+        image = np.flip(np.rot90(image, 3), 1)
+        # Depending on the platform, the image may need to be converted to RGB
+        if sys.platform in ("linux", "linux2"):
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+        return image
+
+    @staticmethod
+    def from_layout(
+        layout: Literal[
+            "asymmetric_advantages",
+            "asymmetric_advantages_tomato",
+            "bonus_order_test",
+            "bottleneck",
+            "centre_objects",
+            "centre_pots",
+            "coordination_ring",
+            "corridor",
+            "counter_circuit",
+            "counter_circuit_o_1order",
+            "cramped_corridor",
+            "cramped_room",
+            "cramped_room_o_3orders",
+            "cramped_room_single",
+            "cramped_room_tomato",
+            "five_by_five",
+            "forced_coordination",
+            "forced_coordination_tomato",
+            "inverse_marshmallow_experiment",
+            "large_room",
+            "long_cook_time",
+            "marshmallow_experiment_coordination",
+            "marshmallow_experiment",
+            "mdp_test",
+            "m_shaped_s",
+            "multiplayer_schelling",
+            "pipeline",
+            "scenario1_s",
+            "scenario2",
+            "scenario2_s",
+            "scenario3",
+            "scenario4",
+            "schelling",
+            "schelling_s",
+            "simple_o",
+            "simple_o_t",
+            "simple_tomato",
+            "small_corridor",
+            "soup_coordination",
+            "tutorial_0",
+            "tutorial_1",
+            "tutorial_2",
+            "tutorial_3",
+            "unident",
+            "you_shall_not_pass",
+        ],
+        horizon: int = 400,
+    ):
+        mdp = OvercookedGridworld.from_layout_name(layout)
+        return Overcooked(OvercookedEnv.from_mdp(mdp, horizon=horizon))
```
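For context, a minimal random-policy rollout sketch using only the API added in this file. It assumes the freshly constructed environment can be stepped without an explicit `reset()` (since `OvercookedEnv.from_mdp` starts from an initial state) and simply samples among the actions flagged by `available_actions()`; this is an illustrative sketch, not part of the package.

```python
import numpy as np
from marlenv.adapters import Overcooked

env = Overcooked.from_layout("cramped_room", horizon=50)
done = False
total_reward = 0.0
while not done:
    # One action index per agent, sampled from that agent's availability mask.
    mask = env.available_actions()
    actions = [int(np.random.choice(np.flatnonzero(agent_mask))) for agent_mask in mask]
    step = env.step(actions)
    total_reward += float(step.reward[0])
    done = step.done
print("episode return:", total_reward)
```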