multi-agent-rlenv 3.2.2__tar.gz → 3.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/.github/workflows/ci.yaml +5 -5
  2. multi_agent_rlenv-3.3.1/.github/workflows/docs.yaml +58 -0
  3. multi_agent_rlenv-3.2.2/README.md → multi_agent_rlenv-3.3.1/PKG-INFO +59 -2
  4. multi_agent_rlenv-3.2.2/PKG-INFO → multi_agent_rlenv-3.3.1/README.md +24 -17
  5. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/pyproject.toml +22 -2
  6. multi_agent_rlenv-3.3.1/src/marlenv/__init__.py +111 -0
  7. multi_agent_rlenv-3.3.1/src/marlenv/adapters/__init__.py +42 -0
  8. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/adapters/gym_adapter.py +6 -1
  9. multi_agent_rlenv-3.3.1/src/marlenv/adapters/overcooked_adapter.py +164 -0
  10. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/env_builder.py +31 -49
  11. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/env_pool.py +0 -1
  12. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/mock_env.py +15 -5
  13. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/env.py +46 -11
  14. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/observation.py +6 -1
  15. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/spaces.py +49 -10
  16. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/__init__.py +2 -0
  17. multi_agent_rlenv-3.3.1/src/marlenv/wrappers/delayed_rewards.py +36 -0
  18. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_adapters.py +56 -24
  19. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_serialization.py +7 -1
  20. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_spaces.py +4 -4
  21. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_wrappers.py +32 -2
  22. multi_agent_rlenv-3.2.2/src/marlenv/__init__.py +0 -62
  23. multi_agent_rlenv-3.2.2/src/marlenv/adapters/__init__.py +0 -24
  24. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/.gitignore +0 -0
  25. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/LICENSE +0 -0
  26. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/adapters/pettingzoo_adapter.py +0 -0
  27. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/adapters/pymarl_adapter.py +0 -0
  28. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/adapters/smac_adapter.py +0 -0
  29. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/exceptions.py +0 -0
  30. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/__init__.py +0 -0
  31. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/episode.py +0 -0
  32. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/state.py +0 -0
  33. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/step.py +0 -0
  34. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/models/transition.py +0 -0
  35. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/py.typed +0 -0
  36. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/agent_id_wrapper.py +0 -0
  37. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/available_actions_mask.py +0 -0
  38. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/available_actions_wrapper.py +0 -0
  39. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/blind_wrapper.py +0 -0
  40. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/centralised.py +0 -0
  41. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/last_action_wrapper.py +0 -0
  42. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/paddings.py +0 -0
  43. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/penalty_wrapper.py +0 -0
  44. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/rlenv_wrapper.py +0 -0
  45. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/time_limit.py +0 -0
  46. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/wrappers/video_recorder.py +0 -0
  47. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/__init__.py +0 -0
  48. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_episode.py +0 -0
  49. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_models.py +0 -0
  50. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/test_pool.py +0 -0
  51. {multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/tests/utils.py +0 -0
{multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/.github/workflows/ci.yaml

@@ -21,8 +21,8 @@ jobs:
       matrix:
         os:
           - ubuntu-latest
-          - windows-latest
           - macOS-latest
+          - windows-latest
         target:
           - x86_64
           - aarch64
@@ -43,16 +43,16 @@ jobs:
       - name: Install uv
         uses: yezz123/setup-uv@v4
         with:
-          uv-version: 0.5.6
+          uv-version: 0.6.4
       - name: Install dependencies and run pytest
         run: |
-          uv sync
+          uv sync --extra overcooked --extra gym --extra pettingzoo
           uv run pytest

   build:
     name: 📦 Build package
     if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
-    needs: test
+    needs: [test]
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -63,7 +63,7 @@ jobs:
       - name: Install UV
         uses: yezz123/setup-uv@v4
         with:
-          uv-version: 0.5.6
+          uv-version: 0.6.4
       - name: Build wheels
         run: |
           uv venv
multi_agent_rlenv-3.3.1/.github/workflows/docs.yaml (new file)

@@ -0,0 +1,58 @@
+# Simple workflow for deploying static content to GitHub Pages
+name: Deploy static content to Pages
+
+on:
+  # Runs on pushes targeting the default branch
+  push:
+    branches: ["main", "master"]
+    # Only deploy the documentation on new version tags
+    tags:
+      - 'v[0-9]+.[0-9]+.[0-9]+'
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  # Single deploy job since we're just deploying
+  deploy:
+    name: 📚 Build docs
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.12
+      - name: Install UV
+        uses: yezz123/setup-uv@v4
+        with:
+          uv-version: 0.6.4
+      - name: Install dependencies
+        run: |
+          uv sync
+          uv run pdoc python/marlenv -o ./docs --docformat=google
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          # Upload entire repository
+          path: './docs'
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
multi_agent_rlenv-3.2.2/README.md → multi_agent_rlenv-3.3.1/PKG-INFO

@@ -1,8 +1,65 @@
-# `marlenv` - A unified interface for muti-agent reinforcement learning
+Metadata-Version: 2.4
+Name: multi-agent-rlenv
+Version: 3.3.1
+Summary: A strongly typed Multi-Agent Reinforcement Learning framework
+Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
+Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
+License-File: LICENSE
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Requires-Python: <4,>=3.10
+Requires-Dist: numpy>=2.0.0
+Requires-Dist: opencv-python>=4.0
+Requires-Dist: typing-extensions>=4.0
+Provides-Extra: all
+Requires-Dist: gymnasium>0.29.1; extra == 'all'
+Requires-Dist: overcooked-ai; extra == 'all'
+Requires-Dist: pettingzoo>=1.20; extra == 'all'
+Requires-Dist: pymunk>=6.0; extra == 'all'
+Requires-Dist: pysc2; extra == 'all'
+Requires-Dist: scipy>=1.10; extra == 'all'
+Requires-Dist: smac; extra == 'all'
+Provides-Extra: gym
+Requires-Dist: gymnasium>=0.29.1; extra == 'gym'
+Provides-Extra: overcooked
+Requires-Dist: overcooked-ai>=1.1.0; extra == 'overcooked'
+Requires-Dist: scipy>=1.10; extra == 'overcooked'
+Provides-Extra: pettingzoo
+Requires-Dist: pettingzoo>=1.20; extra == 'pettingzoo'
+Requires-Dist: pymunk>=6.0; extra == 'pettingzoo'
+Requires-Dist: scipy>=1.10; extra == 'pettingzoo'
+Provides-Extra: smac
+Requires-Dist: pysc2; extra == 'smac'
+Requires-Dist: smac; extra == 'smac'
+Description-Content-Type: text/markdown
+
+# `marlenv` - A unified framework for muti-agent reinforcement learning
+**Documentation: [https://yamoling.github.io/multi-agent-rlenv](https://yamoling.github.io/multi-agent-rlenv)**
+
 The objective of `marlenv` is to provide a common (typed) interface for many different reinforcement learning environments.

 As such, `marlenv` provides high level abstractions of RL concepts such as `Observation`s or `Transition`s that are commonly represented as mere (confusing) lists or tuples.

+## Installation
+Install with you preferred package manager (`uv`, `pip`, `poetry`, ...):
+```bash
+$ pip install marlenv[all] # Enable all features
+$ pip install marlenv # Basic installation
+```
+
+There are multiple optional dependencies if you want to support specific libraries and environments. Available options are:
+- `smac` for StarCraft II environments
+- `gym` for OpenAI Gym environments
+- `pettingzoo` for PettingZoo environments
+- `overcooked` for Overcooked environments
+
+Install them with:
+```bash
+$ pip install marlenv[smac] # Install SMAC
+$ pip install marlenv[gym,smac] # Install Gym & smac support
+```
+
+
 ## Using `marlenv` with existing libraries
 `marlenv` unifies multiple popular libraries under a single interface. Namely, `marlenv` supports `smac`, `gymnasium` and `pettingzoo`.

@@ -32,7 +89,7 @@ from marlenv import RLEnv, DiscreteActionSpace, Observation
 N_AGENTS = 3
 N_ACTIONS = 5

-class CustomEnv(RLEnv[DiscreteActionSpace]):
+class CustomEnv(MARLEnv[DiscreteActionSpace]):
     def __init__(self, width: int, height: int):
         super().__init__(
             action_space=DiscreteActionSpace(N_AGENTS, N_ACTIONS),
multi_agent_rlenv-3.2.2/PKG-INFO → multi_agent_rlenv-3.3.1/README.md

@@ -1,23 +1,30 @@
-Metadata-Version: 2.4
-Name: multi-agent-rlenv
-Version: 3.2.2
-Summary: A strongly typed Multi-Agent Reinforcement Learning framework
-Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
-Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
-License-File: LICENSE
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Requires-Python: <4,>=3.10
-Requires-Dist: gymnasium>=0.29.1
-Requires-Dist: numpy>=2.0.0
-Requires-Dist: opencv-python>=4.10.0.84
-Description-Content-Type: text/markdown
-
-# `marlenv` - A unified interface for muti-agent reinforcement learning
+# `marlenv` - A unified framework for muti-agent reinforcement learning
+**Documentation: [https://yamoling.github.io/multi-agent-rlenv](https://yamoling.github.io/multi-agent-rlenv)**
+
 The objective of `marlenv` is to provide a common (typed) interface for many different reinforcement learning environments.

 As such, `marlenv` provides high level abstractions of RL concepts such as `Observation`s or `Transition`s that are commonly represented as mere (confusing) lists or tuples.

+## Installation
+Install with you preferred package manager (`uv`, `pip`, `poetry`, ...):
+```bash
+$ pip install marlenv[all] # Enable all features
+$ pip install marlenv # Basic installation
+```
+
+There are multiple optional dependencies if you want to support specific libraries and environments. Available options are:
+- `smac` for StarCraft II environments
+- `gym` for OpenAI Gym environments
+- `pettingzoo` for PettingZoo environments
+- `overcooked` for Overcooked environments
+
+Install them with:
+```bash
+$ pip install marlenv[smac] # Install SMAC
+$ pip install marlenv[gym,smac] # Install Gym & smac support
+```
+
+
 ## Using `marlenv` with existing libraries
 `marlenv` unifies multiple popular libraries under a single interface. Namely, `marlenv` supports `smac`, `gymnasium` and `pettingzoo`.

@@ -47,7 +54,7 @@ from marlenv import RLEnv, DiscreteActionSpace, Observation
 N_AGENTS = 3
 N_ACTIONS = 5

-class CustomEnv(RLEnv[DiscreteActionSpace]):
+class CustomEnv(MARLEnv[DiscreteActionSpace]):
     def __init__(self, width: int, height: int):
         super().__init__(
             action_space=DiscreteActionSpace(N_AGENTS, N_ACTIONS),
{multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/pyproject.toml

@@ -7,13 +7,28 @@ authors = [
 ]
 readme = "README.md"
 requires-python = ">=3.10, <4"
-dependencies = ["numpy>=2.0.0", "opencv-python>=4.10.0.84", "gymnasium>=0.29.1"]
 urls = { "repository" = "https://github.com/yamoling/multi-agent-rlenv" }
 classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
 ]

+dependencies = ["numpy>=2.0.0", "opencv-python>=4.0", "typing_extensions>=4.0"]
+
+[project.optional-dependencies]
+gym = ["gymnasium>=0.29.1"]
+smac = ["smac", "pysc2"]
+pettingzoo = ["pettingzoo>=1.20", "pymunk>=6.0", "scipy>=1.10"]
+overcooked = ["overcooked-ai>=1.1.0", "scipy>=1.10"]
+all = [
+    "gymnasium>0.29.1",
+    "pettingzoo>=1.20",
+    "overcooked-ai",
+    "smac",
+    "pysc2",
+    "pymunk>=6.0",
+    "scipy>=1.10",
+]

 [build-system]
 requires = ["hatchling"]
@@ -35,5 +50,10 @@ pythonpath = "src"
 # Ignore deprecation warnings caused by SMAC
 filterwarnings = "ignore::DeprecationWarning"

+[tool.uv.sources]
+smac = { git = "https://github.com/oxwhirl/smac.git" }
+pysc2 = { git = "https://github.com/google-deepmind/pysc2.git" }
+
+
 [dependency-groups]
-dev = ["orjson>=3.10.12", "pytest>=8.3.2"]
+dev = ["orjson>=3.10.12", "pdoc>=15.0.1", "pytest>=8.3.2"]
multi_agent_rlenv-3.3.1/src/marlenv/__init__.py (new file)

@@ -0,0 +1,111 @@
+"""
+`marlenv` is a strongly typed library for multi-agent and multi-objective reinforcement learning.
+
+It aims to provide a simple and consistent interface for reinforcement learning environments by providing abstraction models such as `Observation`s or `Episode`s. `marlenv` provides adapters for popular libraries such as `gym` or `pettingzoo` and provides utility wrappers to add functionalities such as video recording or limiting the number of steps.
+
+Almost every class is a dataclassto enable seemless serialiation with the `orjson` library.
+
+# Existing environments
+The `MARLEnv` class represents a multi-agent RL environment and is at the center of this library, and `marlenv` provides an adapted implementation of multiple common MARL environments (gym, pettingzoo, smac and overcooked) in `marlenv.adapters`. Note that these adapters will only work if you have the corresponding library installed.
+
+```python
+from marlenv.adapters import Gym, PettingZoo, SMAC, Overcooked
+import marlenv
+
+env1 = Gym("CartPole-v1")
+env2 = marlenv.make("CartPole-v1")
+env3 = PettingZoo("prospector_v4")
+env4 = SMAC("3m")
+env5 = Overcooked.from_layout("cramped_room")
+```
+
+# Wrappers & Builder
+To facilitate the create of an environment with common wrappers, `marlenv` provides a `Builder` class that can be used to chain the creation of multiple wrappers.
+
+```python
+from marlenv import make, Builder
+
+env = <your env>
+env = Builder(env).agent_id().time_limit(50).record("videos").build()
+```
+
+# Using the library
+A typical environment loop would look like this:
+
+```python
+from marlenv import DiscreteMockEnv, Builder, Episode
+
+env = Builder(DicreteMockEnv()).agent_id().build()
+obs, state = env.reset()
+terminated = False
+episode = Episode.new(obs, state)
+while not episode.is_finished:
+    action = env.sample_action()  # a valid random action
+    step = env.step(action)  # Step data `step.obs`, `step.reward`, ...
+    episode.add(step, action)  # Progressively build the episode
+```
+
+# Extras
+To cope with complex observation spaces, `marlenv` distinguishes the "main" observation data from the "extra" observation data. A typical example would be the observation of a gridworld environment with a time limit. In that case, the main observation has shape (height, width), i.e. the content of the grid, but the current time is an extra observation data of shape (1, ).
+
+```python
+env = GridWorldEnv()
+print(env.observation_shape) # (height, width)
+print(env.extras_shape) # (0, )
+
+env = Builder(env).time_limit(25).build()
+print(env.observation_shape) # (height, width)
+print(env.extras_shape) # (1, )
+```
+
+# Creating a new environment
+If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
+"""
+
+__version__ = "3.3.1"
+
+from . import models
+from . import wrappers
+from . import adapters
+from .models import spaces
+
+
+from .env_builder import make, Builder
+from .models import (
+    MARLEnv,
+    State,
+    Step,
+    Observation,
+    Episode,
+    Transition,
+    DiscreteSpace,
+    ContinuousSpace,
+    ActionSpace,
+    DiscreteActionSpace,
+    ContinuousActionSpace,
+)
+from .wrappers import RLEnvWrapper
+from .mock_env import DiscreteMockEnv, DiscreteMOMockEnv
+
+__all__ = [
+    "models",
+    "wrappers",
+    "adapters",
+    "spaces",
+    "make",
+    "Builder",
+    "MARLEnv",
+    "Step",
+    "State",
+    "Observation",
+    "Episode",
+    "Transition",
+    "ActionSpace",
+    "DiscreteSpace",
+    "ContinuousSpace",
+    "DiscreteActionSpace",
+    "ContinuousActionSpace",
+    "DiscreteMockEnv",
+    "DiscreteMOMockEnv",
+    "RLEnvWrapper",
+]
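The last paragraph of the new package docstring points to `RLEnvWrapper` for extending an existing `MARLEnv`. As a rough, hedged illustration of that pattern (not taken from the package; the wrapper name is invented and the assumption that `RLEnvWrapper` is constructed directly from the wrapped environment, as `Builder` does, is mine), a minimal subclass might look like this:

```python
# Hypothetical sketch, not part of the package: override only step() and rely on
# RLEnvWrapper's default pass-through behaviour for every other method.
# Assumes RLEnvWrapper(env) wraps an existing MARLEnv.
from marlenv import RLEnvWrapper, DiscreteMockEnv


class StepCounter(RLEnvWrapper):
    """Illustrative wrapper that counts how many steps have been taken."""

    n_steps = 0

    def step(self, actions):
        self.n_steps += 1  # bookkeeping before delegating to the wrapped env
        return super().step(actions)


env = StepCounter(DiscreteMockEnv())
```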
multi_agent_rlenv-3.3.1/src/marlenv/adapters/__init__.py (new file)

@@ -0,0 +1,42 @@
+from importlib.util import find_spec
+from .pymarl_adapter import PymarlAdapter
+
+HAS_GYM = False
+if find_spec("gymnasium") is not None:
+    from .gym_adapter import Gym
+
+    HAS_GYM = True
+
+HAS_PETTINGZOO = False
+if find_spec("pettingzoo") is not None:
+    from .pettingzoo_adapter import PettingZoo
+
+    HAS_PETTINGZOO = True
+
+HAS_SMAC = False
+if find_spec("smac") is not None:
+    from .smac_adapter import SMAC
+
+    HAS_SMAC = True
+
+HAS_OVERCOOKED = False
+if find_spec("overcooked_ai_py.mdp") is not None:
+    import numpy
+
+    # Overcooked assumes a version of numpy <2.0 where np.Inf is available.
+    setattr(numpy, "Inf", numpy.inf)
+    from .overcooked_adapter import Overcooked
+
+    HAS_OVERCOOKED = True
+
+__all__ = [
+    "PymarlAdapter",
+    "Gym",
+    "PettingZoo",
+    "SMAC",
+    "Overcooked",
+    "HAS_GYM",
+    "HAS_PETTINGZOO",
+    "HAS_SMAC",
+    "HAS_OVERCOOKED",
+]
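The `HAS_*` flags added here make the optional adapters easy to guard at runtime, since each adapter is only imported when its backing library is installed. A small sketch of how downstream code might use them (the fallback behaviour is illustrative, not part of the package; the environment id is the one used in the package docstring):

```python
# Sketch: only touch the Gym adapter when the gymnasium extra is installed.
from marlenv import adapters

if adapters.HAS_GYM:
    env = adapters.Gym("CartPole-v1")  # gymnasium-backed MARLEnv
else:
    raise RuntimeError("Install marlenv[gym] to use the Gym adapter")
```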
{multi_agent_rlenv-3.2.2 → multi_agent_rlenv-3.3.1}/src/marlenv/adapters/gym_adapter.py

@@ -1,3 +1,5 @@
+import sys
+import cv2
 from dataclasses import dataclass
 from typing import Sequence

@@ -79,7 +81,10 @@ class Gym(MARLEnv[Sequence | npt.NDArray, ActionSpace]):
         return self.last_obs, self.get_state()

     def get_image(self):
-        return self.env.render()
+        image = np.array(self.env.render())
+        if sys.platform in ("linux", "linux2"):
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+        return image

     def seed(self, seed_value: int):
         self.env.reset(seed=seed_value)
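The new `get_image` body returns the rendered frame as a numpy array (converted to BGR on Linux), presumably the format consumed by the video-recording wrapper. A hedged sketch of recording a Gym episode, based on the `Builder` chain shown in the package docstring (the output directory name is illustrative):

```python
# Sketch based on the Builder example from the package docstring;
# "videos" is just an illustrative output directory.
from marlenv import Builder
from marlenv.adapters import Gym

env = Builder(Gym("CartPole-v1")).time_limit(50).record("videos").build()
```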
multi_agent_rlenv-3.3.1/src/marlenv/adapters/overcooked_adapter.py (new file)

@@ -0,0 +1,164 @@
+import sys
+from dataclasses import dataclass
+from typing import Literal, Sequence
+
+import cv2
+import numpy as np
+import numpy.typing as npt
+import pygame
+from marlenv.models import ContinuousSpace, DiscreteActionSpace, MARLEnv, Observation, State, Step
+
+from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv
+from overcooked_ai_py.mdp.overcooked_mdp import Action, OvercookedGridworld, OvercookedState
+from overcooked_ai_py.visualization.state_visualizer import StateVisualizer
+
+
+@dataclass
+class Overcooked(MARLEnv[Sequence[int] | npt.NDArray, DiscreteActionSpace]):
+    horizon: int
+
+    def __init__(self, oenv: OvercookedEnv):
+        self._oenv = oenv
+        assert isinstance(oenv.mdp, OvercookedGridworld)
+        self._mdp = oenv.mdp
+        self.visualizer = StateVisualizer()
+        shape = tuple(int(s) for s in self._mdp.get_lossless_state_encoding_shape())
+        shape = (shape[2], shape[0], shape[1])
+        super().__init__(
+            action_space=DiscreteActionSpace(
+                n_agents=self._mdp.num_players,
+                n_actions=Action.NUM_ACTIONS,
+                action_names=[Action.ACTION_TO_CHAR[a] for a in Action.ALL_ACTIONS],
+            ),
+            observation_shape=shape,
+            extras_shape=(1,),
+            extras_meanings=["timestep"],
+            state_shape=shape,
+            state_extra_shape=(1,),
+            reward_space=ContinuousSpace.from_shape(1),
+        )
+        self.horizon = int(self._oenv.horizon)
+
+    @property
+    def state(self) -> OvercookedState:
+        """Current state of the environment"""
+        return self._oenv.state
+
+    def set_state(self, state: State):
+        raise NotImplementedError("Not yet implemented")
+
+    @property
+    def time_step(self):
+        return self.state.timestep
+
+    def _state_data(self):
+        state = np.array(self._mdp.lossless_state_encoding(self.state))
+        # Use axes (agents, channels, height, width) instead of (agents, height, width, channels)
+        state = np.transpose(state, (0, 3, 1, 2))
+        return state
+
+    def get_state(self):
+        return State(self._state_data()[0], np.array([self.time_step / self.horizon]))
+
+    def get_observation(self) -> Observation:
+        return Observation(
+            data=self._state_data(),
+            available_actions=self.available_actions(),
+            extras=np.array([[self.time_step / self.horizon]] * self.n_agents),
+        )
+
+    def available_actions(self):
+        available_actions = np.full((self.n_agents, self.n_actions), False)
+        actions = self._mdp.get_actions(self._oenv.state)
+        for agent_num, agent_actions in enumerate(actions):
+            for action in agent_actions:
+                available_actions[agent_num, Action.ACTION_TO_INDEX[action]] = True
+        return np.array(available_actions)
+
+    def step(self, actions: Sequence[int] | npt.NDArray[np.int32 | np.int64]) -> Step:
+        actions = [Action.ALL_ACTIONS[a] for a in actions]
+        _, reward, done, info = self._oenv.step(actions, display_phi=True)
+        return Step(
+            obs=self.get_observation(),
+            state=self.get_state(),
+            reward=np.array([reward]),
+            done=done,
+            truncated=False,
+            info=info,
+        )
+
+    def get_image(self):
+        rewards_dict = {}  # dictionary of details you want rendered in the UI
+        for key, value in self._oenv.game_stats.items():
+            if key in [
+                "cumulative_shaped_rewards_by_agent",
+                "cumulative_sparse_rewards_by_agent",
+            ]:
+                rewards_dict[key] = value
+
+        image = self.visualizer.render_state(
+            state=self._oenv.state,
+            grid=self._mdp.terrain_mtx,
+            hud_data=StateVisualizer.default_hud_data(self._oenv.state, **rewards_dict),
+        )
+
+        image = pygame.surfarray.array3d(image)
+        image = np.flip(np.rot90(image, 3), 1)
+        # Depending on the platform, the image may need to be converted to RGB
+        if sys.platform in ("linux", "linux2"):
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+        return image
+
+    @staticmethod
+    def from_layout(
+        layout: Literal[
+            "asymmetric_advantages",
+            "asymmetric_advantages_tomato",
+            "bonus_order_test",
+            "bottleneck",
+            "centre_objects",
+            "centre_pots",
+            "coordination_ring",
+            "corridor",
+            "counter_circuit",
+            "counter_circuit_o_1order",
+            "cramped_corridor",
+            "cramped_room",
+            "cramped_room_o_3orders",
+            "cramped_room_single",
+            "cramped_room_tomato",
+            "five_by_five",
+            "forced_coordination",
+            "forced_coordination_tomato",
+            "inverse_marshmallow_experiment",
+            "large_room",
+            "long_cook_time",
+            "marshmallow_experiment_coordination",
+            "marshmallow_experiment",
+            "mdp_test",
+            "m_shaped_s",
+            "multiplayer_schelling",
+            "pipeline",
+            "scenario1_s",
+            "scenario2",
+            "scenario2_s",
+            "scenario3",
+            "scenario4",
+            "schelling",
+            "schelling_s",
+            "simple_o",
+            "simple_o_t",
+            "simple_tomato",
+            "small_corridor",
+            "soup_coordination",
+            "tutorial_0",
+            "tutorial_1",
+            "tutorial_2",
+            "tutorial_3",
+            "unident",
+            "you_shall_not_pass",
+        ],
+        horizon: int = 400,
+    ):
+        mdp = OvercookedGridworld.from_layout_name(layout)
+        return Overcooked(OvercookedEnv.from_mdp(mdp, horizon=horizon))
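Combining the new adapter with the episode loop from the package docstring, a typical use of `Overcooked` might look like the following sketch (the layout name and horizon simply reuse the defaults shown above):

```python
# Sketch combining Overcooked.from_layout (defined above) with the episode loop
# documented in the 3.3.1 package docstring.
from marlenv import Episode
from marlenv.adapters import Overcooked

env = Overcooked.from_layout("cramped_room", horizon=400)
obs, state = env.reset()
episode = Episode.new(obs, state)
while not episode.is_finished:
    action = env.sample_action()  # a valid random joint action
    step = env.step(action)       # Step with obs, state, reward, done, ...
    episode.add(step, action)     # progressively build the episode
frame = env.get_image()           # rendered frame, BGR on Linux as in get_image() above
```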