multi-agent-rlenv 3.5.2.tar.gz → 3.5.4.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/.github/workflows/ci.yaml +1 -1
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/PKG-INFO +4 -1
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/pyproject.toml +2 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/__init__.py +1 -1
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/episode.py +4 -49
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/observation.py +10 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/state.py +8 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/test_models.py +34 -1
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/.github/workflows/docs.yaml +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/.gitignore +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/LICENSE +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/README.md +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/adapters/__init__.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/adapters/gym_adapter.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/adapters/overcooked_adapter.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/adapters/pettingzoo_adapter.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/adapters/pymarl_adapter.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/adapters/smac_adapter.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/env_builder.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/env_pool.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/exceptions.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/mock_env.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/__init__.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/env.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/spaces.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/step.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/transition.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/py.typed +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/utils/__init__.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/utils/schedule.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/__init__.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/agent_id_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/available_actions_mask.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/available_actions_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/blind_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/centralised.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/delayed_rewards.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/last_action_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/paddings.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/penalty_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/potential_shaping.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/rlenv_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/time_limit.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/wrappers/video_recorder.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/__init__.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/test_adapters.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/test_episode.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/test_pool.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/test_schedules.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/test_serialization.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/test_spaces.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/test_wrappers.py +0 -0
- {multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/utils.py +0 -0
{multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: multi-agent-rlenv
-Version: 3.5.2
+Version: 3.5.4
 Summary: A strongly typed Multi-Agent Reinforcement Learning framework
 Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
 Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
@@ -19,6 +19,7 @@ Requires-Dist: pymunk>=6.0; extra == 'all'
 Requires-Dist: pysc2; extra == 'all'
 Requires-Dist: scipy>=1.10; extra == 'all'
 Requires-Dist: smac; extra == 'all'
+Requires-Dist: torch>=2.0; extra == 'all'
 Provides-Extra: gym
 Requires-Dist: gymnasium>=0.29.1; extra == 'gym'
 Provides-Extra: overcooked
@@ -31,6 +32,8 @@ Requires-Dist: scipy>=1.10; extra == 'pettingzoo'
 Provides-Extra: smac
 Requires-Dist: pysc2; extra == 'smac'
 Requires-Dist: smac; extra == 'smac'
+Provides-Extra: torch
+Requires-Dist: torch>=2.0; extra == 'torch'
 Description-Content-Type: text/markdown
 
 # `marlenv` - A unified framework for muti-agent reinforcement learning
{multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/pyproject.toml

@@ -20,6 +20,7 @@ gym = ["gymnasium>=0.29.1"]
 smac = ["smac", "pysc2"]
 pettingzoo = ["pettingzoo>=1.20", "pymunk>=6.0", "scipy>=1.10"]
 overcooked = ["overcooked-ai>=1.1.0", "scipy>=1.10"]
+torch = ["torch>=2.0"]
 all = [
     "gymnasium>0.29.1",
     "pettingzoo>=1.20",
@@ -28,6 +29,7 @@ all = [
     "pysc2",
     "pymunk>=6.0",
     "scipy>=1.10",
+    "torch>=2.0",
 ]
 
 [build-system]
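The new `torch` extra keeps PyTorch optional: the tensor helpers added later in this diff import torch lazily, and the test suite only runs the tensor tests when torch is importable. Below is a minimal sketch (not part of the package) of the same guard pattern a downstream project could use, mirroring the `find_spec("torch")` check introduced in tests/test_models.py:

```python
# Hypothetical downstream snippet: detect the optional torch dependency at runtime.
from importlib.util import find_spec

HAS_PYTORCH = find_spec("torch") is not None  # True only if torch is installed

if HAS_PYTORCH:
    import torch  # safe: the module is known to be importable

    print("torch available:", torch.__version__)
else:
    # Without torch, calling the lazy-importing helpers (e.g. as_tensors) raises ImportError.
    print("torch not installed; tensor helpers are unavailable")
```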
{multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/__init__.py

@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
 If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
 """
 
-__version__ = "3.5.2"
+__version__ = "3.5.4"
 
 from . import models
 from .models import (
{multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/episode.py

@@ -22,10 +22,10 @@ class Episode:
     all_extras: list[npt.NDArray[np.float32]]
     actions: list[npt.NDArray]
     rewards: list[npt.NDArray[np.float32]]
-    all_available_actions: list[npt.NDArray[np.
+    all_available_actions: list[npt.NDArray[np.bool]]
     all_states: list[npt.NDArray[np.float32]]
     all_states_extras: list[npt.NDArray[np.float32]]
-    metrics: dict[str,
+    metrics: dict[str, Any]
     episode_len: int
     other: dict[str, list[Any]]
     is_done: bool = False
@@ -33,7 +33,7 @@ class Episode:
     """Whether the episode did reach a terminal state (different from truncated)"""
 
     @staticmethod
-    def new(obs: Observation, state: State, metrics: Optional[dict[str,
+    def new(obs: Observation, state: State, metrics: Optional[dict[str, Any]] = None) -> "Episode":
         if metrics is None:
             metrics = {}
         return Episode(
@@ -363,51 +363,6 @@ class Episode:
         for i, s in enumerate(scores):
             self.metrics[f"score-{i}"] = float(s)
 
-
-    # self,
-    # new_obs: Observation,
-    # new_state: State,
-    # action: A,
-    # reward: np.ndarray,
-    # done: bool,
-    # truncated: bool,
-    # info: dict[str, Any],
-    # **kwargs,
-    # ):
-    # """Add a new transition to the episode"""
-    # self.episode_len += 1
-    # self.all_observations.append(new_obs.data)
-    # self.all_extras.append(new_obs.extras)
-    # self.all_available_actions.append(new_obs.available_actions)
-    # self.all_states.append(new_state.data)
-    # self.all_states_extras.append(new_state.extras)
-    # match action:
-    # case np.ndarray() as action:
-    # self.actions.append(action)
-    # case other:
-    # self.actions.append(np.array(other))
-    # self.rewards.append(reward)
-    # for key, value in kwargs.items():
-    # current = self.other.get(key, [])
-    # current.append(value)
-    # self.other[key] = current
-
-    # if done:
-    # # Only set the truncated flag if the episode is not done (both could happen with a time limit)
-    # self.is_truncated = truncated
-    # self.is_done = done
-    # # Add metrics that can be plotted
-    # for key, value in info.items():
-    # if isinstance(value, bool):
-    # value = int(value)
-    # self.metrics[key] = value
-    # self.metrics["episode_len"] = self.episode_len
-
-    # rewards = np.array(self.rewards)
-    # scores = np.sum(rewards, axis=0)
-    # for i, s in enumerate(scores):
-    # self.metrics[f"score-{i}"] = float(s)
-
-    def add_metrics(self, metrics: dict[str, float]):
+    def add_metrics(self, metrics: dict[str, Any]):
         """Add metrics to the episode"""
         self.metrics.update(metrics)
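The `metrics` annotations on `Episode` widen from float-only values to `Any`, so episodes can now carry non-numeric metrics. A minimal usage sketch (assumptions: `DiscreteMockEnv`, `Episode`, and `add_metrics` as exported and annotated in this release; the reset indexing mirrors the package's own tests):

```python
# Sketch only: illustrates the widened metrics type (dict[str, Any]) on Episode.
from marlenv import DiscreteMockEnv, Episode

env = DiscreteMockEnv(4)                       # mock environment used in the package's tests
reset_result = env.reset()
obs, state = reset_result[0], reset_result[1]  # same indexing as tests/test_models.py

episode = Episode.new(obs, state)              # metrics defaults to an empty dict

# Previously annotated as dict[str, float]; values of any type are now accepted.
episode.add_metrics({"success": True, "map_name": "mock", "score-0": 1.5})
```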
{multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/observation.py

@@ -87,3 +87,13 @@ class Observation:
         if not np.array_equal(self.data, other.data):
             return False
         return np.array_equal(self.extras, other.extras) and np.array_equal(self.available_actions, other.available_actions)
+
+    def as_tensors(self, device=None):
+        """
+        Convert the observation to a tuple of tensors of shape (1, n_agents, <dim>).
+        """
+        import torch
+
+        data = torch.from_numpy(self.data).unsqueeze(0).to(device, non_blocking=True)
+        extras = torch.from_numpy(self.extras).unsqueeze(0).to(device, non_blocking=True)
+        return data, extras
{multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/src/marlenv/models/state.py

@@ -52,3 +52,11 @@ class State(Generic[StateType]):
         if not np.array_equal(self.extras, value.extras):
             return False
         return True
+
+    def as_tensors(self, device=None):
+        """Convert the state to a tuple of tensors of shape (1, <dim>)."""
+        import torch
+
+        data = torch.from_numpy(self.data).unsqueeze(0).to(device, non_blocking=True)
+        extras = torch.from_numpy(self.extras).unsqueeze(0).to(device, non_blocking=True)
+        return data, extras
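Both new helpers prepend a batch dimension of 1 and move the result to the requested device. A minimal usage sketch, assuming torch is installed (the lazy import inside `as_tensors` fails otherwise) and reusing the mock environment from the package's tests:

```python
# Sketch only: convert an Observation and a State to torch tensors.
from marlenv import DiscreteMockEnv

env = DiscreteMockEnv(4)
reset_result = env.reset()                     # indexing mirrors tests/test_models.py
obs, state = reset_result[0], reset_result[1]

obs_data, obs_extras = obs.as_tensors()        # shapes (1, n_agents, *observation_shape) and (1, n_agents, *extras_shape)
s_data, s_extras = state.as_tensors("cpu")     # optional device argument, e.g. "cpu" or "cuda"

print(obs_data.shape, obs_extras.shape, s_data.shape, s_extras.shape)
```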
{multi_agent_rlenv-3.5.2 → multi_agent_rlenv-3.5.4}/tests/test_models.py

@@ -1,8 +1,11 @@
 from marlenv import Observation, Transition, DiscreteMockEnv, DiscreteMOMockEnv, Builder, State, Episode, MARLEnv, DiscreteSpace
 import numpy as np
-
+import pytest
+from importlib.util import find_spec
 from .utils import generate_episode
 
+HAS_PYTORCH = find_spec("torch") is not None
+
 
 def test_obs_eq():
     obs1 = Observation(
@@ -422,3 +425,33 @@ def test_wrong_extras_meanings_length():
         assert False, "This should raise a ValueError because the length of extras_meanings is different from the actual number of extras"
     except ValueError:
         pass
+
+
+@pytest.mark.skipif(not HAS_PYTORCH, reason="torch is not installed")
+def test_observation_as_tensor():
+    import torch
+
+    env = DiscreteMockEnv(4)
+    obs = env.reset()[0]
+    data, extras = obs.as_tensors()
+    assert isinstance(data, torch.Tensor)
+    assert data.shape == (1, env.n_agents, *env.observation_shape)
+    assert data.dtype == torch.float32
+    assert isinstance(extras, torch.Tensor)
+    assert extras.shape == (1, env.n_agents, *env.extras_shape)
+    assert extras.dtype == torch.float32
+
+
+@pytest.mark.skipif(not HAS_PYTORCH, reason="torch is not installed")
+def test_state_as_tensor():
+    import torch
+
+    env = DiscreteMockEnv(4)
+    state = env.reset()[1]
+    data, extras = state.as_tensors()
+    assert isinstance(data, torch.Tensor)
+    assert data.shape == (1, *env.state_shape)
+    assert data.dtype == torch.float32
+    assert isinstance(extras, torch.Tensor)
+    assert extras.shape == (1, *env.state_extra_shape)
+    assert extras.dtype == torch.float32