multi-agent-rlenv 3.5.0__tar.gz → 3.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/PKG-INFO +1 -1
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/__init__.py +1 -1
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/env.py +1 -1
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/episode.py +6 -6
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/step.py +1 -1
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/__init__.py +2 -0
- multi_agent_rlenv-3.5.2/src/marlenv/wrappers/potential_shaping.py +54 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/rlenv_wrapper.py +1 -2
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/test_wrappers.py +33 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/.github/workflows/ci.yaml +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/.github/workflows/docs.yaml +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/.gitignore +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/LICENSE +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/README.md +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/pyproject.toml +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/adapters/__init__.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/adapters/gym_adapter.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/adapters/overcooked_adapter.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/adapters/pettingzoo_adapter.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/adapters/pymarl_adapter.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/adapters/smac_adapter.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/env_builder.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/env_pool.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/exceptions.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/mock_env.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/__init__.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/observation.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/spaces.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/state.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/transition.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/py.typed +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/utils/__init__.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/utils/schedule.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/agent_id_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/available_actions_mask.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/available_actions_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/blind_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/centralised.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/delayed_rewards.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/last_action_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/paddings.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/penalty_wrapper.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/time_limit.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/video_recorder.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/__init__.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/test_adapters.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/test_episode.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/test_models.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/test_pool.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/test_schedules.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/test_serialization.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/test_spaces.py +0 -0
- {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/utils.py +0 -0
{multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: multi-agent-rlenv
-Version: 3.5.0
+Version: 3.5.2
 Summary: A strongly typed Multi-Agent Reinforcement Learning framework
 Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
 Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
{multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/__init__.py
@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
 If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
 """
 
-__version__ = "3.5.0"
+__version__ = "3.5.2"
 
 from . import models
 from .models import (
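The docstring in this hunk recommends subclassing `RLEnvWrapper` for new wrappers. As a rough, non-authoritative sketch (the `step`/`Step.reward` behaviour is inferred from the `PotentialShaping` wrapper introduced later in this diff, and the constructor details are an assumption), a minimal custom wrapper could look like:

from marlenv.wrappers import RLEnvWrapper

class ScaledReward(RLEnvWrapper):
    """Hypothetical wrapper that scales every reward by a constant factor."""

    def step(self, actions):
        step = super().step(actions)     # delegate to the wrapped environment
        step.reward = step.reward * 0.5  # step.reward is a float32 numpy array per this diff
        return step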
{multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/env.py
@@ -199,7 +199,7 @@ class MARLEnv(ABC, Generic[ActionSpaceType]):
         episode.add(step, action)
         return episode
 
-    def has_same_inouts(self, other) -> bool:
+    def has_same_inouts(self, other: "MARLEnv[ActionSpaceType]") -> bool:
         """Alias for `have_same_inouts(self, other)`."""
         if not isinstance(other, MARLEnv):
             return False
{multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/episode.py
@@ -66,13 +66,13 @@ class Episode:
         if target_len < self.episode_len:
             raise ValueError(f"Cannot pad episode to a smaller size: {target_len} < {self.episode_len}")
         padding_size = target_len - self.episode_len
-        obs = self.all_observations + [self.all_observations[0]] * padding_size
-        extras = self.all_extras + [self.all_extras[0]] * padding_size
-        actions = self.actions + [self.actions[0]] * padding_size
-        rewards = self.rewards + [self.rewards[0]] * padding_size
+        obs = self.all_observations + [np.zeros_like(self.all_observations[0])] * padding_size
+        extras = self.all_extras + [np.zeros_like(self.all_extras[0])] * padding_size
+        actions = self.actions + [np.zeros_like(self.actions[0])] * padding_size
+        rewards = self.rewards + [np.zeros_like(self.rewards[0])] * padding_size
         availables = self.all_available_actions + [self.all_available_actions[0]] * padding_size
-        states = self.all_states + [self.all_states[0]] * padding_size
-        states_extras = self.all_states_extras + [self.all_states_extras[0]] * padding_size
+        states = self.all_states + [np.zeros_like(self.all_states[0])] * padding_size
+        states_extras = self.all_states_extras + [np.zeros_like(self.all_states_extras[0])] * padding_size
         other = {key: value + [value[0]] * padding_size for key, value in self.other.items()}
         return Episode(
             all_observations=obs,
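The change above pads episodes with zero-filled arrays (matching the shape and dtype of the first element) instead of repeating the first element, so padded steps no longer carry spurious observations, actions or rewards. A minimal plain-numpy sketch of the same pattern, using made-up values:

import numpy as np

# Two recorded per-step reward arrays, padded up to a fixed episode length.
rewards = [np.array([1.0, 0.5], dtype=np.float32), np.array([0.0, 2.0], dtype=np.float32)]
target_len = 5
padding_size = target_len - len(rewards)
# np.zeros_like keeps the per-step shape and dtype but zeroes the content.
padded = rewards + [np.zeros_like(rewards[0])] * padding_size
assert len(padded) == target_len
assert padded[-1].tolist() == [0.0, 0.0]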
{multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/models/step.py
@@ -39,7 +39,7 @@ class Step:
             case int() | float():
                 self.reward = np.array([reward], dtype=np.float32)
             case np.ndarray():
-                self.reward = reward
+                self.reward = reward.astype(np.float32)
             case other:
                 # We assume this is a sequence of some sort
                 self.reward = np.array(other, dtype=np.float32)
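The added `.astype(np.float32)` makes the ndarray branch consistent with the scalar and sequence branches, which already force float32. A small numpy-only illustration (hypothetical values, not taken from the package):

import numpy as np

reward = np.array([1, 0])               # an environment might hand back an integer array
print(reward.dtype)                     # an integer dtype, not float32
print(reward.astype(np.float32).dtype)  # float32, matching the other branches of the match statement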
{multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/__init__.py
@@ -10,6 +10,7 @@ from .blind_wrapper import Blind
 from .centralised import Centralized
 from .available_actions_mask import AvailableActionsMask
 from .delayed_rewards import DelayedReward
+from .potential_shaping import PotentialShaping
 
 __all__ = [
     "RLEnvWrapper",
@@ -26,4 +27,5 @@ __all__ = [
     "Blind",
     "Centralized",
     "DelayedReward",
+    "PotentialShaping",
 ]
multi_agent_rlenv-3.5.2/src/marlenv/wrappers/potential_shaping.py (new file)
@@ -0,0 +1,54 @@
+from abc import abstractmethod, ABC
+from .rlenv_wrapper import RLEnvWrapper
+from marlenv import Space, MARLEnv, Observation
+from typing import TypeVar, Optional
+import numpy as np
+import numpy.typing as npt
+
+from dataclasses import dataclass
+
+A = TypeVar("A", bound=Space)
+
+
+@dataclass
+class PotentialShaping(RLEnvWrapper[A], ABC):
+    """
+    Potential shaping for the Laser Learning Environment (LLE).
+
+    https://people.eecs.berkeley.edu/~pabbeel/cs287-fa09/readings/NgHaradaRussell-shaping-ICML1999.pdf
+    """
+
+    gamma: float
+
+    def __init__(
+        self,
+        env: MARLEnv,
+        gamma: float = 1.0,
+        extra_shape: Optional[tuple[int]] = None,
+    ):
+        super().__init__(env, extra_shape=extra_shape)
+        self.gamma = gamma
+        self._current_potential = self.compute_potential()
+
+    def add_extras(self, obs: Observation) -> Observation:
+        """Add the extras related to potential shaping. Does nothing by default."""
+        return obs
+
+    def reset(self):
+        obs, state = super().reset()
+        self._current_potential = self.compute_potential()
+        return self.add_extras(obs), state
+
+    def step(self, actions):
+        prev_potential = self._current_potential
+        step = super().step(actions)
+
+        self._current_potential = self.compute_potential()
+        shaped_reward = self.gamma * self._current_potential - prev_potential
+        step.obs = self.add_extras(step.obs)
+        step.reward += shaped_reward
+        return step
+
+    @abstractmethod
+    def compute_potential(self) -> float | npt.NDArray[np.float32]:
+        """Compute the potential of the current state of the environment."""
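For context, this new wrapper implements potential-based reward shaping (Ng, Harada and Russell, ICML 1999, linked in the docstring): each step's reward is augmented with F(s, s') = gamma * Phi(s') - Phi(s), which leaves the optimal policy unchanged. Below is a hedged usage sketch, modelled on the test added at the bottom of this diff; the `wrapped` attribute comes from `RLEnvWrapper`, while the `distance_to_goal` attribute is purely hypothetical:

from marlenv.wrappers import PotentialShaping

class DistanceShaping(PotentialShaping):
    """Hypothetical subclass whose potential is the negated distance to a goal."""

    def compute_potential(self) -> float:
        # Phi(s): assumes the wrapped environment exposes a `distance_to_goal` value.
        return -float(getattr(self.wrapped, "distance_to_goal", 0.0))

# env = DistanceShaping(some_marl_env, gamma=0.99)
# step.reward then becomes r + 0.99 * Phi(s') - Phi(s) on every call to env.step(...).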
{multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/src/marlenv/wrappers/rlenv_wrapper.py
@@ -1,7 +1,6 @@
 from typing import Optional, Sequence
 from typing_extensions import TypeVar
 from dataclasses import dataclass
-from abc import ABC
 import numpy as np
 
 from marlenv.models import MARLEnv, Space, DiscreteSpace, State
@@ -11,7 +10,7 @@ AS = TypeVar("AS", bound=Space, default=Space)
 
 
 @dataclass
-class RLEnvWrapper(MARLEnv[AS], ABC):
+class RLEnvWrapper(MARLEnv[AS]):
     """Parent class for all RLEnv wrappers"""
 
     wrapped: MARLEnv[AS]
{multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.2}/tests/test_wrappers.py
@@ -357,3 +357,36 @@ def test_delayed_rewards_from_builder():
     for end_game in range(delay + 1, delay * 2):
         env = Builder(DiscreteMockEnv(reward_step=10, end_game=end_game, n_agents=2)).delay_rewards(delay).build()
         _test_delayed_rewards(env)
+
+
+def test_potential_shaping():
+    from marlenv.wrappers.potential_shaping import PotentialShaping
+
+    class PS(PotentialShaping):
+        def __init__(self, env: MARLEnv):
+            self.phi = 10
+            super().__init__(env)
+
+        def reset(self):
+            self.phi = 10
+            return super().reset()
+
+        def compute_potential(self) -> float:
+            return self.phi
+
+        def step(self, actions):
+            self.phi = max(0, self.phi - 1)
+            return super().step(actions)
+
+    EP_LENGTH = 20
+    env = PS(DiscreteMockEnv(reward_step=0, end_game=EP_LENGTH))
+    env.reset()
+    step = None
+
+    for i in range(10):
+        step = env.random_step()
+        assert step.reward.item() == -1
+
+    for i in range(10):
+        step = env.random_step()
+        assert step.reward.item() == 0
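As a sanity check on the asserted values: `DiscreteMockEnv(reward_step=0, ...)` yields a base reward of 0, Phi starts at 10 and is decremented (floored at 0) before each step, and gamma defaults to 1.0, so the shaped reward is Phi(s') - Phi(s) = 9 - 10 = -1 for the first ten steps and 0 - 0 = 0 once the potential has bottomed out, which is exactly what the two loops assert.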