multi-agent-rlenv 3.5.0__py3-none-any.whl → 3.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
marlenv/__init__.py CHANGED
@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
  If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
  """

- __version__ = "3.5.0"
+ __version__ = "3.5.2"

  from . import models
  from .models import (
marlenv/models/env.py CHANGED
@@ -199,7 +199,7 @@ class MARLEnv(ABC, Generic[ActionSpaceType]):
              episode.add(step, action)
          return episode

-     def has_same_inouts(self, other) -> bool:
+     def has_same_inouts(self, other: "MARLEnv[ActionSpaceType]") -> bool:
          """Alias for `have_same_inouts(self, other)`."""
          if not isinstance(other, MARLEnv):
              return False
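
The new annotation only documents the existing contract; behaviour is unchanged. A minimal sketch of the intended use, where `env_a` and `env_b` are hypothetical `MARLEnv` instances:

    # Compare the input/output specification of two environments.
    if env_a.has_same_inouts(env_b):
        print("same action/observation layout")
    # Anything that is not a MARLEnv compares as False:
    assert env_a.has_same_inouts("not an env") is False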
marlenv/models/episode.py CHANGED
@@ -66,13 +66,13 @@ class Episode:
          if target_len < self.episode_len:
              raise ValueError(f"Cannot pad episode to a smaller size: {target_len} < {self.episode_len}")
          padding_size = target_len - self.episode_len
-         obs = self.all_observations + [self.all_observations[0]] * padding_size
-         extras = self.all_extras + [self.all_extras[0]] * padding_size
-         actions = self.actions + [self.actions[0]] * padding_size
-         rewards = self.rewards + [self.rewards[0]] * padding_size
+         obs = self.all_observations + [np.zeros_like(self.all_observations[0])] * padding_size
+         extras = self.all_extras + [np.zeros_like(self.all_extras[0])] * padding_size
+         actions = self.actions + [np.zeros_like(self.actions[0])] * padding_size
+         rewards = self.rewards + [np.zeros_like(self.rewards[0])] * padding_size
          availables = self.all_available_actions + [self.all_available_actions[0]] * padding_size
-         states = self.all_states + [self.all_states[0]] * padding_size
-         states_extras = self.all_states_extras + [self.all_states_extras[0]] * padding_size
+         states = self.all_states + [np.zeros_like(self.all_states[0])] * padding_size
+         states_extras = self.all_states_extras + [np.zeros_like(self.all_states_extras[0])] * padding_size
          other = {key: value + [value[0]] * padding_size for key, value in self.other.items()}
          return Episode(
              all_observations=obs,
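
This hunk changes the padding semantics: padded steps are now zero-valued placeholders with the shape and dtype of the first element, instead of copies of it, so padding no longer injects spurious observations, actions, or rewards. Note that `availables` and the `other` dict still repeat their first element, presumably because an all-zero availability mask would mean no action is available. A standalone sketch of the new behaviour, using a hypothetical two-step reward list:

    import numpy as np

    rewards = [np.array([1.0], dtype=np.float32), np.array([0.5], dtype=np.float32)]
    padding_size = 5 - len(rewards)  # pad a 2-step episode to length 5
    padded = rewards + [np.zeros_like(rewards[0])] * padding_size
    # Padding entries keep the shape/dtype of rewards[0] but are all zero.
    assert all(float(p.sum()) == 0.0 for p in padded[2:])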
marlenv/models/step.py CHANGED
@@ -39,7 +39,7 @@ class Step:
              case int() | float():
                  self.reward = np.array([reward], dtype=np.float32)
              case np.ndarray():
-                 self.reward = reward
+                 self.reward = reward.astype(np.float32)
              case other:
                  # We assume this is a sequence of some sort
                  self.reward = np.array(other, dtype=np.float32)
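
Every branch of the `match` now normalises `self.reward` to `float32`; previously an `np.ndarray` reward kept whatever dtype it arrived with. A standalone sketch of the difference:

    import numpy as np

    reward = np.array([1, 0])            # integer dtype (platform-dependent)
    normalised = reward.astype(np.float32)
    assert normalised.dtype == np.float32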
marlenv/wrappers/__init__.py CHANGED
@@ -10,6 +10,7 @@ from .blind_wrapper import Blind
  from .centralised import Centralized
  from .available_actions_mask import AvailableActionsMask
  from .delayed_rewards import DelayedReward
+ from .potential_shaping import PotentialShaping

  __all__ = [
      "RLEnvWrapper",
@@ -26,4 +27,5 @@ __all__ = [
      "Blind",
      "Centralized",
      "DelayedReward",
+     "PotentialShaping",
  ]
marlenv/wrappers/potential_shaping.py ADDED
@@ -0,0 +1,54 @@
+ from abc import abstractmethod, ABC
+ from .rlenv_wrapper import RLEnvWrapper
+ from marlenv import Space, MARLEnv, Observation
+ from typing import TypeVar, Optional
+ import numpy as np
+ import numpy.typing as npt
+
+ from dataclasses import dataclass
+
+ A = TypeVar("A", bound=Space)
+
+
+ @dataclass
+ class PotentialShaping(RLEnvWrapper[A], ABC):
+     """
+     Potential shaping for the Laser Learning Environment (LLE).
+
+     https://people.eecs.berkeley.edu/~pabbeel/cs287-fa09/readings/NgHaradaRussell-shaping-ICML1999.pdf
+     """
+
+     gamma: float
+
+     def __init__(
+         self,
+         env: MARLEnv,
+         gamma: float = 1.0,
+         extra_shape: Optional[tuple[int]] = None,
+     ):
+         super().__init__(env, extra_shape=extra_shape)
+         self.gamma = gamma
+         self._current_potential = self.compute_potential()
+
+     def add_extras(self, obs: Observation) -> Observation:
+         """Add the extras related to potential shaping. Does nothing by default."""
+         return obs
+
+     def reset(self):
+         obs, state = super().reset()
+         self._current_potential = self.compute_potential()
+         return self.add_extras(obs), state
+
+     def step(self, actions):
+         prev_potential = self._current_potential
+         step = super().step(actions)
+
+         self._current_potential = self.compute_potential()
+         shaped_reward = self.gamma * self._current_potential - prev_potential
+         step.obs = self.add_extras(step.obs)
+         step.reward += shaped_reward
+         return step
+
+     @abstractmethod
+     def compute_potential(self) -> float | npt.NDArray[np.float32]:
+         """Compute the potential of the current state of the environment."""
marlenv/wrappers/rlenv_wrapper.py CHANGED
@@ -1,7 +1,6 @@
  from typing import Optional, Sequence
  from typing_extensions import TypeVar
  from dataclasses import dataclass
- from abc import ABC
  import numpy as np

  from marlenv.models import MARLEnv, Space, DiscreteSpace, State
@@ -11,7 +10,7 @@ AS = TypeVar("AS", bound=Space, default=Space)


  @dataclass
- class RLEnvWrapper(MARLEnv[AS], ABC):
+ class RLEnvWrapper(MARLEnv[AS]):
      """Parent class for all RLEnv wrappers"""

      wrapped: MARLEnv[AS]
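
With `ABC` dropped, `RLEnvWrapper` can be instantiated directly as a transparent pass-through wrapper rather than only subclassed. A minimal sketch, assuming `env` is any `MARLEnv` instance and inferring the constructor from the `super().__init__(env, extra_shape=extra_shape)` call in `PotentialShaping` above:

    from marlenv.wrappers import RLEnvWrapper

    wrapper = RLEnvWrapper(env)   # previously a TypeError: abstract class
    obs, state = wrapper.reset()  # delegates to the wrapped environment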
multi_agent_rlenv-{3.5.0 → 3.5.2}.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: multi-agent-rlenv
- Version: 3.5.0
+ Version: 3.5.2
  Summary: A strongly typed Multi-Agent Reinforcement Learning framework
  Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
  Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
multi_agent_rlenv-{3.5.0 → 3.5.2}.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
- marlenv/__init__.py,sha256=PDJGtS3Psbiif5tc-PyUCfIju4pSlc6ukb-if6rbOv0,3656
+ marlenv/__init__.py,sha256=UoZATsYMuKlnHyYdIRX7eQ6mGcmMww-tqX3uCyWVqRA,3656
  marlenv/env_builder.py,sha256=RJoHJLYAUE1ausAoJiRC3fUxyxpH1WRJf7Sdm2ml-uk,5517
  marlenv/env_pool.py,sha256=nCEBkGQU62fcvCAANyAqY8gCFjYlVnSCg-V3Fhx00yc,933
  marlenv/exceptions.py,sha256=gJUC_2rVAvOfK_ypVFc7Myh-pIfSU3To38VBVS_0rZA,1179
@@ -11,16 +11,16 @@ marlenv/adapters/pettingzoo_adapter.py,sha256=w9Ta-X4L_6ZXdDGmREOdcU0vpLR8lGP__s
  marlenv/adapters/pymarl_adapter.py,sha256=2s7EY31s1hrml3q-BBaXo_eDMXTjkebozZPvzsgrb9c,3353
  marlenv/adapters/smac_adapter.py,sha256=8uWC7YKsaSXeTS8AUhpGOKvrWMbVEQT2-pml5BaFUB0,8343
  marlenv/models/__init__.py,sha256=uihmRs71Gg5z7Bvau_xtaQVg7xEtX8sTzi74bIHL5P0,443
- marlenv/models/env.py,sha256=KB3-LcvIbGG-78L8ZavfjKykoO85FvhZjs5lJQKvxE0,7813
- marlenv/models/episode.py,sha256=IF3-8YV0tHsIjTYZUOlHmX_IyjnrrzTWk-HPk_mwcR4,15100
+ marlenv/models/env.py,sha256=BG1iVHxGD_p827mF0ewyOBn6wU2gtFsHLW1b4UtW-V0,7841
+ marlenv/models/episode.py,sha256=IKPLuDVlz85Be6zrC21gyautjqRkEApS4fgRqQR52s0,15190
  marlenv/models/observation.py,sha256=kAmh1hIoC2TGrZlGVzV0y4TXXCSrI7gcmG0raeoncYk,3153
  marlenv/models/spaces.py,sha256=v7jnhPfj7vq7DFFJpSbQEYe4NGLLlj_bj2pzvvSBX7Y,7777
  marlenv/models/state.py,sha256=958PXTHadi3gtRnhGgcGtqBnF44R11kdcx62NN2gwxA,1717
- marlenv/models/step.py,sha256=LKGAV2Cu-k9Gz1hwrfvGx51l8axtQRqDE9WVL5r2A1Q,3037
+ marlenv/models/step.py,sha256=00PhD_ccdCIYAY1SVJdJU91weU0Y_tNIJwK16TN_53I,3056
  marlenv/models/transition.py,sha256=UkJVRNxZoyRkjE7YmKtUf_4xA7cOEh20O60dTldbvys,5070
  marlenv/utils/__init__.py,sha256=C3qhvkVwctBP8mG3G5nkAZ5DKfErVRkdbHo7oeWVsM0,209
  marlenv/utils/schedule.py,sha256=slhtpQiBHSUNyPmSkKb2yBgiHJqPhoPxa33GxvyV8Jc,8565
- marlenv/wrappers/__init__.py,sha256=wl23NUYcl0vPJb2QLpe4Xj8ZocUIOarAZX8CgWqdSQE,808
+ marlenv/wrappers/__init__.py,sha256=uV00m0jysZBgOW-TvRekis-gsAKPeR51P3HsuRZKxG8,880
  marlenv/wrappers/agent_id_wrapper.py,sha256=9qHV3LMQ4AjcDCSuvQhz5h9hUf7Xtrdi2sIxmNZk5NA,1126
  marlenv/wrappers/available_actions_mask.py,sha256=OMyt2KntsR8JA2RuRgvwdzqzPe-_H-KKkbUUJfe_mks,1404
  marlenv/wrappers/available_actions_wrapper.py,sha256=_HRl9zsjJgSrLgVuT-BjpnnfrfM8ic6wBUWlg67uCx4,926
@@ -30,10 +30,11 @@ marlenv/wrappers/delayed_rewards.py,sha256=P8az9rYmu67OzL1ZEFqfTQcCxRI_AXKXrKUBQ
  marlenv/wrappers/last_action_wrapper.py,sha256=QVepSLcWExqACwKvAM0G2LALapSoWdd7YHmah2LZ3vE,2603
  marlenv/wrappers/paddings.py,sha256=0aAi7RP1yL8I5mR4Oxzl9-itKys88mgsPjqe7q-frbk,2024
  marlenv/wrappers/penalty_wrapper.py,sha256=3YBoUV6ETksZ8tFEOq1WYXvPs3ejMAehE6-QA8e4JOE,864
- marlenv/wrappers/rlenv_wrapper.py,sha256=9k0bH5oHfnxnQYXL5PIRVbJfiTP_jnh2c9stMC8bA1o,3018
+ marlenv/wrappers/potential_shaping.py,sha256=T_QvnmWReCgpyoInxRw2UXbmdvcBD5U-vV1ledLG7y8,1661
+ marlenv/wrappers/rlenv_wrapper.py,sha256=S6G1VjFklTEzU6bj0AXrTDXnsTQJARq8VB4uUH6AXe4,2993
  marlenv/wrappers/time_limit.py,sha256=GxbxcbfFyuVg14ylQU2C_cjmV9q4uDAt5wepfgX_PyM,3976
  marlenv/wrappers/video_recorder.py,sha256=ucBQSNRPqDr-2mYxrTCqlrWcxSWtSJ7XlRC9-LdukBM,2535
- multi_agent_rlenv-3.5.0.dist-info/METADATA,sha256=XiqIH0A9YzbwckC_1hf8KbOI99LRDyYY--rRqAoMxZE,4897
- multi_agent_rlenv-3.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- multi_agent_rlenv-3.5.0.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
- multi_agent_rlenv-3.5.0.dist-info/RECORD,,
+ multi_agent_rlenv-3.5.2.dist-info/METADATA,sha256=QjQkN0ZJsbaa-GyP7fAs4JFSTJkEUBLrIV0zCGPUvrc,4897
+ multi_agent_rlenv-3.5.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ multi_agent_rlenv-3.5.2.dist-info/licenses/LICENSE,sha256=_eeiGVoIJ7kYt6l1zbIvSBQppTnw0mjnYk1lQ4FxEjE,1074
+ multi_agent_rlenv-3.5.2.dist-info/RECORD,,