multi-agent-rlenv 3.5.0__tar.gz → 3.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/PKG-INFO +1 -1
  2. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/__init__.py +1 -1
  3. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/step.py +1 -1
  4. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/__init__.py +2 -0
  5. multi_agent_rlenv-3.5.1/src/marlenv/wrappers/potential_shaping.py +49 -0
  6. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/rlenv_wrapper.py +1 -2
  7. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/test_wrappers.py +33 -0
  8. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/.github/workflows/ci.yaml +0 -0
  9. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/.github/workflows/docs.yaml +0 -0
  10. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/.gitignore +0 -0
  11. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/LICENSE +0 -0
  12. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/README.md +0 -0
  13. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/pyproject.toml +0 -0
  14. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/__init__.py +0 -0
  15. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/gym_adapter.py +0 -0
  16. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/overcooked_adapter.py +0 -0
  17. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/pettingzoo_adapter.py +0 -0
  18. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/pymarl_adapter.py +0 -0
  19. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/adapters/smac_adapter.py +0 -0
  20. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/env_builder.py +0 -0
  21. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/env_pool.py +0 -0
  22. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/exceptions.py +0 -0
  23. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/mock_env.py +0 -0
  24. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/__init__.py +0 -0
  25. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/env.py +0 -0
  26. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/episode.py +0 -0
  27. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/observation.py +0 -0
  28. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/spaces.py +0 -0
  29. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/state.py +0 -0
  30. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/models/transition.py +0 -0
  31. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/py.typed +0 -0
  32. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/utils/__init__.py +0 -0
  33. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/utils/schedule.py +0 -0
  34. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/agent_id_wrapper.py +0 -0
  35. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/available_actions_mask.py +0 -0
  36. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/available_actions_wrapper.py +0 -0
  37. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/blind_wrapper.py +0 -0
  38. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/centralised.py +0 -0
  39. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/delayed_rewards.py +0 -0
  40. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/last_action_wrapper.py +0 -0
  41. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/paddings.py +0 -0
  42. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/penalty_wrapper.py +0 -0
  43. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/time_limit.py +0 -0
  44. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/src/marlenv/wrappers/video_recorder.py +0 -0
  45. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/__init__.py +0 -0
  46. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/test_adapters.py +0 -0
  47. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/test_episode.py +0 -0
  48. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/test_models.py +0 -0
  49. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/test_pool.py +0 -0
  50. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/test_schedules.py +0 -0
  51. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/test_serialization.py +0 -0
  52. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/test_spaces.py +0 -0
  53. {multi_agent_rlenv-3.5.0 → multi_agent_rlenv-3.5.1}/tests/utils.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: multi-agent-rlenv
-Version: 3.5.0
+Version: 3.5.1
 Summary: A strongly typed Multi-Agent Reinforcement Learning framework
 Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
 Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
src/marlenv/__init__.py
@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
 If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
 """
 
-__version__ = "3.5.0"
+__version__ = "3.5.1"
 
 from . import models
 from .models import (
src/marlenv/models/step.py
@@ -39,7 +39,7 @@ class Step:
             case int() | float():
                 self.reward = np.array([reward], dtype=np.float32)
             case np.ndarray():
-                self.reward = reward
+                self.reward = reward.astype(np.float32)
             case other:
                 # We assume this is a sequence of some sort
                 self.reward = np.array(other, dtype=np.float32)
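
This change casts `np.ndarray` rewards to `float32`, so every branch of the `match` now produces the same dtype. A standalone illustration of the resulting invariant (not part of the package, purely for clarity):

import numpy as np

# All three accepted reward forms now normalize to float32 arrays:
from_scalar = np.array([1.0], dtype=np.float32)         # int/float branch
from_array = np.array([1.0, 2.0]).astype(np.float32)    # np.ndarray branch (the new cast)
from_sequence = np.array([1.0, 2.0], dtype=np.float32)  # generic sequence branch
assert from_scalar.dtype == from_array.dtype == from_sequence.dtype == np.float32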
src/marlenv/wrappers/__init__.py
@@ -10,6 +10,7 @@ from .blind_wrapper import Blind
 from .centralised import Centralized
 from .available_actions_mask import AvailableActionsMask
 from .delayed_rewards import DelayedReward
+from .potential_shaping import PotentialShaping
 
 __all__ = [
     "RLEnvWrapper",
@@ -26,4 +27,5 @@ __all__ = [
     "Blind",
     "Centralized",
     "DelayedReward",
+    "PotentialShaping",
 ]
src/marlenv/wrappers/potential_shaping.py (new file)
@@ -0,0 +1,49 @@
+from abc import abstractmethod, ABC
+from .rlenv_wrapper import RLEnvWrapper
+from marlenv import Space, MARLEnv, Observation
+from typing import TypeVar, Optional
+
+A = TypeVar("A", bound=Space)
+
+
+class PotentialShaping(RLEnvWrapper[A], ABC):
+    """
+    Potential shaping for the Laser Learning Environment (LLE).
+
+    https://people.eecs.berkeley.edu/~pabbeel/cs287-fa09/readings/NgHaradaRussell-shaping-ICML1999.pdf
+    """
+
+    gamma: float
+
+    def __init__(
+        self,
+        env: MARLEnv,
+        gamma: float = 1.0,
+        extra_shape: Optional[tuple[int]] = None,
+    ):
+        super().__init__(env, extra_shape=extra_shape)
+        self.gamma = gamma
+        self.current_potential = self.compute_potential()
+
+    def add_extras(self, obs: Observation) -> Observation:
+        """Add the extras related to potential shaping. Does nothing by default."""
+        return obs
+
+    def reset(self):
+        obs, state = super().reset()
+        self.current_potential = self.compute_potential()
+        return self.add_extras(obs), state
+
+    def step(self, actions):
+        phi_t = self.current_potential
+        step = super().step(actions)
+
+        self.current_potential = self.compute_potential()
+        shaped_reward = self.gamma * self.current_potential - phi_t
+        step.obs = self.add_extras(step.obs)
+        step.reward += shaped_reward
+        return step
+
+    @abstractmethod
+    def compute_potential(self) -> float:
+        """Compute the potential of the current state of the environment."""
src/marlenv/wrappers/rlenv_wrapper.py
@@ -1,7 +1,6 @@
 from typing import Optional, Sequence
 from typing_extensions import TypeVar
 from dataclasses import dataclass
-from abc import ABC
 import numpy as np
 
 from marlenv.models import MARLEnv, Space, DiscreteSpace, State
@@ -11,7 +10,7 @@ AS = TypeVar("AS", bound=Space, default=Space)
 
 
 @dataclass
-class RLEnvWrapper(MARLEnv[AS], ABC):
+class RLEnvWrapper(MARLEnv[AS]):
     """Parent class for all RLEnv wrappers"""
 
    wrapped: MARLEnv[AS]
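
With `ABC` dropped, `RLEnvWrapper` is no longer abstract, so it can presumably be instantiated directly as a transparent pass-through around any `MARLEnv`. A sketch under that assumption (the `DiscreteMockEnv` import path is assumed from the file list; the tests use this mock environment):

from marlenv.wrappers import RLEnvWrapper
from marlenv.mock_env import DiscreteMockEnv  # assumed import path

env = RLEnvWrapper(DiscreteMockEnv())  # every call is forwarded to the wrapped env
obs, state = env.reset()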
tests/test_wrappers.py
@@ -357,3 +357,36 @@ def test_delayed_rewards_from_builder():
     for end_game in range(delay + 1, delay * 2):
         env = Builder(DiscreteMockEnv(reward_step=10, end_game=end_game, n_agents=2)).delay_rewards(delay).build()
         _test_delayed_rewards(env)
+
+
+def test_potential_shaping():
+    from marlenv.wrappers.potential_shaping import PotentialShaping
+
+    class PS(PotentialShaping):
+        def __init__(self, env: MARLEnv):
+            self.phi = 10
+            super().__init__(env)
+
+        def reset(self):
+            self.phi = 10
+            return super().reset()
+
+        def compute_potential(self) -> float:
+            return self.phi
+
+        def step(self, actions):
+            self.phi = max(0, self.phi - 1)
+            return super().step(actions)
+
+    EP_LENGTH = 20
+    env = PS(DiscreteMockEnv(reward_step=0, end_game=EP_LENGTH))
+    env.reset()
+    step = None
+
+    for i in range(10):
+        step = env.random_step()
+        assert step.reward.item() == -1
+
+    for i in range(10):
+        step = env.random_step()
+        assert step.reward.item() == 0
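
In this test the potential starts at 10 and drops by 1 per step with the default gamma = 1 and a base reward of 0, so each of the first ten steps yields a shaped reward of Phi(s_{t+1}) - Phi(s_t) = -1; once the potential has bottomed out at 0, the shaped reward is 0, which is exactly what the two assertion loops check.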