multi-agent-rlenv 3.5.4__tar.gz → 3.5.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/PKG-INFO +1 -1
  2. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/__init__.py +1 -1
  3. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/models/episode.py +9 -7
  4. multi_agent_rlenv-3.5.5/src/marlenv/utils/__init__.py +12 -0
  5. multi_agent_rlenv-3.5.5/src/marlenv/utils/cached_property_collector.py +17 -0
  6. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/__init__.py +2 -0
  7. multi_agent_rlenv-3.5.5/src/marlenv/wrappers/action_randomizer.py +17 -0
  8. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/test_episode.py +31 -2
  9. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/test_schedules.py +1 -1
  10. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/test_wrappers.py +1 -1
  11. multi_agent_rlenv-3.5.4/src/marlenv/utils/__init__.py +0 -10
  12. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/.github/workflows/ci.yaml +0 -0
  13. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/.github/workflows/docs.yaml +0 -0
  14. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/.gitignore +0 -0
  15. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/LICENSE +0 -0
  16. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/README.md +0 -0
  17. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/pyproject.toml +0 -0
  18. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/adapters/__init__.py +0 -0
  19. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/adapters/gym_adapter.py +0 -0
  20. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/adapters/overcooked_adapter.py +0 -0
  21. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/adapters/pettingzoo_adapter.py +0 -0
  22. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/adapters/pymarl_adapter.py +0 -0
  23. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/adapters/smac_adapter.py +0 -0
  24. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/env_builder.py +0 -0
  25. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/env_pool.py +0 -0
  26. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/exceptions.py +0 -0
  27. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/mock_env.py +0 -0
  28. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/models/__init__.py +0 -0
  29. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/models/env.py +0 -0
  30. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/models/observation.py +0 -0
  31. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/models/spaces.py +0 -0
  32. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/models/state.py +0 -0
  33. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/models/step.py +0 -0
  34. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/models/transition.py +0 -0
  35. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/py.typed +0 -0
  36. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/utils/schedule.py +0 -0
  37. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/agent_id_wrapper.py +0 -0
  38. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/available_actions_mask.py +0 -0
  39. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/available_actions_wrapper.py +0 -0
  40. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/blind_wrapper.py +0 -0
  41. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/centralised.py +0 -0
  42. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/delayed_rewards.py +0 -0
  43. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/last_action_wrapper.py +0 -0
  44. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/paddings.py +0 -0
  45. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/penalty_wrapper.py +0 -0
  46. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/potential_shaping.py +0 -0
  47. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/rlenv_wrapper.py +0 -0
  48. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/time_limit.py +0 -0
  49. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/src/marlenv/wrappers/video_recorder.py +0 -0
  50. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/__init__.py +0 -0
  51. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/test_adapters.py +0 -0
  52. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/test_models.py +0 -0
  53. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/test_pool.py +0 -0
  54. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/test_serialization.py +0 -0
  55. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/test_spaces.py +0 -0
  56. {multi_agent_rlenv-3.5.4 → multi_agent_rlenv-3.5.5}/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: multi-agent-rlenv
3
- Version: 3.5.4
3
+ Version: 3.5.5
4
4
  Summary: A strongly typed Multi-Agent Reinforcement Learning framework
5
5
  Project-URL: repository, https://github.com/yamoling/multi-agent-rlenv
6
6
  Author-email: Yannick Molinghen <yannick.molinghen@ulb.be>
@@ -62,7 +62,7 @@ print(env.extras_shape) # (1, )
62
62
  If you want to create a new environment, you can simply create a class that inherits from `MARLEnv`. If you want to create a wrapper around an existing `MARLEnv`, you probably want to subclass `RLEnvWrapper` which implements a default behaviour for every method.
63
63
  """
64
64
 
65
- __version__ = "3.5.4"
65
+ __version__ = "3.5.5"
66
66
 
67
67
  from . import models
68
68
  from .models import (
@@ -2,20 +2,22 @@ from dataclasses import dataclass
2
2
  from functools import cached_property
3
3
  from typing import Any, Callable, Optional, Sequence, overload
4
4
 
5
+ import cv2
5
6
  import numpy as np
6
7
  import numpy.typing as npt
7
- import cv2
8
8
 
9
+ from marlenv.exceptions import EnvironmentMismatchException, ReplayMismatchException
10
+ from marlenv.utils import CachedPropertyInvalidator
11
+
12
+ from .env import MARLEnv
9
13
  from .observation import Observation
10
14
  from .state import State
11
15
  from .step import Step
12
16
  from .transition import Transition
13
- from .env import MARLEnv
14
- from marlenv.exceptions import EnvironmentMismatchException, ReplayMismatchException
15
17
 
16
18
 
17
19
  @dataclass
18
- class Episode:
20
+ class Episode(CachedPropertyInvalidator):
19
21
  """Episode model made of observations, actions, rewards, ..."""
20
22
 
21
23
  all_observations: list[npt.NDArray[np.float32]]
@@ -153,12 +155,12 @@ class Episode:
153
155
  """Get the next extra features"""
154
156
  return self.all_extras[1:]
155
157
 
156
- @cached_property
158
+ @property
157
159
  def n_agents(self):
158
160
  """The number of agents in the episode"""
159
161
  return self.all_extras[0].shape[0]
160
162
 
161
- @cached_property
163
+ @property
162
164
  def n_actions(self):
163
165
  """The number of actions"""
164
166
  return len(self.all_available_actions[0][0])
@@ -267,7 +269,7 @@ class Episode:
267
269
  def __len__(self):
268
270
  return self.episode_len
269
271
 
270
- @cached_property
272
+ @property
271
273
  def score(self) -> list[float]:
272
274
  """The episode score (sum of all rewards across all objectives)"""
273
275
  score = []
@@ -0,0 +1,12 @@
1
+ from .cached_property_collector import CachedPropertyCollector, CachedPropertyInvalidator
2
+ from .schedule import ExpSchedule, LinearSchedule, MultiSchedule, RoundedSchedule, Schedule
3
+
4
+ __all__ = [
5
+ "Schedule",
6
+ "LinearSchedule",
7
+ "ExpSchedule",
8
+ "MultiSchedule",
9
+ "RoundedSchedule",
10
+ "CachedPropertyCollector",
11
+ "CachedPropertyInvalidator",
12
+ ]
@@ -0,0 +1,17 @@
1
from functools import cached_property


class CachedPropertyCollector(type):
    """Metaclass that records the names of every `cached_property` defined on a class.

    The collected names are stored in `CACHED_PROPERTY_NAMES` and include the
    cached properties of all base classes, so subclasses of an invalidator do
    not lose the properties declared by their parents.
    """

    def __init__(cls, name: str, bases: tuple, namespace: dict):
        super().__init__(name, bases, namespace)
        # Aggregate inherited names first: rebuilding from `namespace` alone
        # would drop the parents' cached properties in every subclass.
        names: set[str] = set()
        for base in bases:
            names.update(getattr(base, "CACHED_PROPERTY_NAMES", ()))
        names.update(key for key, value in namespace.items() if isinstance(value, cached_property))
        cls.CACHED_PROPERTY_NAMES = list(names)


class CachedPropertyInvalidator(metaclass=CachedPropertyCollector):
    """Base class providing `invalidate_cached_properties` to drop cached values."""

    def __init__(self):
        super().__init__()

    def invalidate_cached_properties(self):
        """Remove every cached `cached_property` value so it is recomputed on next access.

        Only values actually present in the instance `__dict__` are removed:
        `hasattr` would *trigger* (and cache) the computation of any uncached
        property before deleting it, which is wasteful and may raise.
        """
        for key in self.__class__.CACHED_PROPERTY_NAMES:
            self.__dict__.pop(key, None)
@@ -11,6 +11,7 @@ from .centralised import Centralized
11
11
  from .available_actions_mask import AvailableActionsMask
12
12
  from .delayed_rewards import DelayedReward
13
13
  from .potential_shaping import PotentialShaping
14
+ from .action_randomizer import ActionRandomizer
14
15
 
15
16
  __all__ = [
16
17
  "RLEnvWrapper",
@@ -28,4 +29,5 @@ __all__ = [
28
29
  "Centralized",
29
30
  "DelayedReward",
30
31
  "PotentialShaping",
32
+ "ActionRandomizer",
31
33
  ]
@@ -0,0 +1,17 @@
1
import numpy as np

from .rlenv_wrapper import AS, MARLEnv, RLEnvWrapper


class ActionRandomizer(RLEnvWrapper[AS]):
    """Wrapper that replaces the agents' action with a random one with probability `p`.

    NOTE: randomness comes from numpy's *global* RNG, shared with any other
    user of `np.random`; `seed` reseeds that global state.
    """

    def __init__(self, env: MARLEnv[AS], p: float):
        """`p` is the probability, in [0, 1], of substituting a random action."""
        if not 0.0 <= p <= 1.0:
            raise ValueError(f"p must be a probability in [0, 1], got {p}")
        super().__init__(env)
        self.p = p

    def step(self, action):
        # With probability p, discard the given action and sample a random one.
        if np.random.rand() < self.p:
            action = self.action_space.sample()
        return super().step(action)

    def seed(self, seed_value: int):
        # Seed the global numpy RNG used in `step`, then the wrapped environment.
        np.random.seed(seed_value)
        super().seed(seed_value)
@@ -1,7 +1,8 @@
1
1
  import numpy as np
2
- from marlenv.models import Transition, Episode
2
+
3
+ from marlenv import DiscreteMockEnv, Episode, Transition, wrappers
3
4
  from marlenv.exceptions import EnvironmentMismatchException, ReplayMismatchException
4
- from marlenv import wrappers, DiscreteMockEnv
5
+
5
6
  from .utils import generate_episode
6
7
 
7
8
 
@@ -219,3 +220,31 @@ def test_get_images():
219
220
  images = episode.get_images(env, seed=0)
220
221
  # +1 because there is one image for the initial state (after reset)
221
222
  assert len(images) == len(episode) + 1
223
+
224
+
225
def test_episode_invalidate_cached_properties():
    env = DiscreteMockEnv(end_game=10)
    obs, state = env.reset()
    episode = Episode.new(obs, state)

    # Play 5 steps, then touch the cached properties to populate their caches.
    for _ in range(5):
        action = env.sample_action()
        step = env.step(action)
        episode.add(Transition.from_step(obs, state, action, step))
        obs, state = step.obs, step.state

    assert len(episode.states) == 5
    assert len(episode.obs) == 5

    # Drop the caches and keep playing 3 more steps.
    episode.invalidate_cached_properties()
    for _ in range(3):
        action = env.sample_action()
        step = env.step(action)
        episode.add(Transition.from_step(obs, state, action, step))
        obs, state = step.obs, step.state

    # The cached property must be recomputed from the extended episode.
    assert len(episode.obs) == 8
@@ -1,4 +1,4 @@
1
- from marlenv.utils import Schedule, MultiSchedule
1
+ from marlenv.utils import MultiSchedule, Schedule
2
2
 
3
3
 
4
4
  def is_close(a: float, b: float, tol: float = 1e-6) -> bool:
@@ -1,6 +1,6 @@
1
1
  import numpy as np
2
2
  from marlenv import Builder, DiscreteMOMockEnv, DiscreteMockEnv, MARLEnv
3
- from marlenv.wrappers import Centralized, AvailableActionsMask, TimeLimit, LastAction, DelayedReward
3
+ from marlenv.wrappers import Centralized, AvailableActionsMask, TimeLimit, LastAction, DelayedReward, ActionRandomizer
4
4
  import marlenv
5
5
 
6
6
 
@@ -1,10 +0,0 @@
1
- from .schedule import Schedule, MultiSchedule, RoundedSchedule, LinearSchedule, ExpSchedule
2
-
3
-
4
- __all__ = [
5
- "Schedule",
6
- "LinearSchedule",
7
- "ExpSchedule",
8
- "MultiSchedule",
9
- "RoundedSchedule",
10
- ]