continual-foragax 0.28.1__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: continual-foragax
-Version: 0.28.1
+Version: 0.30.0
 Summary: A continual reinforcement learning benchmark
 Author-email: Steven Tang <stang5@ualberta.ca>
 Requires-Python: >=3.8
@@ -1,8 +1,8 @@
 foragax/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 foragax/colors.py,sha256=rqNPiywP4Nvr0POhsGpasRk-nMMTS3DOwFRUgperlUk,2065
-foragax/env.py,sha256=8V8HHfOBBB6adW3pGqEoO4YFWl2CzeKbgRyZ2W9Rpl4,25682
-foragax/objects.py,sha256=FCLZ-8d7qq9VMTG6G-TaRt842-sjgB0-DH0IoHwwngI,9503
-foragax/registry.py,sha256=HysNaZs1tcbAcr53l8Cb2NeZ-_FmE6OpUe_zIks-ObM,15089
+foragax/env.py,sha256=4NZ5JsUGjAepmzw2uxu5_ikyVZnZ7vazy062Xzx22Zg,27481
+foragax/objects.py,sha256=0vb_iyr62BKaIxiE3JwtRhZhFE3VFM6PdxDZTaDtv24,10410
+foragax/registry.py,sha256=Dxg6cWIPwg91fNrCPxADJv35u6jFg_8dI5iTpCMFEFA,15229
 foragax/rendering.py,sha256=bms7wvBZTofoR-K-2QD2Ggeed7Viw8uwAEiEpEM3eSo,2768
 foragax/weather.py,sha256=KNAiwuFz8V__6G75vZIWQKPocLzXqxXn-Vt4TbHIpcA,1258
 foragax/data/ECA_non-blended_custom/TG_SOUID100897.txt,sha256=N7URbX6VlCZvCboUogYjMzy1I-0cfNPOn0QTLSHHfQ0,1776751
@@ -128,8 +128,8 @@ foragax/data/ECA_non-blended_custom/TG_SOUID156887.txt,sha256=juzTPgJoJxfqmZkorL
 foragax/data/ECA_non-blended_custom/elements.txt,sha256=OtcUBoDAHxuln79BPKGu0tsQxG_5G2BfAX3Ck130kEA,4507
 foragax/data/ECA_non-blended_custom/metadata.txt,sha256=nudnmOCy5cPJfSXt_IjyX0S5-T7NkCZREICZSimqeqc,48260
 foragax/data/ECA_non-blended_custom/sources.txt,sha256=1j3lSmINAoCMqPqFrHfZJriOz6sTYZNOhXzUwvTLas0,20857
-continual_foragax-0.28.1.dist-info/METADATA,sha256=dO9WXb8d6s6PWMklUwrb_EGYsKzeU3rT-FyqRZtQRkQ,4897
-continual_foragax-0.28.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-continual_foragax-0.28.1.dist-info/entry_points.txt,sha256=Qiu6iE_XudrDO_bVAMeA435h4PO9ourt8huvSHiuMPc,41
-continual_foragax-0.28.1.dist-info/top_level.txt,sha256=-z3SDK6RfLIcLI24n8rdbeFzlVY3hunChzlu-v1Fncs,8
-continual_foragax-0.28.1.dist-info/RECORD,,
+continual_foragax-0.30.0.dist-info/METADATA,sha256=d0xeSz0BvDVe1lOUGdhVyqnbkkYN7dNW4BPfCnDSZfQ,4897
+continual_foragax-0.30.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+continual_foragax-0.30.0.dist-info/entry_points.txt,sha256=Qiu6iE_XudrDO_bVAMeA435h4PO9ourt8huvSHiuMPc,41
+continual_foragax-0.30.0.dist-info/top_level.txt,sha256=-z3SDK6RfLIcLI24n8rdbeFzlVY3hunChzlu-v1Fncs,8
+continual_foragax-0.30.0.dist-info/RECORD,,
foragax/env.py CHANGED
@@ -61,6 +61,7 @@ class EnvState(environment.EnvState):
     object_grid: jax.Array
     biome_grid: jax.Array
     time: int
+    digestion_buffer: jax.Array


 class ForagaxEnv(environment.Environment):
@@ -102,11 +103,6 @@ class ForagaxEnv(environment.Environment):
         if self.nowrap and not self.full_world:
             objects = objects + (PADDING,)
         self.objects = objects
-        self.weather_object = None
-        for o in objects:
-            if isinstance(o, WeatherObject):
-                self.weather_object = o
-                break

         # JIT-compatible versions of object and biome properties
         self.object_ids = jnp.arange(len(objects))
@@ -117,6 +113,13 @@

         self.reward_fns = [o.reward for o in objects]
         self.regen_delay_fns = [o.regen_delay for o in objects]
+        self.reward_delay_fns = [o.reward_delay for o in objects]
+
+        # Compute reward steps per object (using max_reward_delay attribute)
+        object_max_reward_delay = jnp.array([o.max_reward_delay for o in objects])
+        self.max_reward_delay = (
+            int(jnp.max(object_max_reward_delay)) + 1 if len(objects) > 0 else 0
+        )

         self.biome_object_frequencies = jnp.array(
             [b.object_frequencies for b in biomes]
@@ -237,12 +240,36 @@

         # 2. HANDLE COLLISIONS AND REWARDS
         obj_at_pos = current_objects[pos[1], pos[0]]
-        key, subkey = jax.random.split(key)
-        reward = jax.lax.switch(obj_at_pos, self.reward_fns, state.time, subkey)
         is_collectable = self.object_collectable[obj_at_pos]
+        should_collect = is_collectable & (obj_at_pos > 0)
+
+        # Handle digestion: add reward to buffer if collected
+        digestion_buffer = state.digestion_buffer
+        key, reward_subkey = jax.random.split(key)
+        object_reward = jax.lax.switch(
+            obj_at_pos, self.reward_fns, state.time, reward_subkey
+        )
+        key, digestion_subkey = jax.random.split(key)
+        reward_delay = jax.lax.switch(
+            obj_at_pos, self.reward_delay_fns, state.time, digestion_subkey
+        )
+        reward = jnp.where(should_collect & (reward_delay == 0), object_reward, 0.0)
+        if self.max_reward_delay > 0:
+            # Add delayed rewards to buffer
+            digestion_buffer = jax.lax.cond(
+                should_collect & (reward_delay > 0),
+                lambda: digestion_buffer.at[
+                    (state.time + reward_delay) % self.max_reward_delay
+                ].add(object_reward),
+                lambda: digestion_buffer,
+            )
+            # Deliver current rewards
+            current_index = state.time % self.max_reward_delay
+            reward += digestion_buffer[current_index]
+            digestion_buffer = digestion_buffer.at[current_index].set(0.0)

         # 3. HANDLE OBJECT COLLECTION AND RESPAWNING
-        key, subkey, rand_key = jax.random.split(key, 3)
+        key, regen_subkey, rand_key = jax.random.split(key, 3)

         # Decrement timers (stored as negative values)
         is_timer = state.object_grid < 0
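Note on the digestion hunk above: the buffer is a fixed-size circular array indexed by time modulo max_reward_delay. A reward collected at step t with delay d is queued at slot (t + d) % max_reward_delay and paid out when the clock wraps back to that slot; sizing the buffer as the largest per-object delay plus one (see the constructor hunk) guarantees a freshly queued reward never lands in the slot being paid out on the same step. A minimal standalone sketch of the idea, with made-up sizes rather than the package's own code:

    import jax
    import jax.numpy as jnp

    MAX_REWARD_DELAY = 4  # hypothetical: largest per-object delay (3) plus one

    def digest(buffer: jax.Array, t: int, reward: float, delay: int):
        """One step of a circular delayed-reward buffer."""
        immediate = jnp.where(delay == 0, reward, 0.0)
        # Queue delayed rewards `delay` steps ahead (circular index).
        buffer = jnp.where(
            delay > 0,
            buffer.at[(t + delay) % MAX_REWARD_DELAY].add(reward),
            buffer,
        )
        # Pay out whatever matured at the current slot, then clear it.
        idx = t % MAX_REWARD_DELAY
        paid = immediate + buffer[idx]
        return buffer.at[idx].set(0.0), paid

    buffer = jnp.zeros((MAX_REWARD_DELAY,))
    buffer, r0 = digest(buffer, t=0, reward=1.0, delay=2)  # queued, r0 == 0.0
    buffer, r1 = digest(buffer, t=1, reward=0.0, delay=0)  # r1 == 0.0
    buffer, r2 = digest(buffer, t=2, reward=0.0, delay=0)  # r2 == 1.0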
@@ -252,7 +279,7 @@

         # Collect object: set a timer
         regen_delay = jax.lax.switch(
-            obj_at_pos, self.regen_delay_fns, state.time, subkey
+            obj_at_pos, self.regen_delay_fns, state.time, regen_subkey
         )
         encoded_timer = obj_at_pos - ((regen_delay + 1) * num_obj_types)

@@ -301,10 +328,13 @@
         )

         info = {"discount": self.discount(state, params)}
-        if self.weather_object is not None:
-            info["temperature"] = get_temperature(
-                self.weather_object.rewards, state.time, self.weather_object.repeat
-            )
+        temperatures = jnp.zeros(len(self.objects))
+        for obj_index, obj in enumerate(self.objects):
+            if isinstance(obj, WeatherObject):
+                temperatures = temperatures.at[obj_index].set(
+                    get_temperature(obj.rewards, state.time, obj.repeat)
+                )
+        info["temperatures"] = temperatures
         info["biome_id"] = state.biome_grid[pos[1], pos[0]]
         info["object_collected_id"] = jax.lax.select(should_collect, obj_at_pos, -1)

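The hunk above replaces the scalar info["temperature"] (taken from the single tracked weather object) with a per-object info["temperatures"] array, zero everywhere except at WeatherObject indices. A hedged sketch of consuming the new field, assuming `env` and a step's `info` dict are already in scope:

    from foragax.objects import WeatherObject

    temps = info["temperatures"]  # shape: (num_objects,)
    for i, obj in enumerate(env.objects):
        if isinstance(obj, WeatherObject):
            print(obj.name, float(temps[i]))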
@@ -314,6 +344,7 @@
             object_grid=object_grid,
             biome_grid=state.biome_grid,
             time=state.time + 1,
+            digestion_buffer=digestion_buffer,
         )

         done = self.is_terminal(state, params)
@@ -352,6 +383,7 @@
             object_grid=object_grid,
             biome_grid=biome_grid,
             time=0,
+            digestion_buffer=jnp.zeros((self.max_reward_delay,)),
         )

         return self.get_obs(state, params), state
@@ -412,6 +444,12 @@
                     int,
                 ),
                 "time": spaces.Discrete(params.max_steps_in_episode),
+                "digestion_buffer": spaces.Box(
+                    -jnp.inf,
+                    jnp.inf,
+                    (self.max_reward_delay,),
+                    float,
+                ),
             }
         )

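Since digestion_buffer is now a field of EnvState, zero-initialized on reset with max_reward_delay slots and exposed in the state space, downstream code that serializes or checkpoints states needs to account for it. A quick sanity check, assuming a constructed `env` and `params` and the usual gymnax-style reset signature:

    import jax
    import jax.numpy as jnp

    key = jax.random.PRNGKey(0)
    obs, state = env.reset(key, params)  # `env`, `params` assumed in scope
    assert state.digestion_buffer.shape == (env.max_reward_delay,)
    assert bool(jnp.all(state.digestion_buffer == 0.0))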
foragax/objects.py CHANGED
@@ -1,5 +1,5 @@
 import abc
-from typing import Tuple
+from typing import Optional, Tuple

 import jax
 import jax.numpy as jnp
@@ -17,12 +17,14 @@ class BaseForagaxObject:
         collectable: bool = False,
         color: Tuple[int, int, int] = (0, 0, 0),
         random_respawn: bool = False,
+        max_reward_delay: int = 0,
     ):
         self.name = name
         self.blocking = blocking
         self.collectable = collectable
         self.color = color
         self.random_respawn = random_respawn
+        self.max_reward_delay = max_reward_delay

     @abc.abstractmethod
     def reward(self, clock: int, rng: jax.Array) -> float:
@@ -30,8 +32,8 @@ class BaseForagaxObject:
         raise NotImplementedError

     @abc.abstractmethod
-    def regen_delay(self, clock: int, rng: jax.Array) -> int:
-        """Regeneration delay function."""
+    def reward_delay(self, clock: int, rng: jax.Array) -> int:
+        """Reward delay function."""
         raise NotImplementedError


@@ -47,10 +49,17 @@ class DefaultForagaxObject(BaseForagaxObject):
         regen_delay: Tuple[int, int] = (10, 100),
         color: Tuple[int, int, int] = (255, 255, 255),
         random_respawn: bool = False,
+        reward_delay: int = 0,
+        max_reward_delay: Optional[int] = None,
     ):
-        super().__init__(name, blocking, collectable, color, random_respawn)
+        if max_reward_delay is None:
+            max_reward_delay = reward_delay
+        super().__init__(
+            name, blocking, collectable, color, random_respawn, max_reward_delay
+        )
         self.reward_val = reward
         self.regen_delay_range = regen_delay
+        self.reward_delay_val = reward_delay

     def reward(self, clock: int, rng: jax.Array) -> float:
         """Default reward function."""
@@ -61,6 +70,10 @@
         min_delay, max_delay = self.regen_delay_range
         return jax.random.randint(rng, (), min_delay, max_delay)

+    def reward_delay(self, clock: int, rng: jax.Array) -> int:
+        """Default reward delay function."""
+        return self.reward_delay_val
+

 class NormalRegenForagaxObject(DefaultForagaxObject):
     """Object with regeneration delay from a normal distribution."""
@@ -74,6 +87,8 @@ class NormalRegenForagaxObject(DefaultForagaxObject):
         std_regen_delay: int = 1,
         color: Tuple[int, int, int] = (0, 0, 0),
         random_respawn: bool = False,
+        reward_delay: int = 0,
+        max_reward_delay: Optional[int] = None,
     ):
         super().__init__(
             name=name,
@@ -82,6 +97,8 @@
             regen_delay=(mean_regen_delay, mean_regen_delay),
             color=color,
             random_respawn=random_respawn,
+            reward_delay=reward_delay,
+            max_reward_delay=max_reward_delay,
         )
         self.mean_regen_delay = mean_regen_delay
         self.std_regen_delay = std_regen_delay
@@ -105,6 +122,8 @@ class WeatherObject(NormalRegenForagaxObject):
         std_regen_delay: int = 1,
         color: Tuple[int, int, int] = (0, 0, 0),
         random_respawn: bool = False,
+        reward_delay: int = 0,
+        max_reward_delay: Optional[int] = None,
     ):
         super().__init__(
             name=name,
@@ -113,6 +132,8 @@
             std_regen_delay=std_regen_delay,
             color=color,
             random_respawn=random_respawn,
+            reward_delay=reward_delay,
+            max_reward_delay=max_reward_delay,
         )
         self.rewards = rewards
         self.repeat = repeat
@@ -319,6 +340,7 @@ def create_weather_objects(
     multiplier: float = 1.0,
     same_color: bool = False,
     random_respawn: bool = False,
+    reward_delay: int = 0,
 ):
     """Create HOT and COLD WeatherObject instances using the specified file.

@@ -348,6 +370,7 @@
         multiplier=multiplier,
         color=hot_color,
         random_respawn=random_respawn,
+        reward_delay=reward_delay,
     )

     cold_color = hot_color if same_color else (0, 255, 255)
@@ -358,6 +381,7 @@
         multiplier=-multiplier,
         color=cold_color,
         random_respawn=random_respawn,
+        reward_delay=reward_delay,
     )

     return hot, cold
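create_weather_objects simply forwards the new reward_delay to both objects, so HOT and COLD always share one digestion delay. A usage sketch with illustrative values (the attribute name in the assert follows the objects.py hunk above):

    from foragax.objects import create_weather_objects

    hot, cold = create_weather_objects(
        file_index=0,
        repeat=500,
        random_respawn=False,
        reward_delay=8,
    )
    assert hot.reward_delay_val == cold.reward_delay_val == 8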
foragax/registry.py CHANGED
@@ -348,7 +348,8 @@ def make(
     observation_type: str = "color",
     aperture_size: Optional[Tuple[int, int]] = (5, 5),
     file_index: int = 0,
-    nowrap: Optional[bool] = None,
+    repeat: int = 500,
+    reward_delay: int = 0,
     **kwargs: Any,
 ) -> ForagaxEnv:
     """Create a Foragax environment.
@@ -358,9 +359,9 @@
         observation_type: The type of observation to use. One of "object", "rgb", or "color".
         aperture_size: The size of the agent's observation aperture. If -1, full world observation.
             If None, the default for the environment is used.
-        file_index: File index for weather objects. nowrap: If True, disables
-            wrapping around environment boundaries. If None, uses defaults per
-            environment.
+        file_index: File index for weather objects.
+        repeat: How many steps each temperature value repeats for (weather environments).
+        reward_delay: Number of steps required to digest food items (weather environments).
         **kwargs: Additional keyword arguments to pass to the ForagaxEnv constructor.

     Returns:
@@ -376,8 +377,6 @@
     else:
         aperture_size = (aperture_size, aperture_size)
     config["aperture_size"] = aperture_size
-    if nowrap is not None:
-        config["nowrap"] = nowrap

     # Handle special size and biome configurations
     if env_id in (
@@ -460,9 +459,16 @@
         "ForagaxWeather-v4",
         "ForagaxWeather-v5",
     )
-    random_respawn = env_id in ("ForagaxWeather-v4", "ForagaxWeather-v5")
+    random_respawn = env_id in (
+        "ForagaxWeather-v4",
+        "ForagaxWeather-v5",
+    )
     hot, cold = create_weather_objects(
-        file_index=file_index, same_color=same_color, random_respawn=random_respawn
+        file_index=file_index,
+        repeat=repeat,
+        same_color=same_color,
+        random_respawn=random_respawn,
+        reward_delay=reward_delay,
     )
     config["objects"] = (hot, cold)
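Net effect at the registry level: nowrap is no longer a make() keyword (per-environment defaults apply, and it can presumably still be forwarded to ForagaxEnv via **kwargs), while repeat and reward_delay are threaded through to the weather objects. A hedged call sketch, assuming the usual make(env_id, ...) signature and made-up values:

    from foragax.registry import make

    env = make(
        "ForagaxWeather-v4",
        observation_type="color",
        aperture_size=(5, 5),
        repeat=500,       # steps each temperature value persists
        reward_delay=10,  # digestion delay for the HOT/COLD objects
    )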