dfa-gym 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfa_gym/__init__.py +5 -15
- dfa_gym/dfa_bisim_env.py +121 -0
- dfa_gym/dfa_wrapper.py +185 -52
- dfa_gym/env.py +168 -0
- dfa_gym/maps/2buttons_2agents.pdf +0 -0
- dfa_gym/maps/2rooms_2agents.pdf +0 -0
- dfa_gym/maps/4buttons_4agents.pdf +0 -0
- dfa_gym/maps/4rooms_4agents.pdf +0 -0
- dfa_gym/robot.png +0 -0
- dfa_gym/spaces.py +156 -0
- dfa_gym/token_env.py +571 -0
- dfa_gym/utils.py +266 -0
- dfa_gym-0.2.0.dist-info/METADATA +93 -0
- dfa_gym-0.2.0.dist-info/RECORD +16 -0
- {dfa_gym-0.1.0.dist-info → dfa_gym-0.2.0.dist-info}/WHEEL +1 -1
- dfa_gym/dfa_env.py +0 -45
- dfa_gym-0.1.0.dist-info/METADATA +0 -11
- dfa_gym-0.1.0.dist-info/RECORD +0 -7
- {dfa_gym-0.1.0.dist-info → dfa_gym-0.2.0.dist-info}/licenses/LICENSE +0 -0
dfa_gym/__init__.py
CHANGED
```diff
@@ -1,16 +1,6 @@
-from dfa_gym.
+from dfa_gym.token_env import *
+from dfa_gym.dfa_bisim_env import *
 from dfa_gym.dfa_wrapper import *
-
-from
-from
-
-register(
-    id='DFAEnv-v0',
-    entry_point='dfa_gym.dfa_env:DFAEnv',
-    kwargs = {"sampler": RADSampler(n_tokens=12), "timeout": 75}
-)
-
-register(
-    id='DFAEnv-v1',
-    entry_point='dfa_gym.dfa_env:DFAEnv'
-)
+from dfa_gym.env import *
+from dfa_gym.spaces import *
+from dfa_gym.utils import *
```

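With 0.2.0 the `register(...)` entries for `DFAEnv-v0`/`DFAEnv-v1` are gone (the `dfa_gym/dfa_env.py` module they pointed at is removed), so environments are constructed directly from the package's exports. A minimal sketch, assuming `DFABisimEnv` is re-exported by the star imports above:

```python
# Hedged sketch, not part of the package: build the environment directly
# instead of going through gym.make("DFAEnv-v0") as in 0.1.0.
import jax
from dfa_gym import DFABisimEnv

env = DFABisimEnv()   # defaults shown in the diff: RADSampler(), max_steps_in_episode=100
obs, state = env.reset(jax.random.PRNGKey(0))
```
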
dfa_gym/dfa_bisim_env.py
ADDED
@@ -0,0 +1,121 @@

```python
import jax
import dfax
import chex
import jax.numpy as jnp
from flax import struct
from functools import partial
from typing import Tuple, Dict
from dfa_gym import spaces
from dfa_gym.env import MultiAgentEnv, State
from dfax.samplers import DFASampler, RADSampler


@struct.dataclass
class DFABisimState(State):
    dfa_l: dfax.DFAx
    dfa_r: dfax.DFAx
    time: int

class DFABisimEnv(MultiAgentEnv):

    def __init__(
        self,
        sampler: DFASampler = RADSampler(),
        max_steps_in_episode: int = 100
    ) -> None:
        super().__init__(num_agents=1)
        self.n_agents = self.num_agents
        self.sampler = sampler
        self.max_steps_in_episode = max_steps_in_episode

        self.agents = [f"agent_{i}" for i in range(self.n_agents)]

        self.action_spaces = {
            agent: spaces.Discrete(self.sampler.n_tokens)
            for agent in self.agents
        }
        max_dfa_size = self.sampler.max_size
        n_tokens = self.sampler.n_tokens
        self.observation_spaces = {
            agent: spaces.Dict({
                "graph_l": spaces.Dict({
                    "node_features": spaces.Box(low=0, high=1, shape=(max_dfa_size, 4), dtype=jnp.uint16),
                    "edge_features": spaces.Box(low=0, high=1, shape=(max_dfa_size*max_dfa_size, n_tokens + 8), dtype=jnp.uint16),
                    "edge_index": spaces.Box(low=0, high=max_dfa_size, shape=(2, max_dfa_size*max_dfa_size), dtype=jnp.uint16),
                    "current_state": spaces.Box(low=0, high=max_dfa_size, shape=(1,), dtype=jnp.uint16),
                    "n_states": spaces.Box(low=0, high=max_dfa_size, shape=(max_dfa_size,), dtype=jnp.uint16)
                }),
                "graph_r": spaces.Dict({
                    "node_features": spaces.Box(low=0, high=1, shape=(max_dfa_size, 4), dtype=jnp.uint16),
                    "edge_features": spaces.Box(low=0, high=1, shape=(max_dfa_size*max_dfa_size, n_tokens + 8), dtype=jnp.uint16),
                    "edge_index": spaces.Box(low=0, high=max_dfa_size, shape=(2, max_dfa_size*max_dfa_size), dtype=jnp.uint16),
                    "current_state": spaces.Box(low=0, high=max_dfa_size, shape=(1,), dtype=jnp.uint16),
                    "n_states": spaces.Box(low=0, high=max_dfa_size, shape=(max_dfa_size,), dtype=jnp.uint16)
                })
            })
            for agent in self.agents
        }

    @partial(jax.jit, static_argnums=(0,))
    def reset(
        self,
        key: chex.PRNGKey
    ) -> Tuple[Dict[str, chex.Array], DFABisimState]:

        def cond_fn(carry):
            _, dfa_l, dfa_r = carry
            return dfa_l == dfa_r

        def body_fn(carry):
            key, _, _ = carry
            key, kl, kr = jax.random.split(key, 3)
            dfa_l = self.sampler.sample(kl)
            dfa_r = self.sampler.sample(kr)
            return (key, dfa_l, dfa_r)

        init_carry = body_fn((key, None, None))
        _, dfa_l, dfa_r = jax.lax.while_loop(cond_fn, body_fn, init_carry)

        state = DFABisimState(dfa_l=dfa_l, dfa_r=dfa_r, time=0)
        obs = self.get_obs(state=state)

        return {self.agents[0]: obs}, state

    @partial(jax.jit, static_argnums=(0,))
    def step_env(
        self,
        key: chex.PRNGKey,
        state: DFABisimState,
        action: int
    ) -> Tuple[Dict[str, chex.Array], DFABisimState, Dict[str, float], Dict[str, bool], Dict]:

        dfa_l = state.dfa_l.advance(action[self.agents[0]]).minimize()
        dfa_r = state.dfa_r.advance(action[self.agents[0]]).minimize()

        reward_l = dfa_l.reward(binary=False)
        reward_r = dfa_r.reward(binary=False)
        reward = reward_l - reward_r

        new_state = DFABisimState(
            dfa_l=dfa_l,
            dfa_r=dfa_r,
            time=state.time+1
        )

        done = jnp.logical_or(jnp.logical_or(dfa_l.n_states <= 1, dfa_r.n_states <= 1), new_state.time >= self.max_steps_in_episode)

        obs = self.get_obs(state=new_state)
        info = {}

        return {self.agents[0]: obs}, new_state, {self.agents[0]: reward}, {self.agents[0]: done, "__all__": done}, info

    @partial(jax.jit, static_argnums=(0,))
    def get_obs(
        self,
        state: DFABisimState
    ) -> Dict[str, chex.Array]:
        return {
            "graph_l": state.dfa_l.to_graph(),
            "graph_r": state.dfa_r.to_graph()
        }
```

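`DFABisimEnv` samples two distinct random DFAs, feeds the single agent's action (a token) to both, and pays out the difference between their non-binary rewards each step; the episode ends once either DFA minimizes to a single state or the step budget is exhausted. A short rollout sketch, assuming the `dfax` sampler/DFA methods used above (`sample`, `advance`, `minimize`, `reward`, `to_graph`) behave as shown:

```python
# Hedged sketch, not part of the package: a random rollout of DFABisimEnv using
# the auto-resetting MultiAgentEnv.step defined in dfa_gym/env.py below.
import jax
from dfa_gym import DFABisimEnv

env = DFABisimEnv()
key = jax.random.PRNGKey(0)
key, reset_key = jax.random.split(key)
obs, state = env.reset(reset_key)

for _ in range(10):
    key, act_key, step_key = jax.random.split(key, 3)
    # each action is one token drawn uniformly from the sampler's alphabet
    actions = {"agent_0": jax.random.randint(act_key, (), 0, env.sampler.n_tokens)}
    obs, state, rewards, dones, info = env.step(step_key, state, actions)
```
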
dfa_gym/dfa_wrapper.py
CHANGED
```diff
@@ -1,57 +1,190 @@
-import
-import
+import jax
+import dfax
+import chex
+import jax.numpy as jnp
+from flax import struct
+from dfa_gym import spaces
+from functools import partial
+from typing import Tuple, Dict, Callable
+from dfax.utils import list2batch, batch2graph
+from dfa_gym.env import MultiAgentEnv, State
+from dfax.samplers import DFASampler, RADSampler
 
-from typing import Any
 
+@struct.dataclass
+class DFAWrapperState(State):
+    dfas: Dict[str, dfax.DFAx]
+    init_dfas: Dict[str, dfax.DFAx]
+    env_obs: chex.Array
+    env_state: State
+
+class DFAWrapper(MultiAgentEnv):
 
-class DFAWrapper(gym.Wrapper):
     def __init__(
         self,
+        env: MultiAgentEnv,
+        gamma: float | None = None,
+        sampler: DFASampler = RADSampler(),
+        binary_reward: bool = True,
+        progress: bool = True,
+    ) -> None:
+        super().__init__(num_agents=env.num_agents)
+        self.env = env
+        self.gamma = gamma
+        self.sampler = sampler
+        self.binary_reward = binary_reward
+        self.progress = progress
+
+        assert self.sampler.n_tokens == self.env.n_tokens
+
+        self.agents = [f"agent_{i}" for i in range(self.num_agents)]
+
+        self.action_spaces = {
+            agent: self.env.action_space(agent)
+            for agent in self.agents
+        }
+        max_dfa_size = self.sampler.max_size
+        n_tokens = self.sampler.n_tokens
+        self.observation_spaces = {
+            agent: spaces.Dict({
+                "_id": spaces.Discrete(self.num_agents),
+                "obs": self.env.observation_space(agent),
+                "dfa": spaces.Dict({
+                    "node_features": spaces.Box(low=0, high=1, shape=(max_dfa_size*self.num_agents, 4), dtype=jnp.float32),
+                    "edge_features": spaces.Box(low=0, high=1, shape=(max_dfa_size*self.num_agents*max_dfa_size*self.num_agents, n_tokens + 8), dtype=jnp.float32),
+                    "edge_index": spaces.Box(low=0, high=max_dfa_size*self.num_agents, shape=(2, max_dfa_size*self.num_agents*max_dfa_size*self.num_agents), dtype=jnp.int32),
+                    "current_state": spaces.Box(low=0, high=max_dfa_size*self.num_agents, shape=(self.num_agents,), dtype=jnp.int32),
+                    "n_states": spaces.Box(low=0, high=max_dfa_size*self.num_agents, shape=(max_dfa_size*self.num_agents,), dtype=jnp.int32)
+                }),
+            })
+            for agent in self.agents
+        }
+
+    @partial(jax.jit, static_argnums=(0,))
+    def reset(
+        self,
+        key: chex.PRNGKey
+    ) -> Tuple[Dict[str, chex.Array], DFAWrapperState]:
+        keys = jax.random.split(key, 4 + self.num_agents)
+
+        env_obs, env_state = self.env.reset(keys[1])
+
+        n_trivial = jax.random.choice(keys[2], self.num_agents)
+        mask = jax.random.permutation(keys[3], jnp.arange(self.num_agents) < n_trivial)
+
+        def sample_dfa(dfa_key, sample_trivial):
+            return jax.tree_util.tree_map(
+                lambda t, s: jnp.where(sample_trivial, t, s),
+                self.sampler.trivial(True),
+                self.sampler.sample(dfa_key)
+            )
+
+        dfas_tree = jax.vmap(sample_dfa)(keys[4:], mask)
+
+        dfas = {
+            agent: jax.tree_util.tree_map(lambda x: x[i], dfas_tree)
+            for i, agent in enumerate(self.agents)
+        }
+
+        state = DFAWrapperState(
+            dfas=dfas,
+            init_dfas={agent: dfas[agent] for agent in self.agents},
+            env_obs=env_obs,
+            env_state=env_state
+        )
+        obs = self.get_obs(state=state)
+
+        return obs, state
+
+    @partial(jax.jit, static_argnums=(0,))
+    def step_env(
+        self,
+        key: chex.PRNGKey,
+        state: DFAWrapperState,
+        action: int,
+    ) -> Tuple[Dict[str, chex.Array], DFAWrapperState, Dict[str, float], Dict[str, bool], Dict]:
+
+        env_obs, env_state, env_rewards, env_dones, env_info = self.env.step_env(key, state.env_state, action)
+
+        symbols = self.env.label_f(env_state)
+
+        dfas = {
+            agent: state.dfas[agent].advance(symbols[agent]).minimize()
+            for agent in self.agents
+        }
+
+        dones = {
+            agent: jnp.logical_or(env_dones[agent], dfas[agent].n_states <= 1)
+            for agent in self.agents
+        }
+        _dones = jnp.array([dones[agent] for agent in self.agents])
+        dones.update({"__all__": jnp.all(_dones)})
+
+        dfa_rewards_min = jnp.min(jnp.array([dfas[agent].reward(binary=self.binary_reward) for agent in self.agents]))
+        rewards = {
+            agent: jax.lax.cond(
+                dones["__all__"],
+                lambda _: env_rewards[agent] + dfa_rewards_min,
+                lambda _: env_rewards[agent],
+                operand=None
+            )
+            for agent in self.agents
+        }
+
+        if self.gamma is not None:
+            rewards = {
+                agent: rewards[agent] + self.gamma * dfas[agent].reward(binary=self.binary_reward) - state.dfas[agent].reward(binary=self.binary_reward)
+                for agent in self.agents
+            }
+
+        infos = {}
+
+        state = DFAWrapperState(
+            dfas=dfas,
+            init_dfas=state.init_dfas,
+            env_obs=env_obs,
+            env_state=env_state
+        )
+
+        obs = self.get_obs(state=state)
+
+        return obs, state, rewards, dones, infos
+
+    @partial(jax.jit, static_argnums=(0,))
+    def get_obs(
+        self,
+        state: DFAWrapperState
+    ) -> Dict[str, chex.Array]:
+        if self.progress:
+            dfas = batch2graph(
+                list2batch(
+                    [state.dfas[agent].to_graph() for agent in self.agents]
+                )
+            )
+        else:
+            dfas = batch2graph(
+                list2batch(
+                    [state.init_dfas[agent].to_graph() for agent in self.agents]
+                )
+            )
+        return {
+            agent: {
+                "_id": i,
+                "obs": state.env_obs[agent],
+                "dfa": dfas
+            }
+            for i, agent in enumerate(self.agents)
+        }
+
+    def render(self, state: DFAWrapperState):
+        out = ""
+        for agent in self.agents:
+            out += "****\n"
+            out += f"{agent}'s DFA:\n"
+            if self.progress:
+                out += f"{state.dfas[agent]}\n"
+            else:
+                out += f"{state.init_dfas[agent]}\n"
+        self.env.render(state.env_state)
+        print(out)
```

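`DFAWrapper` is no longer a `gym.Wrapper`: it now sits on top of any `MultiAgentEnv`. At reset it samples one DFA task per agent (a random number of agents receive the trivially solved DFA), at each step it advances every agent's DFA with the symbols returned by the base environment's `label_f`, marks an agent done once its DFA collapses to a single state, adds the minimum DFA reward to every agent's reward when all are done, and, if `gamma` is set, applies a potential-style shaping term. A usage sketch; `TokenEnv` and its constructor are assumptions here (only the module name `token_env.py` appears in this release), and the base environment just has to expose `n_tokens`, `label_f`, `reset`, `step_env`, `action_space` and `observation_space`:

```python
# Hedged sketch, not part of the package: wrap a base environment in DFAWrapper.
# TokenEnv and its constructor arguments are assumed, not confirmed by this diff.
import jax
from dfa_gym import DFAWrapper, TokenEnv
from dfax.samplers import RADSampler

base_env = TokenEnv()                                 # assumed constructor
env = DFAWrapper(
    env=base_env,
    sampler=RADSampler(n_tokens=base_env.n_tokens),   # alphabet must match the base env
    gamma=0.99,                                       # optional potential-based shaping
    binary_reward=True,
    progress=True,                                    # observe progressed DFAs, not the initial ones
)

obs, state = env.reset(jax.random.PRNGKey(0))
```
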
dfa_gym/env.py
ADDED
@@ -0,0 +1,168 @@

```python
"""
Abstract base class for multi agent gym environments with JAX
Based on the JaxMARL APIs
"""

import jax
import jax.numpy as jnp
from typing import Dict
import chex
from functools import partial
from flax import struct
from typing import Tuple, Optional

from dfa_gym.spaces import Space

@struct.dataclass
class State:
    pass


class MultiAgentEnv(object):
    """Jittable abstract base class for all JaxMARL Environments."""

    def __init__(
        self,
        num_agents: int,
    ) -> None:
        """
        Args:
            num_agents (int): maximum number of agents within the environment, used to set array dimensions
        """
        self.num_agents = num_agents
        self.observation_spaces = dict()
        self.action_spaces = dict()

    @partial(jax.jit, static_argnums=(0,))
    def reset(self, key: chex.PRNGKey) -> Tuple[Dict[str, chex.Array], State]:
        """Performs resetting of the environment.

        Args:
            key (chex.PRNGKey): random key

        Returns:
            Observations (Dict[str, chex.Array]): observations for each agent, keyed by agent name
            State (State): environment state
        """
        raise NotImplementedError

    @partial(jax.jit, static_argnums=(0,))
    def step(
        self,
        key: chex.PRNGKey,
        state: State,
        actions: Dict[str, chex.Array],
        reset_state: Optional[State] = None,
    ) -> Tuple[Dict[str, chex.Array], State, Dict[str, float], Dict[str, bool], Dict]:
        """Performs step transitions in the environment. Resets the environment if done.
        To control the reset state, pass `reset_state`. Otherwise, the environment will reset using `self.reset`.

        Args:
            key (chex.PRNGKey): random key
            state (State): environment state
            actions (Dict[str, chex.Array]): agent actions, keyed by agent name
            reset_state (Optional[State], optional): Optional environment state to reset to on episode completion. Defaults to None.

        Returns:
            Observations (Dict[str, chex.Array]): next observations
            State (State): next environment state
            Rewards (Dict[str, float]): rewards, keyed by agent name
            Dones (Dict[str, bool]): dones, keyed by agent name
            Info (Dict): info dictionary
        """

        key, key_reset = jax.random.split(key)
        obs_st, states_st, rewards, dones, infos = self.step_env(key, state, actions)

        if reset_state is None:
            obs_re, states_re = self.reset(key_reset)
        else:
            states_re = reset_state
            obs_re = self.get_obs(states_re)

        # Auto-reset environment based on termination
        states = jax.tree.map(
            lambda x, y: jax.lax.select(dones["__all__"], x, y), states_re, states_st
        )
        obs = jax.tree.map(
            lambda x, y: jax.lax.select(dones["__all__"], x, y), obs_re, obs_st
        )
        return obs, states, rewards, dones, infos

    def step_env(
        self, key: chex.PRNGKey, state: State, actions: Dict[str, chex.Array]
    ) -> Tuple[Dict[str, chex.Array], State, Dict[str, float], Dict[str, bool], Dict]:
        """Environment-specific step transition.

        Args:
            key (chex.PRNGKey): random key
            state (State): environment state
            actions (Dict[str, chex.Array]): agent actions, keyed by agent name

        Returns:
            Observations (Dict[str, chex.Array]): next observations
            State (State): next environment state
            Rewards (Dict[str, float]): rewards, keyed by agent name
            Dones (Dict[str, bool]): dones, keyed by agent name
            Info (Dict): info dictionary
        """

        raise NotImplementedError

    def get_obs(self, state: State) -> Dict[str, chex.Array]:
        """Applies observation function to state.

        Args:
            state (State): environment state

        Returns:
            Observations (Dict[str, chex.Array]): observations keyed by agent names"""
        raise NotImplementedError

    def observation_space(self, agent: str) -> Space:
        """Observation space for a given agent.

        Args:
            agent (str): agent name

        Returns:
            space (Space): observation space
        """
        return self.observation_spaces[agent]

    def action_space(self, agent: str) -> Space:
        """Action space for a given agent.

        Args:
            agent (str): agent name

        Returns:
            space (Space): action space
        """
        return self.action_spaces[agent]

    @partial(jax.jit, static_argnums=(0,))
    def get_avail_actions(self, state: State) -> Dict[str, chex.Array]:
        """Returns the available actions for each agent.

        Args:
            state (State): environment state

        Returns:
            available actions (Dict[str, chex.Array]): available actions keyed by agent name
        """
        raise NotImplementedError

    @property
    def name(self) -> str:
        """Environment name."""
        return type(self).__name__

    @property
    def agent_classes(self) -> dict:
        """Returns a dictionary with agent classes

        Format:
            agent_names: [agent_base_name_1, agent_base_name_2, ...]
        """
        raise NotImplementedError
```

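`MultiAgentEnv` supplies the jitted, auto-resetting `step` (it selects between the stepped and freshly reset state/observations based on `dones["__all__"]`); concrete environments override `reset`, `step_env` and `get_obs` and fill in `agents`, `action_spaces` and `observation_spaces`, exactly as `DFABisimEnv` and `DFAWrapper` above do. A toy single-agent sketch of that contract; `CounterEnv`/`CounterState` are illustrative names, not part of dfa-gym:

```python
# Hedged sketch, not part of the package: a minimal MultiAgentEnv subclass.
import jax
import jax.numpy as jnp
import chex
from flax import struct
from functools import partial
from typing import Dict, Tuple
from dfa_gym import spaces
from dfa_gym.env import MultiAgentEnv, State


@struct.dataclass
class CounterState(State):
    total: chex.Array
    time: chex.Array


class CounterEnv(MultiAgentEnv):
    def __init__(self, horizon: int = 10) -> None:
        super().__init__(num_agents=1)
        self.horizon = horizon
        self.agents = ["agent_0"]
        self.action_spaces = {"agent_0": spaces.Discrete(2)}
        self.observation_spaces = {
            "agent_0": spaces.Box(low=0, high=horizon, shape=(1,), dtype=jnp.int32)
        }

    @partial(jax.jit, static_argnums=(0,))
    def reset(self, key: chex.PRNGKey) -> Tuple[Dict[str, chex.Array], CounterState]:
        state = CounterState(total=jnp.int32(0), time=jnp.int32(0))
        return self.get_obs(state), state

    @partial(jax.jit, static_argnums=(0,))
    def step_env(self, key, state, actions):
        # accumulate the chosen action; terminate after `horizon` steps
        new_state = CounterState(total=state.total + actions["agent_0"],
                                 time=state.time + 1)
        done = new_state.time >= self.horizon
        rewards = {"agent_0": jnp.asarray(actions["agent_0"], dtype=jnp.float32)}
        dones = {"agent_0": done, "__all__": done}
        return self.get_obs(new_state), new_state, rewards, dones, {}

    @partial(jax.jit, static_argnums=(0,))
    def get_obs(self, state: CounterState) -> Dict[str, chex.Array]:
        return {"agent_0": jnp.array([state.total], dtype=jnp.int32)}
```
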
dfa_gym/maps/2buttons_2agents.pdf
Binary file

dfa_gym/maps/2rooms_2agents.pdf
Binary file

dfa_gym/maps/4buttons_4agents.pdf
Binary file

dfa_gym/maps/4rooms_4agents.pdf
Binary file

dfa_gym/robot.png
ADDED
Binary file