dfa-gym 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dfa_gym/__init__.py +5 -15
- dfa_gym/dfa_bisim_env.py +121 -0
- dfa_gym/dfa_wrapper.py +185 -52
- dfa_gym/env.py +168 -0
- dfa_gym/maps/2buttons_2agents.pdf +0 -0
- dfa_gym/maps/2rooms_2agents.pdf +0 -0
- dfa_gym/maps/4buttons_4agents.pdf +0 -0
- dfa_gym/maps/4rooms_4agents.pdf +0 -0
- dfa_gym/robot.png +0 -0
- dfa_gym/spaces.py +156 -0
- dfa_gym/token_env.py +571 -0
- dfa_gym/utils.py +266 -0
- dfa_gym-0.2.0.dist-info/METADATA +93 -0
- dfa_gym-0.2.0.dist-info/RECORD +16 -0
- {dfa_gym-0.1.0.dist-info → dfa_gym-0.2.0.dist-info}/WHEEL +1 -1
- dfa_gym/dfa_env.py +0 -45
- dfa_gym-0.1.0.dist-info/METADATA +0 -11
- dfa_gym-0.1.0.dist-info/RECORD +0 -7
- {dfa_gym-0.1.0.dist-info → dfa_gym-0.2.0.dist-info}/licenses/LICENSE +0 -0
dfa_gym/utils.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
import matplotlib.image as mpimg
|
|
4
|
+
import matplotlib.patches as patches
|
|
5
|
+
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
|
|
6
|
+
from matplotlib.animation import FuncAnimation, PillowWriter
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def parse_map(map_lines):
    """Parse ASCII map lines into a 2D grid of cell strings.

    Each cell is the text found between a ``[...]`` pair, stripped of
    surrounding whitespace. Lines containing no ``[...]`` cells (e.g.
    blank lines) are skipped entirely.
    """
    rows = (re.findall(r"\[(.*?)\]", line) for line in map_lines)
    return [[cell.strip() for cell in cells] for cells in rows if cells]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def visualize(layout, figsize, cell_size=1, save_path=None, trace=None):
    """Render an ASCII grid layout with matplotlib.

    Cell conventions (see ``parse_map``): ``#`` is a wall, uppercase
    letters are agents, digits are tokens, lowercase letters are sync
    buttons (color keyed by letter), and ``#,<letter>`` is a colored
    wall/door tied to that button.

    If ``trace`` is given, an animation is produced instead of a static
    figure: one frame per trace entry, drawing agents as robot images
    and toggling door walls. Each trace entry is assumed to expose
    ``.env_state.agent_positions`` (per-agent (row, col)) and
    ``.env_state.is_wall_disabled`` (per-wall flags, in wall insertion
    order) -- TODO confirm against the env's state dataclass.

    NOTE(review): 'robot.png' is loaded via a relative path, so this
    only works when the CWD contains that file -- verify.
    """
    map_lines = layout.splitlines()
    grid = parse_map(map_lines)
    # n_cols is taken from the first row; assumes a rectangular layout.
    n_rows, n_cols = len(grid), len(grid[0])

    fig, ax = plt.subplots(figsize=figsize)
    ax.set_xlim(0, n_cols)
    ax.set_ylim(0, n_rows)
    ax.set_aspect("equal")
    ax.axis("off")

    # First pass: paint a light-gray background square for every cell.
    for r in range(n_rows):
        for c in range(n_cols):
            content = grid[r][c]
            # Flip rows so row 0 of the ASCII map appears at the top.
            x, y = c, n_rows - r - 1

            # background
            ax.add_patch(patches.Rectangle(
                (x, y), cell_size, cell_size,
                facecolor="lightgray", edgecolor="white", lw=1
            ))
    # Second pass: record agents/walls and draw tokens and buttons.
    agent_positions = {}  # agent letter -> (x, y) cell center
    wall_positions = {}   # (x, y) -> wall color ("dimgray" or button color)
    for r in range(n_rows):
        for c in range(n_cols):
            content = grid[r][c]
            x, y = c, n_rows - r - 1

            if not content:
                continue

            if content == "#":  # wall
                # Plain walls are deferred: drawn later (statically or
                # per-frame when animating).
                wall_positions[(x,y)] = "dimgray"

            elif content.isupper():  # agents
                agent_positions[content] = (x + 0.5, y + 0.5)

            elif content.isdigit():  # tokens
                ax.add_patch(patches.Circle(
                    (x + 0.5, y + 0.5), 0.4,
                    facecolor="gold", edgecolor="orange", lw=1.5
                ))
                ax.text(x + 0.5, y + 0.5, content,
                        ha="center", va="center",
                        fontsize=24, color="black", weight="bold")

            elif content.islower():  # sync button
                # NOTE: "#,a" also lands here (islower() is True for it),
                # so doors are colored by their button letter below.
                if "a" in content:
                    color = "red"
                elif "b" in content:
                    color = "green"
                elif "c" in content:
                    color = "blue"
                elif "d" in content:
                    color = "pink"
                else:
                    raise ValueError
                if "#" in content:
                    # Door: record as a colored wall, drawn later.
                    wall_positions[(x,y)] = color
                else:
                    # Button cell: solid colored square.
                    ax.add_patch(patches.Rectangle(
                        (x, y), cell_size, cell_size,
                        facecolor=color, edgecolor="black", lw=1.5
                    ))

            elif "," in content:  # door like "#,a"
                # NOTE(review): likely unreachable -- "#,a" matches the
                # islower() branch above; confirm before removing.
                parts = [p.strip() for p in content.split(",")]
                ax.add_patch(patches.Rectangle(
                    (x, y), cell_size, cell_size,
                    facecolor="firebrick", edgecolor="black", lw=1.5
                ))

    if trace is not None:
        # Animated mode: one frame per trace entry.
        n_agents = len(agent_positions.keys())
        L = len(trace)

        # Load robot image once
        robot_img = mpimg.imread('robot.png')
        zoom = 0.05  # adjust as needed

        # Optional: add labels to track agents
        agent_labels = [str(i + 1) for i in range(n_agents)]

        # Store artists for cleanup each frame
        current_boxes = []
        current_texts = []
        current_walls = []
        current_timestep = []

        def update(frame):
            """Draw one animation frame, replacing the previous frame's artists."""
            # Remove previous robot images and texts
            for ab in current_boxes:
                ab.remove()
            current_boxes.clear()

            for txt in current_texts:
                txt.remove()
            current_texts.clear()

            for wall in current_walls:
                wall.remove()
            current_walls.clear()

            for ts in current_timestep:  # remove timestep text
                ts.remove()
            current_timestep.clear()

            # Add robot images and labels for this frame
            for agent_idx in range(n_agents):
                # pos is assumed to be (row, col) -- TODO confirm.
                pos = trace[frame].env_state.agent_positions[agent_idx]
                x = pos[1] + 0.5
                y = n_rows - pos[0] - 0.5

                # robot image
                image_box = OffsetImage(robot_img, zoom=zoom)
                ab = AnnotationBbox(image_box, (x, y), frameon=False)
                ax.add_artist(ab)
                current_boxes.append(ab)

                # label text
                txt = ax.text(x+0.3, y + 0.3, agent_labels[agent_idx],
                              ha='center', va='bottom', color='black', weight='bold', fontsize=10)
                current_texts.append(txt)

            # Draw walls dynamically. NOTE(review): index i pairs
            # wall_positions' insertion order with is_wall_disabled --
            # verify the env builds that array in the same order.
            for i, (x, y) in enumerate(wall_positions):
                color = wall_positions[(x, y)]
                if trace[frame].env_state.is_wall_disabled[i] or color == "dimgray":
                    # Disabled doors and plain walls: solid fill.
                    rect = ax.add_patch(patches.Rectangle(
                        (x, y), cell_size, cell_size,
                        facecolor=color, edgecolor="black", lw=1.5
                    ))
                else:
                    # Active doors: hatched fill. NOTE(review):
                    # hatch_linewidth requires a recent matplotlib -- confirm.
                    rect = ax.add_patch(patches.Rectangle(
                        (x, y), cell_size, cell_size,
                        facecolor=color, edgecolor="black", lw=1.5,
                        hatch="||", hatch_linewidth=3, fill=True
                    ))
                current_walls.append(rect)

            # Add timestep text above the grid
            ts = ax.text(n_cols / 2, n_rows + 0.5, f"Time step: {frame}",
                         ha='center', va='bottom', color='black', weight='bold', fontsize=14)
            current_timestep.append(ts)

            return current_boxes + current_texts + current_walls + current_timestep

        anim = FuncAnimation(fig, update, frames=L, interval=500, blit=False)

        if save_path:
            # Animations are saved as GIFs regardless of the given extension.
            gif_path = save_path.replace(".pdf", ".gif")
            anim.save(gif_path, writer=PillowWriter(fps=2))

    else:
        # Static mode: draw agents and walls once.
        for agent in agent_positions:
            x, y = agent_positions[agent]
            image = plt.imread('robot.png')
            image_box = OffsetImage(image, zoom=0.05)
            ab = AnnotationBbox(image_box, (x, y), frameon=False)
            ax.add_artist(ab)

        for (x, y) in wall_positions:
            color = wall_positions[(x, y)]
            ax.add_patch(patches.Rectangle(
                (x, y), cell_size, cell_size,
                facecolor=color, edgecolor="black", lw=1.5,
                hatch="||", hatch_linewidth=3, fill=True
            ))

        if save_path:
            plt.savefig(save_path, bbox_inches="tight", dpi=300)
        else:
            plt.show()
        plt.close()
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
if __name__ == "__main__":
    # Demo: a 17x9 map with four token rooms behind colored doors
    # (a/b/c/d buttons), four agents (A-D), and tokens 0-9.
    layout = """
[ # ][ # ][ # ][ # ][ # ][ ][ ][ ][ 0 ][ ][ ][ ][ # ][ # ][ # ][ # ][ # ]
[ # ][ 0 ][ ][ 1 ][#,c][ ][ c ][ ][ A ][ ][ a ][ ][#,a][ 0 ][ ][ 2 ][ # ]
[ # ][ ][ 4 ][ ][#,c][ ][ c ][ ][ ][ ][ a ][ ][#,a][ ][ 8 ][ ][ # ]
[ # ][ 3 ][ ][ 2 ][#,c][ ][ c ][ ][ B ][ ][ a ][ ][#,a][ 6 ][ ][ 4 ][ # ]
[ # ][ # ][ # ][ # ][ # ][ 2 ][ ][ ][ ][ ][ ][ 3 ][ # ][ # ][ # ][ # ][ # ]
[ # ][ 5 ][ ][ 6 ][#,d][ ][ d ][ ][ C ][ ][ b ][ ][#,b][ 1 ][ ][ 3 ][ # ]
[ # ][ ][ 9 ][ ][#,d][ ][ d ][ ][ ][ ][ b ][ ][#,b][ ][ 9 ][ ][ # ]
[ # ][ 8 ][ ][ 7 ][#,d][ ][ d ][ ][ D ][ ][ b ][ ][#,b][ 7 ][ ][ 5 ][ # ]
[ # ][ # ][ # ][ # ][ # ][ ][ ][ ][ 1 ][ ][ ][ ][ # ][ # ][ # ][ # ][ # ]
"""

    # Shows the map interactively; pass e.g.
    # save_path="maps/4buttons_4agents.pdf" to write a file instead.
    visualize(layout, figsize=(17,9))
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dfa-gym
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Python library for playing DFA bisimulation games and wrapping other RL environments with DFA goals.
|
|
5
|
+
Author-email: Beyazit Yalcinkaya <beyazit@berkeley.edu>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: dfax>=0.1.1
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# dfa-gym
|
|
12
|
+
|
|
13
|
+
This repo implements (Multi-Agent) Reinforcement Learning environments in JAX for solving objectives given as Deterministic Finite Automata (DFAs). There are three environments:
|
|
14
|
+
|
|
15
|
+
1. `TokenEnv` is a fully observable grid environment with tokens in cells. The grid can be created randomly or from a specific layout. It can be instantiated in both single- and multi-agent settings.
|
|
16
|
+
2. `DFAWrapper` is an environment wrapper assigning tasks represented as Deterministic Finite Automata (DFAs) to the agents in the wrapped environment. DFAs are represented as [`DFAx`](https://github.com/rad-dfa/dfax) objects.
|
|
17
|
+
3. `DFABisimEnv` is an environment for solving DFA bisimulation games to learn RAD Embeddings, provably correct latent DFA representation, as described in [this paper](https://arxiv.org/pdf/2503.05042).
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
This package will soon be made pip-installable. In the meantime, pull the repo and install it locally.
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
git clone https://github.com/rad-dfa/dfa-gym.git
|
|
26
|
+
pip install -e dfa-gym
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## TokenEnv
|
|
30
|
+
|
|
31
|
+
Create a grid world with token and agent positions assigned randomly.
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from dfa_gym import TokenEnv
|
|
35
|
+
|
|
36
|
+
env = TokenEnv(
|
|
37
|
+
n_agents=1, # Single agent
|
|
38
|
+
n_tokens=10, # 10 different token types
|
|
39
|
+
n_token_repeat=2, # Each token repeated twice
|
|
40
|
+
grid_shape=(7, 7), # Shape of the grid
|
|
41
|
+
fixed_map_seed=None, # If not None, then samples the same map using the given seed
|
|
42
|
+
max_steps_in_episode=100, # Episode length is 100
|
|
43
|
+
)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Create a grid world from a given layout.
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
layout = """
|
|
50
|
+
[ 0 ][ ][ ][ ][ # ][ # ][ # ][ # ][ # ]
|
|
51
|
+
[ ][ ][ a ][ ][#,a][ 0 ][ ][ 2 ][ # ]
|
|
52
|
+
[ A ][ ][ a ][ ][#,a][ ][ 8 ][ ][ # ]
|
|
53
|
+
[ ][ ][ a ][ ][#,a][ 6 ][ ][ 4 ][ # ]
|
|
54
|
+
[ 1 ][ ][ ][ 3 ][ # ][ # ][ # ][ # ][ # ]
|
|
55
|
+
[ ][ ][ b ][ ][#,b][ 1 ][ ][ 3 ][ # ]
|
|
56
|
+
[ B ][ ][ b ][ ][#,b][ ][ 9 ][ ][ # ]
|
|
57
|
+
[ ][ ][ b ][ ][#,b][ 7 ][ ][ 5 ][ # ]
|
|
58
|
+
[ 2 ][ ][ ][ ][ # ][ # ][ # ][ # ][ # ]
|
|
59
|
+
"""
|
|
60
|
+
env = TokenEnv(
|
|
61
|
+
layout=layout, # Set layout, where each [] indicates a cell, uppercase letters are
|
|
62
|
+
# agents, # are walls, and lower case letters are buttons when alone
|
|
63
|
+
# and doors when paired with a wall. For example, [#,a] is a door
|
|
64
|
+
# that is open if an agent is on a [ a ] cell and closed otherwise.
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
## DFAWrapper
|
|
70
|
+
|
|
71
|
+
Wrap a `TokenEnv` instance using `DFAWrapper`.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from dfa_gym import DFAWrapper
|
|
75
|
+
from dfax.samplers import ReachSampler
|
|
76
|
+
|
|
77
|
+
env = DFAWrapper(
|
|
78
|
+
env=TokenEnv(layout=layout),
|
|
79
|
+
sampler=ReachSampler()
|
|
80
|
+
)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## DFABisimEnv
|
|
84
|
+
|
|
85
|
+
Create DFA bisimulation game.
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from dfa_gym import DFABisimEnv
|
|
89
|
+
from dfax.samplers import RADSampler
|
|
90
|
+
|
|
91
|
+
env = DFABisimEnv(sampler=RADSampler())
|
|
92
|
+
```
|
|
93
|
+
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
dfa_gym/__init__.py,sha256=8rauoRND6VqAFw1axw_xcFBOxIzHi9MrrN1d57y_bL4,185
|
|
2
|
+
dfa_gym/dfa_bisim_env.py,sha256=QKh4ebg2HEENSODuY1x77IKySx8MIC9aM9iwA5cAF8o,4394
|
|
3
|
+
dfa_gym/dfa_wrapper.py,sha256=o500-Zl8FELYH4dqL111iuul4THeZWuXrD_RlHVzuBY,6353
|
|
4
|
+
dfa_gym/env.py,sha256=belmfaFHB_dYjnyNCe_zjniSOhldSK_1zXy2W9_FfBU,5415
|
|
5
|
+
dfa_gym/robot.png,sha256=GdWmACflIoWlRBdwJq_rNdInksWqMuEcuLd9KAI8uQE,18616
|
|
6
|
+
dfa_gym/spaces.py,sha256=jIBLrCSEwsnnSQRZ0xVebX-KjdpX_X_5CXQjcX5V7mo,4696
|
|
7
|
+
dfa_gym/token_env.py,sha256=LWMqAh9K8XY33Uuv8epMwo8whz-RJhOHlyANpSQvzoE,24227
|
|
8
|
+
dfa_gym/utils.py,sha256=DE32KxJ7LixEF47f60TX-6q6-uOK6uw7pnnwbhaZFEM,10772
|
|
9
|
+
dfa_gym/maps/2buttons_2agents.pdf,sha256=eH5iwwCWXbyWWJteG4GcnVuA4_8C9jWZTiB6DmKwzHU,32646
|
|
10
|
+
dfa_gym/maps/2rooms_2agents.pdf,sha256=LfUrnDuTyBmhNUp5wljbcxlU85fldRWy0sFVJ7f_aCY,31668
|
|
11
|
+
dfa_gym/maps/4buttons_4agents.pdf,sha256=xABZVV-8Np0gY_6FiAONf-UXnT07_5u34_d6bAOMdo0,57783
|
|
12
|
+
dfa_gym/maps/4rooms_4agents.pdf,sha256=1S_9APYr18sEtIgJtY0SHU44M-z6EgGIA6JTYSrk_lw,56058
|
|
13
|
+
dfa_gym-0.2.0.dist-info/METADATA,sha256=t2G1j_wlOFJuBNypEBFKSO8fr5J9o3bjkVJIYaTDbsE,3157
|
|
14
|
+
dfa_gym-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
15
|
+
dfa_gym-0.2.0.dist-info/licenses/LICENSE,sha256=Cvu0BZqt3rcFFv70hcFDgD_y8ryOKW85F-qGRfYI4iM,1071
|
|
16
|
+
dfa_gym-0.2.0.dist-info/RECORD,,
|
dfa_gym/dfa_env.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import gymnasium as gym
|
|
3
|
-
from gymnasium import spaces
|
|
4
|
-
from dfa_samplers import DFASampler, RADSampler
|
|
5
|
-
|
|
6
|
-
from typing import Any
|
|
7
|
-
|
|
8
|
-
__all__ = ["DFAEnv"]
|
|
9
|
-
|
|
10
|
-
class DFAEnv(gym.Env):
|
|
11
|
-
def __init__(
|
|
12
|
-
self,
|
|
13
|
-
sampler: DFASampler | None = None,
|
|
14
|
-
timeout: int = 100
|
|
15
|
-
):
|
|
16
|
-
super().__init__()
|
|
17
|
-
self.sampler = sampler if sampler is not None else RADSampler()
|
|
18
|
-
self.size_bound = self.sampler.get_size_bound()
|
|
19
|
-
self.action_space = spaces.Discrete(self.sampler.n_tokens)
|
|
20
|
-
self.observation_space = spaces.Box(low=0, high=9, shape=(self.size_bound,), dtype=np.int64)
|
|
21
|
-
self.dfa = None
|
|
22
|
-
self.timeout = timeout
|
|
23
|
-
self.t = None
|
|
24
|
-
|
|
25
|
-
def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[np.ndarray, dict[str, Any]]:
|
|
26
|
-
np.random.seed(seed)
|
|
27
|
-
self.dfa = self.sampler.sample()
|
|
28
|
-
self.t = 0
|
|
29
|
-
return self._get_dfa_obs(), {}
|
|
30
|
-
|
|
31
|
-
def step(self, action: int) -> tuple[np.ndarray, int, bool, bool, dict[str, Any]]:
|
|
32
|
-
self.dfa = self.dfa.advance([action]).minimize()
|
|
33
|
-
reward = 0
|
|
34
|
-
if self.dfa._label(self.dfa.start):
|
|
35
|
-
reward = 1
|
|
36
|
-
elif self.dfa.find_word() is None:
|
|
37
|
-
reward = -1
|
|
38
|
-
self.t += 1
|
|
39
|
-
done = reward != 0 or self.t > self.timeout
|
|
40
|
-
return self._get_dfa_obs(), reward, done, False, {}
|
|
41
|
-
|
|
42
|
-
def _get_dfa_obs(self) -> np.ndarray:
|
|
43
|
-
dfa_obs = np.array([int(i) for i in str(self.dfa.to_int())])
|
|
44
|
-
obs = np.pad(dfa_obs, (self.size_bound - dfa_obs.shape[0], 0), constant_values=0)
|
|
45
|
-
return obs
|
dfa_gym-0.1.0.dist-info/METADATA
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: dfa-gym
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: Gymnasium environment for solving DFAs and wrapping other environments with DFA goals
|
|
5
|
-
License-File: LICENSE
|
|
6
|
-
Requires-Python: >=3.12
|
|
7
|
-
Requires-Dist: dfa-samplers>=0.1.0
|
|
8
|
-
Requires-Dist: gymnasium>=1.0.0
|
|
9
|
-
Description-Content-Type: text/markdown
|
|
10
|
-
|
|
11
|
-
# dfa-gym
|
dfa_gym-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
dfa_gym/__init__.py,sha256=tLY48NluNVv66znFnlR7j9o-pRW5caaO766W724__HY,364
|
|
2
|
-
dfa_gym/dfa_env.py,sha256=u-mOCPhXRljp2t-VmvDfsHbKIdHdn59UGomHxUa_BxQ,1601
|
|
3
|
-
dfa_gym/dfa_wrapper.py,sha256=11eqfyl6g2v-wILGMWLg9L2sMJYnwl5rMn11p9YbQF0,2283
|
|
4
|
-
dfa_gym-0.1.0.dist-info/METADATA,sha256=W_S2r-zMFEX9oeVJa3kCVCStnhQ91rNj-tA_FO6oBm8,309
|
|
5
|
-
dfa_gym-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
-
dfa_gym-0.1.0.dist-info/licenses/LICENSE,sha256=Cvu0BZqt3rcFFv70hcFDgD_y8ryOKW85F-qGRfYI4iM,1071
|
|
7
|
-
dfa_gym-0.1.0.dist-info/RECORD,,
|
|
File without changes
|