PyPI - plancraft - Versions diffs - 0.3.32__py3-none-any.whl → 0.3.34__py3-none-any.whl - Mend

plancraft 0.3.32__py3-none-any.whl → 0.3.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

plancraft/environment/actions.py CHANGED Viewed

@@ -6,9 +6,9 @@ from pydantic import BaseModel, field_validator, model_validator
 # [A1], [A2], [A3], [B1], [B2], [B3], [C1], [C2], [C3], [I1]-[I36]
-SLOT_REGEX_PATTERN = r"\[([ABC][0-9]|I[1-9]|I[12][0-9]|I3[0-6])\]"
+SLOT_REGEX_PATTERN = r"\[([ABC][123]|I[1-9]|I[12][0-9]|I3[0-6])\]"
 # [0], [A1], [A2], [A3], [B1], [B2], [B3], [C1], [C2], [C3], [I1]-[I36]
-SLOT_REGEX_PATTERN_WITH_CRAFTING_SLOT = r"\[(0|[ABC][0-9]|I[1-9]|I[12][0-9]|I3[0-6])\]"
+SLOT_REGEX_PATTERN_WITH_CRAFTING_SLOT = r"\[(0|[ABC][123]|I[1-9]|I[12][0-9]|I3[0-6])\]"
 # 1-64
 QUANTITY_REGEX_PATTERN = r"([1-9]|[1-5][0-9]|6[0-4])"

plancraft/evaluator.py CHANGED Viewed

@@ -34,6 +34,13 @@ class Evaluator:
     It is also responsible for early stopping and verifying the target object has been crafted.
     Finally, it also saves the results of the evaluation and the images generated during the evaluation.
+    This evaluator is designed to work with a PlancraftBaseModel and a set of ActionHandlerBase instances.
+    It supports multimodal content format and image-based inventory.
+    Importantly, it tracks the history of the dialogue and the environment state to provide a trace of the model's actions.
+    If you want a simpler interface that just wraps the environment and actions to evaluate a single Plancraft example, you should use the EnvWrapper class.
     """
     def __init__(

plancraft/simple.py ADDED Viewed

@@ -0,0 +1,164 @@
+import json
+import os
+from typing import Optional
+from plancraft.config import PlancraftExample
+from plancraft.environment.actions import (
+    ActionHandlerBase,
+    MoveActionHandler,
+    SmeltActionHandler,
+    ImpossibleActionHandler,
+    StopAction,
+)
+from plancraft.environment.env import (
+    PlancraftEnvironment,
+    get_objective_str,
+    target_and_inventory_to_text_obs,
+)
+def get_plancraft_examples(split: str = "train") -> list[PlancraftExample]:
+    """
+    Load the examples of one dataset split from the bundled data directory.
+    Args:
+        split: Name of the split; the file ``<split>.json`` is read from the
+            ``data`` directory located next to this module.
+    Returns:
+        One PlancraftExample per entry of the JSON file.
+    Raises:
+        FileNotFoundError: If ``<split>.json`` does not exist in the data directory.
+    """
+    data_dir = os.path.join(os.path.dirname(__file__), "data")
+    # Pin the encoding: without it open() falls back to the platform's locale
+    # encoding, which is not guaranteed to be the UTF-8 that JSON files use.
+    with open(os.path.join(data_dir, f"{split}.json"), "r", encoding="utf-8") as f:
+        examples = json.load(f)
+    return [PlancraftExample(**example) for example in examples]
+class EnvWrapper:
+    """
+    This wrapper class just wraps the environment and actions to evaluate a single example
+    This is useful if you want to bring your own agent/model to interact with the environment and not rely on the History class
+    and model class in the plancraft package.
+    """
+    def __init__(
+        self,
+        example: PlancraftExample,
+        actions: Optional[list[ActionHandlerBase]] = None,
+        max_steps: int = 30,
+        resolution: str = "high",
+        use_text_inventory: bool = True,
+    ):
+        """
+        Args:
+            example: The example to evaluate; its slotted inventory seeds the environment.
+            actions: Handlers tried, in order, to parse raw model output. When omitted,
+                a fresh move/smelt/impossible handler list is created for this instance.
+            max_steps: Number of calls to step() allowed before the episode times out.
+            resolution: Passed through to PlancraftEnvironment.
+            use_text_inventory: Whether the text observation includes the inventory;
+                if False, only the objective string is returned.
+        """
+        # Default arguments are evaluated once, when the function is defined.
+        # Building the list here keeps wrappers from sharing one list (and one
+        # set of handler objects) that any of them could mutate.
+        if actions is None:
+            actions = [
+                MoveActionHandler(),
+                SmeltActionHandler(),
+                ImpossibleActionHandler(),
+            ]
+        self.actions = actions
+        self.max_steps = max_steps
+        # whether to convert the inventory to text observation
+        # if False, only the objective string is returned
+        self.use_text_inventory = use_text_inventory
+        self.current_step = 0
+        self.stopped = False
+        self.success = False
+        self.example = example
+        self.resolution = resolution
+        self.environment = PlancraftEnvironment(
+            example.slotted_inventory, resolution=self.resolution
+        )
+        if example.impossible:
+            # An impossible example can only be solved by emitting the stop action,
+            # so a handler named "impossible" must be available.
+            assert "impossible" in [
+                action.action_name for action in actions
+            ], "impossible examples require an action handler named 'impossible'"
+    def check_done(self, inventory: dict, target: str):
+        """
+        Check that target object is obtained
+        Args:
+            inventory: Mapping of slot to an item mapping that has a "type" key.
+            target: Item type to look for.
+        Returns:
+            True if any slot other than 0 holds an item of type ``target``, else False.
+        """
+        # ensure the target is in the inventory (not in slot 0)
+        return any(
+            target == item["type"] and slot != 0 for slot, item in inventory.items()
+        )
+    def parse_raw_model_response(self, generated_text: str) -> str:
+        """
+        Given a message and set of action handlers, parse the content to return the action
+        or a message if the action is not valid/requires message response
+        NOTE(review): despite the ``str`` annotation, the value returned is whatever the
+        first matching handler's ``match`` produces; step() treats it as either a
+        StopAction, a str message, or an action to execute.
+        """
+        # The first handler with a truthy match wins, so handler order matters.
+        for handler in self.actions:
+            match_output = handler.match(generated_text)
+            if match_output:
+                return match_output
+        action_names = [handler.action_name for handler in self.actions]
+        return f"Only select actions from the following: {', '.join(action_names)}"
+    def step(self, action: str) -> tuple[Optional[dict], float, bool]:
+        """
+        Execute action and return next observation, reward, and termination status
+        Args:
+            action: Raw text produced by the agent; it is parsed with the action handlers.
+        Returns:
+            observation: The environment observation after the action, observation is a dictionary with keys:
+                - text: The text observation (always present)
+                - inventory: The inventory after the action (if action was successful)
+                - target: The target object (if action was successful)
+                - image: The image observation (if action was successful)
+            reward: Reward for the current action (1.0 for success, 0.0 otherwise)
+            terminated: Whether the episode is done due to task completion, failure, or timeout
+        """
+        action = self.parse_raw_model_response(action)
+        # Every call counts towards max_steps, including unparseable actions.
+        self.current_step += 1
+        # Initialize return values
+        reward = 0.0
+        terminated = False
+        # Handle already stopped case
+        if self.stopped:
+            return {"text": "Plancraft environment is terminated"}, reward, True
+        # Handle max steps reached (terminate with no reward)
+        if self.current_step > self.max_steps:
+            self.success = False
+            return {"text": f"Max steps ({self.max_steps}) reached"}, reward, True
+        # Handle stop action
+        if isinstance(action, StopAction):
+            self.stopped = True
+            terminated = True
+            #  success is True if example was truly impossible
+            self.success = self.example.impossible
+            if self.success:
+                reward = 1.0
+            observation = {
+                "text": "Plancraft environment is terminate due to stop action"
+            }
+        # Handle invalid action
+        elif isinstance(action, str):
+            # Step the environment without an action and surface the parser's
+            # message as the text observation.
+            observation = self.environment.step()
+            observation["target"] = self.example.target
+            observation["text"] = action
+        # Handle regular action execution
+        # NOTE: if the action is valid but does not do anything
+        # the environment will return the same observation
+        else:
+            observation = self.environment.step(action)
+            observation["target"] = self.example.target
+            # Generate text observation
+            if self.use_text_inventory:
+                text = target_and_inventory_to_text_obs(
+                    target=self.example.target, inventory=observation["inventory"]
+                )
+            else:
+                text = get_objective_str(self.example.target)
+            observation["text"] = text
+            self.success = self.check_done(
+                observation["inventory"], self.example.target
+            )
+            # Set reward and termination for successful completion
+            if self.success:
+                reward = 1.0
+                terminated = True
+                self.stopped = True
+        return observation, reward, terminated

{plancraft-0.3.32.dist-info → plancraft-0.3.34.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: plancraft
-Version: 0.3.32
+Version: 0.3.34
 Summary: Plancraft: an evaluation dataset for planning with LLM agents
 License: MIT License

{plancraft-0.3.32.dist-info → plancraft-0.3.34.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,8 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 plancraft/config.py,sha256=ShsFRlJ7plsl3ToD9fiO_4LDQuXdbjNV6Xp6o3Yk2Yg,4315
-plancraft/evaluator.py,sha256=mxzvbGpEDkiKW8u79QgYz5Q4wnZvkQSXiAvi0OVu4Qs,14754
+plancraft/evaluator.py,sha256=pthc7pxT4xKHzP4hULngrfR0rC9VvnTWPDfnF1YnwJw,15220
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
+plancraft/simple.py,sha256=OinkMTdq4DEFuWcbpOodTYf1G0EdmV10lxfdJxDuUc4,5923
 plancraft/utils.py,sha256=VhnxMihh6pRhNjQTK5HDc0FYWmF9_EcQyRP_a7fbIZA,7156
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
 plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
@@ -11,7 +12,7 @@ plancraft/data/val.json,sha256=IToAiaqUNQi_xhX1bzmInuskLaT7C2ryQjP-CZkzL24,13044
 plancraft/data/val.small.easy.json,sha256=9zEmqepjXG2NIp88xnFqOCkwsUsku3HEwHoQGxgTr6U,190252
 plancraft/data/val.small.json,sha256=76E9EFaljDQyAokg97e-IblvcOe6KbrdKkXvRxhhkgo,237653
 plancraft/environment/__init__.py,sha256=XFsFny4lH195AwAmL-WeCaF9ZCMgc7IgXIwhQ8FTdgE,505
-plancraft/environment/actions.py,sha256=fL_kzVUyOiud_74ST2YWhEgNFhL7y_ydwDPDmY1CTtU,11551
+plancraft/environment/actions.py,sha256=Pub21caxM5iZ9IaX-ny1-xxr_peJIwwV_QAx3BVSry0,11551
 plancraft/environment/env.py,sha256=A4532st7JFBYBF_Nh0CEEi3ZTLJAeaB3t9PAIVSemj0,16390
 plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
 plancraft/environment/planner.py,sha256=uIOJjIoyT_4pxeWeTKb8BkLJyKZG0-AMoEOkZs6Ua9A,19340
@@ -1920,7 +1921,7 @@ plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5w
 plancraft/models/oracle.py,sha256=f-0KWlBuHy6wcxmDsxM3MQ_QwfBstzfbA26mlk1MgLA,1657
 plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.3.32.dist-info/METADATA,sha256=vRc_HMJhCvX4LnEPLHIbgKaJCbQP4Gq0qb4xITGFkYQ,11148
-plancraft-0.3.32.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-plancraft-0.3.32.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
-plancraft-0.3.32.dist-info/RECORD,,
+plancraft-0.3.34.dist-info/METADATA,sha256=xDFi9dYWn_op3CBUr0klhEWTl33LaRudM2GSYzAV9dc,11148
+plancraft-0.3.34.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.3.34.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.3.34.dist-info/RECORD,,

{plancraft-0.3.32.dist-info → plancraft-0.3.34.dist-info}/WHEEL RENAMED Viewed

File without changes

{plancraft-0.3.32.dist-info → plancraft-0.3.34.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

plancraft 0.3.32__py3-none-any.whl → 0.3.34__py3-none-any.whl

plancraft 0.3.32__py3-none-any.whl → 0.3.34__py3-none-any.whl