PyPI - plancraft - Versions diffs - 0.3.22__py3-none-any.whl → 0.3.24__py3-none-any.whl - Mend

plancraft 0.3.22__py3-none-any.whl → 0.3.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

plancraft/evaluator.py +68 -56
plancraft/models/base.py +12 -1
plancraft/models/dummy.py +6 -4
plancraft/utils.py +77 -21
{plancraft-0.3.22.dist-info → plancraft-0.3.24.dist-info}/METADATA +1 -1
{plancraft-0.3.22.dist-info → plancraft-0.3.24.dist-info}/RECORD +8 -8
{plancraft-0.3.22.dist-info → plancraft-0.3.24.dist-info}/WHEEL +0 -0
{plancraft-0.3.22.dist-info → plancraft-0.3.24.dist-info}/licenses/LICENSE +0 -0

plancraft/evaluator.py CHANGED Viewed

@@ -19,8 +19,8 @@ from plancraft.environment.env import (
     get_objective_str,
     target_and_inventory_to_text_obs,
 )
-from plancraft.models.base import PlancraftBaseModel
-from plancraft.utils import History
+from plancraft.models.base import PlancraftBaseModel, PlancraftModelOutput
+from plancraft.utils import HistoryBase, History, HistoryConfig
 class Evaluator:
@@ -41,40 +41,39 @@ class Evaluator:
         actions: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
         output_dir: str = "output",
         split: str = "val.small",
-        resolution: str = "high",
         max_steps: int = 30,
         resume: bool = False,
+        use_fasterrcnn: bool = False,
         use_multimodal_content_format: bool = False,
         use_images: bool = False,
         use_text_inventory: bool = False,
-        use_fasterrcnn: bool = False,
-        system_prompt: Optional[dict] = None,
-        prompt_examples: list[dict] = [],
-        prompt_images: list[str] = [],
-        few_shot: bool = True,
+        resolution: str = "high",
+        history_config: Optional[HistoryConfig] = None,
+        history_class: type[HistoryBase] = History,
     ):
         self.run_name = run_name
+        self.actions = actions
+        self.output_dir = f"{output_dir}/{run_name}/{split}"
+        self.max_steps = max_steps
+        self.resume = resume
+        self.use_fasterrcnn = use_fasterrcnn
+        self.generation_number = 0
         self.use_multimodal_content_format = use_multimodal_content_format
         self.use_images = use_images
         self.use_text_inventory = use_text_inventory
-        self.use_fasterrcnn = use_fasterrcnn
-        self.max_steps = max_steps
-        self.resume = resume
         self.resolution = resolution
-        # history args
-        self.system_prompt = system_prompt
-        self.prompt_examples = prompt_examples
-        self.prompt_images = prompt_images
-        self.few_shot = few_shot
+        # Set up history configuration
+        self.history_config = history_config or HistoryConfig()
+        self.history_class = history_class
-        self.output_dir = f"{output_dir}/{run_name}/{split}"
-        self.generation_number = 0
-        self.actions = actions
-        # load all examples
+        # load examples
         self.examples: list[PlancraftExample] = self.load_dataset(split)
+    def create_history(self) -> HistoryBase:
+        """Create a new History instance with current configuration"""
+        return self.history_class(actions=self.actions, config=self.history_config)
     def save_results_dict(self, example: PlancraftExample, results_dict: dict):
         output_dir = f"{self.output_dir}/{self.generation_number}"
         os.makedirs(output_dir, exist_ok=True)
@@ -187,17 +186,7 @@ class Evaluator:
         )
         # initialise history/dialogue tracking
-        history = History(
-            actions=self.actions,
-            use_multimodal_content_format=self.use_multimodal_content_format,
-            use_images=self.use_images,
-            use_text_inventory=self.use_text_inventory,
-            resolution=self.resolution,
-            few_shot=self.few_shot,
-            system_prompt=deepcopy(self.system_prompt),
-            prompt_examples=deepcopy(self.prompt_examples),
-            prompt_images=deepcopy(self.prompt_images),
-        )
+        history = self.create_history()
         success = False
         action = None
@@ -235,8 +224,24 @@ class Evaluator:
             history.add_message_to_history(content=observation["message"], role="user")
             # predict next action
             raw_action = model.step(observation, dialogue_history=history)
-            # add message to history
-            history.add_message_to_history(content=raw_action, role="assistant")
+            # if the model returns a PlancraftModelOutput, extract the action
+            if isinstance(raw_action, PlancraftModelOutput):
+                # add message to history
+                history.add_message_to_history(
+                    content=raw_action.action,
+                    role="assistant",
+                    **(raw_action.kwargs or {}),
+                )
+                raw_action = raw_action.action
+            elif isinstance(raw_action, str):
+                # add message to history
+                history.add_message_to_history(content=raw_action, role="assistant")
+            else:
+                raise ValueError(
+                    f"model.step() output must be a string or PlancraftModelOutput, got {type(raw_action)}"
+                )
             # parse the raw action
             action = self.parse_raw_model_response(
                 raw_action, observation=observation, history=history
@@ -267,20 +272,7 @@ class Evaluator:
             for i in range(len(examples))
         ]
-        histories = [
-            History(
-                actions=self.actions,
-                use_multimodal_content_format=self.use_multimodal_content_format,
-                use_images=self.use_images,
-                use_text_inventory=self.use_text_inventory,
-                resolution=self.resolution,
-                few_shot=self.few_shot,
-                system_prompt=deepcopy(self.system_prompt),
-                prompt_examples=deepcopy(self.prompt_examples),
-                prompt_images=deepcopy(self.prompt_images),
-            )
-            for _ in range(len(examples))
-        ]
+        histories = [self.create_history() for _ in range(len(examples))]
         # Track which environments are still active
         active_mask = [True for _ in range(len(examples))]
@@ -362,14 +354,34 @@ class Evaluator:
             for batch_idx, (idx, raw_action) in enumerate(
                 zip(active_indices, raw_actions)
             ):
-                histories[idx].add_message_to_history(
-                    content=raw_action, role="assistant"
-                )
-                actions[idx] = self.parse_raw_model_response(
-                    raw_action,
-                    observation=observations[batch_idx],
-                    history=histories[idx],
-                )
+                # if the model returns a PlancraftModelOutput, extract the action
+                if isinstance(raw_action, PlancraftModelOutput):
+                    # add message to history
+                    histories[idx].add_message_to_history(
+                        content=raw_action.action,
+                        role="assistant",
+                        **(raw_action.kwargs or {}),
+                    )
+                    actions[idx] = self.parse_raw_model_response(
+                        raw_action.action,
+                        observation=observations[batch_idx],
+                        history=histories[idx],
+                    )
+                # if the model returns a string, parse the raw action
+                elif isinstance(raw_action, str):
+                    # add message to history
+                    histories[idx].add_message_to_history(
+                        content=raw_action, role="assistant"
+                    )
+                    actions[idx] = self.parse_raw_model_response(
+                        raw_action,
+                        observation=observations[batch_idx],
+                        history=histories[idx],
+                    )
+                else:
+                    raise ValueError(
+                        f"model.step() output must be a string or PlancraftModelOutput, got {type(raw_action)}"
+                    )
         # Fill in results for environments that didn't finish
         for i, result in enumerate(results):

plancraft/models/base.py CHANGED Viewed

@@ -1,15 +1,26 @@
 import abc
+from dataclasses import dataclass
+from typing import Optional
 from plancraft.utils import History
+@dataclass
+class PlancraftModelOutput:
+    action: str
+    kwargs: Optional[dict] = None
 class PlancraftBaseModel(abc.ABC):
     """
     Model class must implement the following methods to work with evaluator
     """
     @abc.abstractmethod
-    def step(self, observation: dict, dialogue_history: History) -> str:
+    def step(
+        self, observation: dict, dialogue_history: History
+    ) -> PlancraftModelOutput | str:
         """
         Model should output an action in text based on the types available
         We also pass history to the model to allow for chat models to track the dialogue

plancraft/models/dummy.py CHANGED Viewed

@@ -3,7 +3,7 @@ import random
 from plancraft.environment.actions import (
     MoveAction,
 )
-from plancraft.models.base import PlancraftBaseModel
+from plancraft.models.base import PlancraftBaseModel, PlancraftModelOutput
 class DummyModel(PlancraftBaseModel):
@@ -38,8 +38,10 @@ class DummyModel(PlancraftBaseModel):
             slot_from=random_slot_from, slot_to=random_slot_to, quantity=1
         )
-    def step(self, observation: dict, **kwargs) -> str:
-        return str(self.random_select(observation))
+    def step(self, observation: dict, **kwargs) -> PlancraftModelOutput:
+        return PlancraftModelOutput(action=str(self.random_select(observation)))
-    def batch_step(self, observations: list[dict], **kwargs) -> list:
+    def batch_step(
+        self, observations: list[dict], **kwargs
+    ) -> list[PlancraftModelOutput]:
         return [self.step(observation) for observation in observations]

plancraft/utils.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import glob
 import pathlib
-from collections import Counter
 from copy import copy
 from typing import Optional
+import abc
+from dataclasses import dataclass, field
 import torch
 from loguru import logger
@@ -15,7 +16,56 @@ from plancraft.environment.prompts import (
 )
-class History:
+@dataclass
+class HistoryConfig:
+    """Configuration for History instances"""
+    few_shot: bool = True
+    system_prompt: Optional[dict] = None
+    prompt_examples: list[dict] = field(default_factory=list)
+    prompt_images: list[str] = field(default_factory=list)
+class HistoryBase(abc.ABC):
+    """Abstract base class defining the interface required by the Evaluator"""
+    @property
+    @abc.abstractmethod
+    def num_steps(self) -> int:
+        """Return the number of interaction steps taken"""
+        pass
+    @abc.abstractmethod
+    def add_message_to_history(
+        self, content: str | dict, role: str = "user", **kwargs
+    ) -> None:
+        """Add a message to the dialogue history"""
+        pass
+    @abc.abstractmethod
+    def add_observation_to_history(self, observation: dict, **kwargs) -> None:
+        """Add an observation (inventory, image) to history"""
+        pass
+    @abc.abstractmethod
+    def trace(self) -> dict:
+        """Return a traceable history of the interaction"""
+        pass
+    @property
+    @abc.abstractmethod
+    def images(self) -> list:
+        """Return list of images"""
+        pass
+    @images.setter
+    @abc.abstractmethod
+    def images(self, value: list) -> None:
+        """Set list of images"""
+        pass
+class History(HistoryBase):
     """
     History class to keep track of dialogue, actions, inventory and images
     Args:
@@ -27,42 +77,40 @@ class History:
     def __init__(
         self,
         actions: list[ActionHandlerBase] = [],
-        use_multimodal_content_format=False,
-        few_shot=False,
-        use_images=False,
-        use_text_inventory=False,
-        resolution="high",
-        system_prompt: Optional[dict] = None,
-        prompt_examples: list[dict] = [],
-        prompt_images: list[str] = [],
+        config: HistoryConfig = HistoryConfig(),
+        resolution: str = "high",
+        use_multimodal_content_format: bool = False,
+        use_images: bool = False,
+        use_text_inventory: bool = True,
     ):
         self.action_handlers = actions
         self.use_multimodal_content_format = use_multimodal_content_format
-        self.few_shot = few_shot
         self.use_images = use_images
         self.use_text_inventory = use_text_inventory
-        self.resolution = resolution  # low, medium, high
+        self.resolution = resolution
         self.inventory_history = []
         self.tokens_used = 0
         # use system prompt if provided
-        if system_prompt:
-            self.system_prompt_dialogue = system_prompt
+        if config.system_prompt:
+            self.system_prompt_dialogue = config.system_prompt
         else:
             # generate system prompt
             self.system_prompt_dialogue = get_system_prompt(
                 handlers=self.action_handlers,
                 use_multimodal_content_format=self.use_multimodal_content_format,
             )
+        self.few_shot = config.few_shot
         # set up dialogue history with few-shot prompt
-        self.prompt_examples = prompt_examples
-        self.prompt_images = prompt_images
+        self.prompt_examples = config.prompt_examples
+        self.prompt_images = config.prompt_images
         self.set_up_few_shot_prompt()
         self.dialogue_history = copy(self.prompt_examples)
-        self.images = copy(self.prompt_images)
+        self._images = copy(self.prompt_images)
         self.initial_dialogue_length = len(self.dialogue_history)
     def set_up_few_shot_prompt(self):
@@ -80,7 +128,7 @@ class History:
             if self.use_images:
                 self.prompt_images = load_prompt_images(resolution=self.resolution)
-    def add_message_to_history(self, content: str | dict, role="user"):
+    def add_message_to_history(self, content: str | dict, role="user", **kwargs):
         if isinstance(content, dict):
             assert "content" in content, "content key not found in message"
             content["role"] = role
@@ -102,9 +150,9 @@ class History:
         self.inventory_history.append(inventory)
     def add_image_to_history(self, image):
-        self.images.append(image)
+        self._images.append(image)
-    def add_observation_to_history(self, observation: dict):
+    def add_observation_to_history(self, observation: dict, **kwargs):
         if observation is None:
             return
         if "inventory" in observation:
@@ -118,7 +166,7 @@ class History:
     def reset(self):
         # reset dialogue history to few-shot prompt
         self.dialogue_history = copy(self.prompt_examples)
-        self.images = copy(self.prompt_images)
+        self._images = copy(self.prompt_images)
         self.initial_dialogue_length = len(self.dialogue_history)
         self.inventory_history = []
@@ -138,6 +186,14 @@ class History:
     def num_steps(self):
         return (len(self.dialogue_history) - self.initial_dialogue_length) // 2
+    @property
+    def images(self) -> list:
+        return self._images
+    @images.setter
+    def images(self, value: list) -> None:
+        self._images = value
 def get_downloaded_models() -> dict:
     """

{plancraft-0.3.22.dist-info → plancraft-0.3.24.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: plancraft
-Version: 0.3.22
+Version: 0.3.24
 Summary: Plancraft: an evaluation dataset for planning with LLM agents
 License: MIT License

{plancraft-0.3.22.dist-info → plancraft-0.3.24.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 plancraft/config.py,sha256=ShsFRlJ7plsl3ToD9fiO_4LDQuXdbjNV6Xp6o3Yk2Yg,4315
-plancraft/evaluator.py,sha256=R_RZN9AL_ae0rIvj7HLhYolTpCVMuhPTJfIrmyoLaX4,16326
+plancraft/evaluator.py,sha256=dyszVJtTc_PThVEeGmp6YMkmEn4gaXQW52eWaKO2FQ8,17210
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
-plancraft/utils.py,sha256=67UUDMSv8TqX_I0fL5-yG_vkHvTZlnhSLkktWAg5p34,5712
+plancraft/utils.py,sha256=VhnxMihh6pRhNjQTK5HDc0FYWmF9_EcQyRP_a7fbIZA,7156
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
 plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
 plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -1913,14 +1913,14 @@ plancraft/environment/tags/wooden_trapdoors.json,sha256=DbjfwoHJL8VuYWV61A1uDqW7
 plancraft/environment/tags/wool.json,sha256=Z59l4mdPztVZBFaglJ4mV9H2OnyCVzhqQRi2dduak78,496
 plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,510
 plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
-plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
+plancraft/models/base.py,sha256=S8EdkqWpn8nE1WcrqDoA4Hx4p52qEttGxnqjIPWvl3Q,852
 plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
-plancraft/models/dummy.py,sha256=3Nsnw12s_n5mWMuxUTaPCuJIzPp0vLHWKE827iKY5o0,1391
+plancraft/models/dummy.py,sha256=_NUTviv5ye6KGzODRt0Zykk8shsek0QBqWCeZW3ldSQ,1495
 plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
 plancraft/models/oracle.py,sha256=f-0KWlBuHy6wcxmDsxM3MQ_QwfBstzfbA26mlk1MgLA,1657
 plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.3.22.dist-info/METADATA,sha256=jTX0TZZxJRldUDDFuJ6AhuN1Bf5Jc2DuDooPVwCBkAQ,11148
-plancraft-0.3.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-plancraft-0.3.22.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
-plancraft-0.3.22.dist-info/RECORD,,
+plancraft-0.3.24.dist-info/METADATA,sha256=yCmPq3zXC2cEu5E9c1MOT1nsrfGcI_clOMYIV4GQoR4,11148
+plancraft-0.3.24.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.3.24.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.3.24.dist-info/RECORD,,

{plancraft-0.3.22.dist-info → plancraft-0.3.24.dist-info}/WHEEL RENAMED Viewed

File without changes

{plancraft-0.3.22.dist-info → plancraft-0.3.24.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

plancraft 0.3.22__py3-none-any.whl → 0.3.24__py3-none-any.whl

plancraft 0.3.22__py3-none-any.whl → 0.3.24__py3-none-any.whl