PyPI - plancraft - Versions diffs - 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl - Mend

plancraft 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

Files changed (8) hide show

plancraft/environment/prompts.py +13 -2
plancraft/evaluator.py +14 -10
plancraft/models/dummy.py +1 -2
plancraft/utils.py +22 -29
{plancraft-0.3.3.dist-info → plancraft-0.3.5.dist-info}/METADATA +14 -15
{plancraft-0.3.3.dist-info → plancraft-0.3.5.dist-info}/RECORD +8 -8
{plancraft-0.3.3.dist-info → plancraft-0.3.5.dist-info}/WHEEL +0 -0
{plancraft-0.3.3.dist-info → plancraft-0.3.5.dist-info}/licenses/LICENSE +0 -0

plancraft/environment/prompts.py CHANGED Viewed

@@ -59,7 +59,8 @@ SEARCH_STEPS = [
 def get_system_prompt(
     handlers: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
-):
+    use_multimodal_content_format=False,
+) -> dict:
     action_names = [handler.action_name for handler in handlers]
     assert "move" in action_names, "MoveActionHandler should be one of the handlers"
     assert "smelt" in action_names, "SmeltActionHandler should be one of the handlers"
@@ -72,7 +73,17 @@ def get_system_prompt(
     for handler in handlers:
         output_format += f"\n\t- {handler.prompt_format_example}"
-    return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
+    system_prompt_text = f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
+    if use_multimodal_content_format:
+        return {
+            "role": "system",
+            "content": [{"text": system_prompt_text, "type": "text"}],
+        }
+    return {
+        "role": "system",
+        "content": system_prompt_text,
+    }
 def get_prompt_example(

plancraft/evaluator.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import os
+from typing import Optional
 import imageio
 from loguru import logger
@@ -8,18 +9,18 @@ from tqdm import tqdm
 import wandb
 from plancraft.config import PlancraftExample
 from plancraft.environment.actions import (
-    StopAction,
     ActionHandlerBase,
     MoveActionHandler,
     SmeltActionHandler,
+    StopAction,
 )
 from plancraft.environment.env import (
     PlancraftEnvironment,
     get_objective_str,
     target_and_inventory_to_text_obs,
 )
-from plancraft.utils import History
 from plancraft.models.base import PlancraftBaseModel
+from plancraft.utils import History
 class Evaluator:
@@ -48,6 +49,10 @@ class Evaluator:
         use_images: bool = False,
         use_text_inventory: bool = False,
         use_fasterrcnn: bool = False,
+        system_prompt: Optional[dict] = None,
+        prompt_examples: list[dict] = [],
+        prompt_images: list[str] = [],
+        few_shot: bool = True,
     ):
         self.run_name = run_name
         self.use_multimodal_content_format = use_multimodal_content_format
@@ -77,6 +82,10 @@ class Evaluator:
             use_images=use_images,
             use_text_inventory=use_text_inventory,
             resolution=resolution,
+            few_shot=few_shot,
+            system_prompt=system_prompt,
+            prompt_examples=prompt_examples,
+            prompt_images=prompt_images,
         )
         # load model
@@ -204,11 +213,9 @@ class Evaluator:
                 num_non_env_actions += 1
             # action is environment action
             else:
-                # add action to history
                 if isinstance(action, str):
                     observation = self.environment.step()
                 else:
-                    self.history.add_action_to_history(action)
                     observation = self.environment.step(action)
                 # convert inventory observation to text message
@@ -220,6 +227,9 @@ class Evaluator:
                 # check if the episode is done
                 success = self.check_done(observation["inventory"], example.target)
+            # exit if success
+            if success:
+                break
             # add observation to history
             self.history.add_observation_to_history(observation)
@@ -227,11 +237,6 @@ class Evaluator:
             self.history.add_message_to_history(
                 content=observation["message"], role="user"
             )
-            # exit if success
-            if success:
-                break
             # predict next action
             raw_action = self.model.step(observation, dialogue_history=self.history)
             # add message to history
@@ -247,7 +252,6 @@ class Evaluator:
             "number_of_steps": self.history.num_steps,
             "model_trace": self.history.trace(),
             "example_id": example.id,
-            "impossible": example.impossible,
         }
     def eval_all_examples(self, progress_bar=False) -> list:

plancraft/models/dummy.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import random
-from plancraft.config import EvalConfig
 from plancraft.environment.actions import (
     MoveAction,
 )
@@ -12,7 +11,7 @@ class DummyModel(PlancraftBaseModel):
     Dummy model returns actions that do random action
     """
-    def __init__(self, cfg: EvalConfig):
+    def __init__(self, cfg=None):
         pass
     def reset(self):

plancraft/utils.py CHANGED Viewed

@@ -2,18 +2,15 @@ import glob
 import pathlib
 from collections import Counter
 from copy import copy
+from typing import Optional
 import torch
 from loguru import logger
-from plancraft.environment.actions import (
-    ActionHandlerBase,
-    MoveAction,
-    SmeltAction,
-)
+from plancraft.environment.actions import ActionHandlerBase
 from plancraft.environment.prompts import (
-    get_system_prompt,
     get_prompt_example,
+    get_system_prompt,
     load_prompt_images,
 )
@@ -35,6 +32,9 @@ class History:
         use_images=False,
         use_text_inventory=False,
         resolution="high",
+        system_prompt: Optional[dict] = None,
+        prompt_examples: list[dict] = [],
+        prompt_images: list[str] = [],
     ):
         self.action_handlers = actions
         self.use_multimodal_content_format = use_multimodal_content_format
@@ -49,31 +49,30 @@ class History:
         self.tokens_used = 0
+        # use system prompt if provided
+        if system_prompt:
+            self.system_prompt_dialogue = system_prompt
+        else:
+            # generate system prompt
+            self.system_prompt_dialogue = get_system_prompt(
+                handlers=self.action_handlers,
+                use_multimodal_content_format=self.use_multimodal_content_format,
+            )
         # set up dialogue history with few-shot prompt
+        self.prompt_examples = prompt_examples
+        self.prompt_images = prompt_images
         self.set_up_few_shot_prompt()
-        self.system_prompt_dialogue = self.system_prompt()
         self.dialogue_history = copy(self.prompt_examples)
         self.images = copy(self.prompt_images)
         self.initial_dialogue_length = len(self.dialogue_history)
-    def system_prompt(self):
-        # kept separate from dialogue history because certain models deal with system prompt differently
-        system_prompt_text = get_system_prompt(handlers=self.action_handlers)
-        if self.use_multimodal_content_format:
-            return {
-                "role": "system",
-                "content": [{"text": system_prompt_text, "type": "text"}],
-            }
-        return {
-            "role": "system",
-            "content": system_prompt_text,
-        }
     def set_up_few_shot_prompt(self):
-        self.prompt_examples = []
-        self.prompt_images = []
+        # if either prompt_examples or prompt_images are provided, skip
+        if self.prompt_examples or self.prompt_images:
+            return
+        # if few-shot is not enabled, skip
         if self.few_shot:
             self.prompt_examples = get_prompt_example(
                 self.action_handlers,
@@ -105,10 +104,6 @@ class History:
             else:
                 self.dialogue_history.append({"role": role, "content": content})
-    def add_action_to_history(self, action: SmeltAction | MoveAction):
-        if isinstance(action, SmeltAction) or isinstance(action, MoveAction):
-            self.action_history.append(action.model_dump())
     def add_inventory_to_history(self, inventory: dict):
         self.inventory_history.append(inventory)
         # count inventory
@@ -145,7 +140,6 @@ class History:
         self.images = copy(self.prompt_images)
         self.initial_dialogue_length = len(self.dialogue_history)
-        self.action_history = []
         self.inventory_history = []
         self.inventory_counters = []
@@ -156,7 +150,6 @@ class History:
             "dialogue_history": copy(
                 self.dialogue_history[self.initial_dialogue_length :]
             ),
-            "action_history": copy(self.action_history),
             "inventory_history": copy(self.inventory_history),
             "tokens_used": copy(self.tokens_used),
         }

{plancraft-0.3.3.dist-info → plancraft-0.3.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: plancraft
-Version: 0.3.3
+Version: 0.3.5
 Summary: Plancraft: an evaluation dataset for planning with LLM agents
 License: MIT License
@@ -72,6 +72,13 @@ You can install the package by running the following command:
 pip install plancraft
 ```
+Or:
+```bash
+uv add plancraft
+```
 ![gif-example3](docs/images/train_images/TRAIN0010.gif)
 ![gif-example1](docs/images/train_images/TRAIN1133.gif)
 ![gif-example2](docs/images/train_images/TRAIN0383.gif)
@@ -117,17 +124,14 @@ The package also provides an `Evaluator` class that can be used to evaluate the
 ```python
 from plancraft.evaluator import Evaluator
-from plancraft.config import EvalConfig
 def main():
-    # Create the config
-    config = EvalConfig(...)
     # create model -- Note you can create your own model by subclassing PlancraftBaseModel
-    model = get_model(config)
+    model = get_model("dummy")
     # Create the evaluator
-    evaluator = Evaluator(config, model=model)
+    evaluator = Evaluator(run_name="dummy", model=model)
     # Evaluate the agent
-    evaluator.eval_all_seeds()
+    evaluator.eval_all_examples()
 ```
 The evaluator class handles the environment loop and model interaction. The environment is created based on the configuration and the examples are loaded from the dataset. The `Evaluator` uses the dataset examples and initializes the environment with the example's inventory. It is also responsible for early stopping and verifying the target object has been crafted. Finally, it also saves the results of the evaluation and the images generated during the evaluation.
@@ -159,7 +163,6 @@ while not history.check_stuck() and history.num_steps < max_steps:
             # Handle invalid case (exceeded non-env action limit)
             observation = environment.step()
         else:
-            history.add_action_to_history(action)  # Add action to history
             observation = environment.step(action)
         # Convert observation to message and reset non-env counter
@@ -170,19 +173,16 @@ while not history.check_stuck() and history.num_steps < max_steps:
         # Check if episode is complete
         success = check_done(observation["inventory"], example.target)
-    # Update history with observation and message
-    history.add_observation_to_history(observation)
-    history.add_message_to_history(content=observation["message"], role="user")
     if success:  # Exit loop if success
         break
+    # Update history with observation and message
+    history.add_observation_to_history(observation)
+    history.add_message_to_history(content=observation["message"], role="user")
     # Model predicts next action
     raw_action = model.step(observation, dialogue_history=history)
     # Update history with predicted action
     history.add_message_to_history(content=raw_action, role="assistant")
     # Parse raw action into a structured format
     action = parse_raw_model_response(raw_action)
@@ -194,7 +194,6 @@ return {
     "number_of_steps": history.num_steps,
     "model_trace": history.trace(),
     "example_id": example.id,
-    "impossible": example.impossible,
 }
 ```

{plancraft-0.3.3.dist-info → plancraft-0.3.5.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 plancraft/config.py,sha256=Ppkps-E8xDNYEP9prOVxW2zEG9MpWVzcLJi4tmGLjuQ,4285
-plancraft/evaluator.py,sha256=7PjdITOTUCtjPywFOOd9vVhl5UDKZuFF7rc3mjUOn0Q,10717
+plancraft/evaluator.py,sha256=dTsE3FiQTJc094TmBvfBvefOpGSYcePIGVT36OEIClU,10910
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
-plancraft/utils.py,sha256=7VWKVlDhoMacRypRRSKM1K3hwwJ0nHR3zyx9jZH1C1g,7042
+plancraft/utils.py,sha256=8bO8wrblmIW1aXEJre7ALGbL6GvuFrY38aZDdA_8W-g,6882
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
 plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
 plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -15,7 +15,7 @@ plancraft/environment/actions.py,sha256=D9QqBW7yWsbWCjxNyWp61Xtb0c6EtyXk3PZ1I8SR
 plancraft/environment/env.py,sha256=F5xo1eAJ9MeuoE2IpG_LtbaE0BGd66URPB_rehAWIiU,16372
 plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
 plancraft/environment/planner.py,sha256=eJExz3OxSzurIEdH9LOtMwFH9ApqMQ3CokVhmbV6Px0,3953
-plancraft/environment/prompts.py,sha256=OKxiv02NIhRk5FZJUEDRLkVWVMc-aXKJi7i7X61uUmk,6633
+plancraft/environment/prompts.py,sha256=8QXclX0ygpL02uZichE1AVkbdn_0HGteD5bzo0FZGOU,6947
 plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
 plancraft/environment/sampler.py,sha256=IZT-XjmWSZrs0zDyRTMjYytXxewdwYf5YGGdKsR5ll4,7643
 plancraft/environment/search.py,sha256=uFHpLvW40rMKOxDabcyWrpOrhKLDZqAJOF_jew4_WXk,1837
@@ -1915,12 +1915,12 @@ plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,
 plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
 plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
 plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
-plancraft/models/dummy.py,sha256=HVuX5Y9CPNDP8Ne4BNTe2qyWdxyhIgvPIIV3OhXxzD8,1062
+plancraft/models/dummy.py,sha256=jBxke6VNpyYh_HBcFxCx64djO5F3wr5GbbnC0XePZ20,1015
 plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
 plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
 plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.3.3.dist-info/METADATA,sha256=UMIYGLhvaJO8CJyOj4hka_5OO2T728yqhzhX0rMQqfQ,11306
-plancraft-0.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-plancraft-0.3.3.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
-plancraft-0.3.3.dist-info/RECORD,,
+plancraft-0.3.5.dist-info/METADATA,sha256=QxQSXPXF162We8KwESaZ-nn94gqfz_5PQaXNDWkvV1Y,11147
+plancraft-0.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.3.5.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.3.5.dist-info/RECORD,,

{plancraft-0.3.3.dist-info → plancraft-0.3.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{plancraft-0.3.3.dist-info → plancraft-0.3.5.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

plancraft 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

plancraft 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl