PyPI - plancraft - Versions diffs - 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl - Mend

plancraft 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

plancraft/evaluator.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import os
 from typing import Optional
+from copy import deepcopy
 import imageio
 from loguru import logger
@@ -38,7 +39,6 @@ class Evaluator:
     def __init__(
         self,
         run_name: str,
-        model: PlancraftBaseModel,
         actions: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
         output_dir: str = "output",
         split: str = "val.small",
@@ -61,6 +61,13 @@ class Evaluator:
         self.use_fasterrcnn = use_fasterrcnn
         self.max_steps = max_steps
         self.resume = resume
+        self.resolution = resolution
+        # history args
+        self.system_prompt = system_prompt
+        self.prompt_examples = prompt_examples
+        self.prompt_images = prompt_images
+        self.few_shot = few_shot
         self.output_dir = f"{output_dir}/{run_name}/{split}"
         self.generation_number = 0
@@ -69,28 +76,6 @@ class Evaluator:
         # load all examples
         self.examples: list[PlancraftExample] = self.load_dataset(split)
-        # start environment
-        self.environment = PlancraftEnvironment(
-            inventory=[],
-            resolution=resolution,
-        )
-        # initialise history/dialogue tracking
-        self.history = History(
-            actions=actions,
-            use_multimodal_content_format=use_multimodal_content_format,
-            use_images=use_images,
-            use_text_inventory=use_text_inventory,
-            resolution=resolution,
-            few_shot=few_shot,
-            system_prompt=system_prompt,
-            prompt_examples=prompt_examples,
-            prompt_images=prompt_images,
-        )
-        # load model
-        self.model = model
     def save_results_dict(self, example: PlancraftExample, results_dict: dict):
         output_dir = f"{self.output_dir}/{self.generation_number}"
         os.makedirs(output_dir, exist_ok=True)
@@ -124,14 +109,6 @@ class Evaluator:
             dataset = json.load(f)
             return [PlancraftExample(**example) for example in dataset]
-    def reset(
-        self,
-        example: PlancraftExample,
-    ):
-        self.environment.reset(new_inventory=example.slotted_inventory)
-        self.model.reset()
-        self.history.reset()
     def check_done(self, inventory: dict, target: str):
         """
         Check that target object is obtained
@@ -142,14 +119,16 @@ class Evaluator:
                 return True
         return False
-    def parse_raw_model_response(self, generated_text: str, observation=None) -> str:
+    def parse_raw_model_response(
+        self, generated_text: str, observation=None, history=None
+    ) -> str:
         """
         Given a message and set of action handlers, parse the content to return the action
         or a message if the action is not valid/requires message response
         """
         for handler in self.actions:
             match_output = handler.match(
-                generated_text, observation=observation, history=self.history
+                generated_text, observation=observation, history=history
             )
             if match_output:
                 return match_output
@@ -159,6 +138,7 @@ class Evaluator:
     def convert_observation_to_message(
         self,
         observation: dict,
+        model: PlancraftBaseModel = None,
     ) -> str | dict:
         """
         Convert an environment observation to the message format used by an LLM chat model
@@ -170,8 +150,9 @@ class Evaluator:
         - use_images: bool - Whether to append an image to the message content - must be used with use_multimodal_content_format.
         """
         if self.use_fasterrcnn:
+            assert model is not None, "Model must be provided to convert image to text"
             # convert image to inventory using fasterrcnn
-            inventory = self.model.bbox_model.get_inventory(observation["image"].copy())
+            inventory = model.bbox_model.get_inventory(observation["image"].copy())
             text_content = target_and_inventory_to_text_obs(
                 observation["target"], inventory
             )
@@ -190,15 +171,38 @@ class Evaluator:
             content_list.append({"type": "image"})
         return {"content": content_list}
-    def eval_example(self, example: PlancraftExample) -> dict:
+    def eval_example(
+        self,
+        example: PlancraftExample,
+        model: PlancraftBaseModel,
+    ) -> dict:
         """Given the loaded model and an example from Plancraft
         run the episode until success or termination."""
+        # start environment
+        environment = PlancraftEnvironment(
+            inventory=example.slotted_inventory,
+            resolution=self.resolution,
+        )
+        # initialise history/dialogue tracking
+        history = History(
+            actions=self.actions,
+            use_multimodal_content_format=self.use_multimodal_content_format,
+            use_images=self.use_images,
+            use_text_inventory=self.use_text_inventory,
+            resolution=self.resolution,
+            few_shot=self.few_shot,
+            system_prompt=deepcopy(self.system_prompt),
+            prompt_examples=deepcopy(self.prompt_examples),
+            prompt_images=deepcopy(self.prompt_images),
+        )
         success = False
-        self.reset(example)
         action = None
         # run episode until stuck or until max steps is reached
-        while self.history.num_steps < self.max_steps:
+        while history.num_steps < self.max_steps:
             # if the action is stop then we end the episode
             if isinstance(action, StopAction):
                 # if the action is stop and task is impossible then success
@@ -207,16 +211,16 @@ class Evaluator:
                 break
             # action is external tool then it is str
             if isinstance(action, str):
-                observation = self.environment.step()
+                observation = environment.step()
                 observation["target"] = example.target
                 observation["message"] = action
             # action is environment action
             else:
-                observation = self.environment.step(action)
+                observation = environment.step(action)
                 # convert inventory observation to text message
                 observation["target"] = example.target
                 observation["message"] = self.convert_observation_to_message(
-                    observation
+                    observation, model=model
                 )
                 # check if the episode is done
                 success = self.check_done(observation["inventory"], example.target)
@@ -225,29 +229,30 @@ class Evaluator:
                 break
             # add observation to history
-            self.history.add_observation_to_history(observation)
+            history.add_observation_to_history(observation)
             # add observation message to history
-            self.history.add_message_to_history(
-                content=observation["message"], role="user"
-            )
+            history.add_message_to_history(content=observation["message"], role="user")
             # predict next action
-            raw_action = self.model.step(observation, dialogue_history=self.history)
+            raw_action = model.step(observation, dialogue_history=history)
             # add message to history
-            self.history.add_message_to_history(content=raw_action, role="assistant")
+            history.add_message_to_history(content=raw_action, role="assistant")
             # parse the raw action
-            action = self.parse_raw_model_response(raw_action, observation=observation)
+            action = self.parse_raw_model_response(
+                raw_action, observation=observation, history=history
+            )
         # save results and reset
         return {
             "success": success,
             "recipe_type": example.recipe_type,
             "complexity": example.complexity_split,
-            "number_of_steps": self.history.num_steps,
-            "model_trace": self.history.trace(),
+            "number_of_steps": history.num_steps,
+            "model_trace": history.trace(),
             "example_id": example.id,
+            "images": history.images,
         }
-    def eval_all_examples(self, progress_bar=False) -> list:
+    def eval_all_examples(self, model, progress_bar=False) -> list:
         results = []
         pbar = tqdm(
             total=len(self.examples),
@@ -268,10 +273,14 @@ class Evaluator:
             ]:
                 continue
-            result = self.eval_example(example)
+            result = self.eval_example(example, model=model)
+            model.reset()
+            # save images and results
+            self.save_images(example, result["images"])
+            del result["images"]
             results.append(result)
             self.save_results_dict(example, result)
-            self.save_images(example, self.history.images)
             correct += int(result["success"])
             count += 1

plancraft/models/dummy.py CHANGED Viewed

@@ -18,14 +18,19 @@ class DummyModel(PlancraftBaseModel):
         pass
     def random_select(self, observation):
-        # randomly pick an item from the inventory
+        # randomly pick an item that has quantity 1 from the inventory
         item_indices = set()
         for slot, item in observation["inventory"].items():
-            if item["quantity"] > 0:
+            if item["quantity"] == 1:
                 item_indices.add(slot)
         all_slots_to = set(range(1, 46))
         empty_slots = all_slots_to - item_indices
+        # if not item with quantity == 1, randomly pick any item
+        if len(item_indices) == 0:
+            item_indices = set(observation["inventory"].keys())
+        # move the item to a random empty slot
         random_slot_from = random.choice(list(item_indices))
         random_slot_to = random.choice(list(empty_slots))

plancraft/utils.py CHANGED Viewed

@@ -44,8 +44,6 @@ class History:
         self.resolution = resolution  # low, medium, high
         self.inventory_history = []
-        self.inventory_counters = []
         self.tokens_used = 0
         # use system prompt if provided
@@ -105,14 +103,6 @@ class History:
     def add_inventory_to_history(self, inventory: dict):
         self.inventory_history.append(inventory)
-        # count inventory
-        counter = Counter()
-        for slot, item in inventory.items():
-            # ignore slot 0
-            if slot == 0:
-                continue
-            counter[item["type"]] += item["quantity"]
-        self.inventory_counters.append(counter)
     def add_image_to_history(self, image):
         self.images.append(image)
@@ -121,11 +111,6 @@ class History:
         if observation is None:
             return
         if "inventory" in observation:
-            # clean_inv = []
-            # remove empty slots
-            # for slot, item in observation["inventory"].items():
-            #     if item["quantity"] > 0:
-            #         clean_inv.append(item)
             self.add_inventory_to_history(observation["inventory"])
         if "image" in observation:
             self.add_image_to_history(observation["image"])
@@ -140,7 +125,6 @@ class History:
         self.initial_dialogue_length = len(self.dialogue_history)
         self.inventory_history = []
-        self.inventory_counters = []
         self.tokens_used = 0

{plancraft-0.3.10.dist-info → plancraft-0.3.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: plancraft
-Version: 0.3.10
+Version: 0.3.12
 Summary: Plancraft: an evaluation dataset for planning with LLM agents
 License: MIT License

{plancraft-0.3.10.dist-info → plancraft-0.3.12.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 plancraft/config.py,sha256=ShsFRlJ7plsl3ToD9fiO_4LDQuXdbjNV6Xp6o3Yk2Yg,4315
-plancraft/evaluator.py,sha256=vu8RqAsvoDtUizLpiDA9w1fmCdCb6q91DUuE_4mUhUo,10745
+plancraft/evaluator.py,sha256=OZ9-xRiCfMPYIVMHZj8UMU53HGWinp18Ilj5BNySioI,11119
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
-plancraft/utils.py,sha256=5_llalestpE2BhpZeU3daLsHSkAFaUW43ChNioRGugo,6349
+plancraft/utils.py,sha256=0Uq-3VE-bTRstalzKknBJ-ExWf8ec_Jrg4QNEk8bJ-o,5778
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
 plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
 plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -1915,12 +1915,12 @@ plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,
 plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
 plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
 plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
-plancraft/models/dummy.py,sha256=jBxke6VNpyYh_HBcFxCx64djO5F3wr5GbbnC0XePZ20,1015
+plancraft/models/dummy.py,sha256=856oEX6NquXSIIfQLTEFFeB8ib7VUUs5cB0TVHAiFvI,1248
 plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
 plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
 plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.3.10.dist-info/METADATA,sha256=Mt-c0Y9JZUFlE43R9qVFFnRJ23LV6l0rGgH8FjErFbo,11148
-plancraft-0.3.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-plancraft-0.3.10.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
-plancraft-0.3.10.dist-info/RECORD,,
+plancraft-0.3.12.dist-info/METADATA,sha256=RJyvF0PV84-_p-7ijstEcYrwjKLqedRce_LL5zF5ihs,11148
+plancraft-0.3.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.3.12.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.3.12.dist-info/RECORD,,

{plancraft-0.3.10.dist-info → plancraft-0.3.12.dist-info}/WHEEL RENAMED Viewed

File without changes

{plancraft-0.3.10.dist-info → plancraft-0.3.12.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

plancraft 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

plancraft 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl