PyPI - plancraft - Versions diffs - 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl - Mend

plancraft 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

plancraft/environment/sampler.py CHANGED Viewed

@@ -39,7 +39,7 @@ def assign_to_slots(inventory: dict[str, int]) -> list[dict]:
     for item, total_count in inventory.items():
         while total_count > 0:
             if len(available_slots) == 0:
-                logger.info("Not enough slots available")
+                logger.warning("Not enough slots available")
                 break
             slot = available_slots.pop()
             count_in_slot = min(total_count, MAX_STACK_SIZE[item])

plancraft/evaluator.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import os
 from typing import Optional
+from copy import deepcopy
 import imageio
 from loguru import logger
@@ -38,7 +39,6 @@ class Evaluator:
     def __init__(
         self,
         run_name: str,
-        model: PlancraftBaseModel,
         actions: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
         output_dir: str = "output",
         split: str = "val.small",
@@ -61,6 +61,13 @@ class Evaluator:
         self.use_fasterrcnn = use_fasterrcnn
         self.max_steps = max_steps
         self.resume = resume
+        self.resolution = resolution
+        # history args
+        self.system_prompt = system_prompt
+        self.prompt_examples = prompt_examples
+        self.prompt_images = prompt_images
+        self.few_shot = few_shot
         self.output_dir = f"{output_dir}/{run_name}/{split}"
         self.generation_number = 0
@@ -69,28 +76,6 @@ class Evaluator:
         # load all examples
         self.examples: list[PlancraftExample] = self.load_dataset(split)
-        # start environment
-        self.environment = PlancraftEnvironment(
-            inventory=[],
-            resolution=resolution,
-        )
-        # initialise history/dialogue tracking
-        self.history = History(
-            actions=actions,
-            use_multimodal_content_format=use_multimodal_content_format,
-            use_images=use_images,
-            use_text_inventory=use_text_inventory,
-            resolution=resolution,
-            few_shot=few_shot,
-            system_prompt=system_prompt,
-            prompt_examples=prompt_examples,
-            prompt_images=prompt_images,
-        )
-        # load model
-        self.model = model
     def save_results_dict(self, example: PlancraftExample, results_dict: dict):
         output_dir = f"{self.output_dir}/{self.generation_number}"
         os.makedirs(output_dir, exist_ok=True)
@@ -124,14 +109,6 @@ class Evaluator:
             dataset = json.load(f)
             return [PlancraftExample(**example) for example in dataset]
-    def reset(
-        self,
-        example: PlancraftExample,
-    ):
-        self.environment.reset(new_inventory=example.slotted_inventory)
-        self.model.reset()
-        self.history.reset()
     def check_done(self, inventory: dict, target: str):
         """
         Check that target object is obtained
@@ -142,14 +119,16 @@ class Evaluator:
                 return True
         return False
-    def parse_raw_model_response(self, generated_text: str, observation=None) -> str:
+    def parse_raw_model_response(
+        self, generated_text: str, observation=None, history=None
+    ) -> str:
         """
         Given a message and set of action handlers, parse the content to return the action
         or a message if the action is not valid/requires message response
         """
         for handler in self.actions:
             match_output = handler.match(
-                generated_text, observation=observation, history=self.history
+                generated_text, observation=observation, history=history
             )
             if match_output:
                 return match_output
@@ -159,6 +138,7 @@ class Evaluator:
     def convert_observation_to_message(
         self,
         observation: dict,
+        model: PlancraftBaseModel = None,
     ) -> str | dict:
         """
         Convert an environment observation to the message format used by an LLM chat model
@@ -170,8 +150,9 @@ class Evaluator:
         - use_images: bool - Whether to append an image to the message content - must be used with use_multimodal_content_format.
         """
         if self.use_fasterrcnn:
+            assert model is not None, "Model must be provided to convert image to text"
             # convert image to inventory using fasterrcnn
-            inventory = self.model.bbox_model.get_inventory(observation["image"].copy())
+            inventory = model.bbox_model.get_inventory(observation["image"].copy())
             text_content = target_and_inventory_to_text_obs(
                 observation["target"], inventory
             )
@@ -190,15 +171,38 @@ class Evaluator:
             content_list.append({"type": "image"})
         return {"content": content_list}
-    def eval_example(self, example: PlancraftExample) -> dict:
+    def eval_example(
+        self,
+        example: PlancraftExample,
+        model: PlancraftBaseModel,
+    ) -> dict:
         """Given the loaded model and an example from Plancraft
         run the episode until success or termination."""
+        # start environment
+        environment = PlancraftEnvironment(
+            inventory=example.slotted_inventory,
+            resolution=self.resolution,
+        )
+        # initialise history/dialogue tracking
+        history = History(
+            actions=self.actions,
+            use_multimodal_content_format=self.use_multimodal_content_format,
+            use_images=self.use_images,
+            use_text_inventory=self.use_text_inventory,
+            resolution=self.resolution,
+            few_shot=self.few_shot,
+            system_prompt=deepcopy(self.system_prompt),
+            prompt_examples=deepcopy(self.prompt_examples),
+            prompt_images=deepcopy(self.prompt_images),
+        )
         success = False
-        self.reset(example)
         action = None
         # run episode until stuck or until max steps is reached
-        while self.history.num_steps < self.max_steps:
+        while history.num_steps < self.max_steps:
             # if the action is stop then we end the episode
             if isinstance(action, StopAction):
                 # if the action is stop and task is impossible then success
@@ -207,16 +211,16 @@ class Evaluator:
                 break
             # action is external tool then it is str
             if isinstance(action, str):
-                observation = self.environment.step()
+                observation = environment.step()
                 observation["target"] = example.target
                 observation["message"] = action
             # action is environment action
             else:
-                observation = self.environment.step(action)
+                observation = environment.step(action)
                 # convert inventory observation to text message
                 observation["target"] = example.target
                 observation["message"] = self.convert_observation_to_message(
-                    observation
+                    observation, model=model
                 )
                 # check if the episode is done
                 success = self.check_done(observation["inventory"], example.target)
@@ -225,29 +229,30 @@ class Evaluator:
                 break
             # add observation to history
-            self.history.add_observation_to_history(observation)
+            history.add_observation_to_history(observation)
             # add observation message to history
-            self.history.add_message_to_history(
-                content=observation["message"], role="user"
-            )
+            history.add_message_to_history(content=observation["message"], role="user")
             # predict next action
-            raw_action = self.model.step(observation, dialogue_history=self.history)
+            raw_action = model.step(observation, dialogue_history=history)
             # add message to history
-            self.history.add_message_to_history(content=raw_action, role="assistant")
+            history.add_message_to_history(content=raw_action, role="assistant")
             # parse the raw action
-            action = self.parse_raw_model_response(raw_action, observation=observation)
+            action = self.parse_raw_model_response(
+                raw_action, observation=observation, history=history
+            )
         # save results and reset
         return {
             "success": success,
             "recipe_type": example.recipe_type,
             "complexity": example.complexity_split,
-            "number_of_steps": self.history.num_steps,
-            "model_trace": self.history.trace(),
+            "number_of_steps": history.num_steps,
+            "model_trace": history.trace(),
             "example_id": example.id,
+            "images": history.images,
         }
-    def eval_all_examples(self, progress_bar=False) -> list:
+    def eval_all_examples(self, model, progress_bar=False) -> list:
         results = []
         pbar = tqdm(
             total=len(self.examples),
@@ -256,7 +261,6 @@ class Evaluator:
         correct = 0
         count = 0
         for example in self.examples:
-            logger.debug(f"Running example {example.id}")
             if resume_result := self.load_results_dict(example):
                 pbar.update(self.max_steps)
                 results.append(resume_result)
@@ -268,10 +272,14 @@ class Evaluator:
             ]:
                 continue
-            result = self.eval_example(example)
+            result = self.eval_example(example, model=model)
+            model.reset()
+            # save images and results
+            self.save_images(example, result["images"])
+            del result["images"]
             results.append(result)
             self.save_results_dict(example, result)
-            self.save_images(example, self.history.images)
             correct += int(result["success"])
             count += 1

plancraft/models/dummy.py CHANGED Viewed

@@ -18,14 +18,19 @@ class DummyModel(PlancraftBaseModel):
         pass
     def random_select(self, observation):
-        # randomly pick an item from the inventory
+        # randomly pick an item that has quantity 1 from the inventory
         item_indices = set()
         for slot, item in observation["inventory"].items():
-            if item["quantity"] > 0:
+            if item["quantity"] == 1:
                 item_indices.add(slot)
         all_slots_to = set(range(1, 46))
         empty_slots = all_slots_to - item_indices
+        # if not item with quantity == 1, randomly pick any item
+        if len(item_indices) == 0:
+            item_indices = set(observation["inventory"].keys())
+        # move the item to a random empty slot
         random_slot_from = random.choice(list(item_indices))
         random_slot_to = random.choice(list(empty_slots))

plancraft/utils.py CHANGED Viewed

@@ -81,9 +81,6 @@ class History:
                 self.prompt_images = load_prompt_images(resolution=self.resolution)
     def add_message_to_history(self, content: str | dict, role="user"):
-        if role == "assistant":
-            logger.info(content)
         if isinstance(content, dict):
             assert "content" in content, "content key not found in message"
             content["role"] = role

{plancraft-0.3.11.dist-info → plancraft-0.3.13.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: plancraft
-Version: 0.3.11
+Version: 0.3.13
 Summary: Plancraft: an evaluation dataset for planning with LLM agents
 License: MIT License

{plancraft-0.3.11.dist-info → plancraft-0.3.13.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 plancraft/config.py,sha256=ShsFRlJ7plsl3ToD9fiO_4LDQuXdbjNV6Xp6o3Yk2Yg,4315
-plancraft/evaluator.py,sha256=vu8RqAsvoDtUizLpiDA9w1fmCdCb6q91DUuE_4mUhUo,10745
+plancraft/evaluator.py,sha256=q7khX8FrMeb5QOgYZba-24jC7ZXp83VU7sa1H1kKS08,11061
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
-plancraft/utils.py,sha256=0Uq-3VE-bTRstalzKknBJ-ExWf8ec_Jrg4QNEk8bJ-o,5778
+plancraft/utils.py,sha256=67UUDMSv8TqX_I0fL5-yG_vkHvTZlnhSLkktWAg5p34,5712
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
 plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
 plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -17,7 +17,7 @@ plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82
 plancraft/environment/planner.py,sha256=eJExz3OxSzurIEdH9LOtMwFH9ApqMQ3CokVhmbV6Px0,3953
 plancraft/environment/prompts.py,sha256=8QXclX0ygpL02uZichE1AVkbdn_0HGteD5bzo0FZGOU,6947
 plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
-plancraft/environment/sampler.py,sha256=IZT-XjmWSZrs0zDyRTMjYytXxewdwYf5YGGdKsR5ll4,7643
+plancraft/environment/sampler.py,sha256=lTSiGfmrew0G7ewOWtz6dtt58Mj0rAg6PW8BIbBegXA,7646
 plancraft/environment/search.py,sha256=Dmdvj04kMvPlwvoWSc2261LTXV8RbMpS4FODV1YoZKs,1847
 plancraft/environment/assets/constants.json,sha256=kyOIOh82CTTMMGEIS60k5k6M-6fkEmYDoGAnvi3Zx5k,1379016
 plancraft/environment/assets/minecraft_font.ttf,sha256=AzoK9cgggXwjFPHtIO7uz-YaDrminl3nvB-VsaTvTAk,60992
@@ -1915,12 +1915,12 @@ plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,
 plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
 plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
 plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
-plancraft/models/dummy.py,sha256=jBxke6VNpyYh_HBcFxCx64djO5F3wr5GbbnC0XePZ20,1015
+plancraft/models/dummy.py,sha256=856oEX6NquXSIIfQLTEFFeB8ib7VUUs5cB0TVHAiFvI,1248
 plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
 plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
 plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.3.11.dist-info/METADATA,sha256=wgvEebVv8N2uL51t9oRGEgkniXkbUCZbshDZPY7kRIo,11148
-plancraft-0.3.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-plancraft-0.3.11.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
-plancraft-0.3.11.dist-info/RECORD,,
+plancraft-0.3.13.dist-info/METADATA,sha256=7dISD2bnB8aAMG7uvQZDJZq4aKBu7gGIRaLLeTiQMvk,11148
+plancraft-0.3.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.3.13.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.3.13.dist-info/RECORD,,

{plancraft-0.3.11.dist-info → plancraft-0.3.13.dist-info}/WHEEL RENAMED Viewed

File without changes

{plancraft-0.3.11.dist-info → plancraft-0.3.13.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

plancraft 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

plancraft 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl