PyPI - plancraft - Versions diffs - 0.3.16__py3-none-any.whl → 0.3.18__py3-none-any.whl - Mend

plancraft 0.3.16__py3-none-any.whl → 0.3.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

plancraft/environment/actions.py +34 -19
plancraft/environment/search.py +11 -5
plancraft/evaluator.py +134 -3
plancraft/models/dummy.py +3 -0
plancraft/models/oracle.py +9 -0
{plancraft-0.3.16.dist-info → plancraft-0.3.18.dist-info}/METADATA +1 -1
{plancraft-0.3.16.dist-info → plancraft-0.3.18.dist-info}/RECORD +9 -9
{plancraft-0.3.16.dist-info → plancraft-0.3.18.dist-info}/WHEEL +0 -0
{plancraft-0.3.16.dist-info → plancraft-0.3.18.dist-info}/licenses/LICENSE +0 -0

plancraft/environment/actions.py CHANGED Viewed

@@ -208,10 +208,10 @@ class MoveActionHandler(ActionHandlerBase):
         """
         Parse the raw model response to a MoveAction
         """
-        action_match = re.search(f"({self.action_name}):", generated_text)
-        if not action_match:
-            return
         try:
+            action_match = re.search(f"({self.action_name}):", generated_text)
+            if not action_match:
+                return
             slot_from = re.search(r" from (\[[ABCI]?\d+\])", generated_text).group(1)
             slot_to = re.search(r" to (\[[ABCI]?\d+\])", generated_text).group(1)
             quantity = re.search(r"with quantity (\d+)", generated_text).group(1)
@@ -221,8 +221,10 @@ class MoveActionHandler(ActionHandlerBase):
                 quantity=quantity,
             )
             return action
-        except AttributeError as e:
-            return f"Format Error: {e}"
+        except AttributeError:
+            return (
+                f"Format Error. Action be in the format: {self.prompt_format_example}"
+            )
 class SmeltActionHandler(ActionHandlerBase):
@@ -242,10 +244,11 @@ class SmeltActionHandler(ActionHandlerBase):
         """
         Parse the raw model response to a SmeltAction
         """
-        action_match = re.search(f"({self.action_name}):", generated_text)
-        if not action_match:
-            return
         try:
+            action_match = re.search(f"({self.action_name}):", generated_text)
+            if not action_match:
+                return
             slot_from = re.search(r" from (\[[ABCI]?\d+\])", generated_text).group(1)
             slot_to = re.search(r" to (\[[ABCI]?\d+\])", generated_text).group(1)
             quantity = re.search(r"with quantity (\d+)", generated_text).group(1)
@@ -255,8 +258,10 @@ class SmeltActionHandler(ActionHandlerBase):
                 quantity=quantity,
             )
             return action
-        except AttributeError as e:
-            return f"Format Error: {e}"
+        except AttributeError:
+            return (
+                f"Format Error. Action be in the format: {self.prompt_format_example}"
+            )
 class ImpossibleActionHandler(ActionHandlerBase):
@@ -276,11 +281,16 @@ class ImpossibleActionHandler(ActionHandlerBase):
         """
         Parse the raw model response to a StopAction
         """
-        action_match = re.search(f"({self.action_name}):", generated_text)
-        if not action_match:
-            return
-        reason = re.search(r"impossible: (.*)", generated_text).group(1)
-        return StopAction(reason=reason)
+        try:
+            action_match = re.search(f"({self.action_name}):", generated_text)
+            if not action_match:
+                return
+            reason = re.search(r"impossible: (.*)", generated_text).group(1)
+            return StopAction(reason=reason)
+        except AttributeError:
+            return (
+                f"Format Error. Action be in the format: {self.prompt_format_example}"
+            )
 class ThinkActionHandler(ActionHandlerBase):
@@ -300,7 +310,12 @@ class ThinkActionHandler(ActionHandlerBase):
         """
         Parse the raw model response to a ThinkAction
         """
-        action_match = re.search(f"({self.action_name}):", generated_text)
-        if not action_match:
-            return
-        return "Ok"
+        try:
+            action_match = re.search(f"({self.action_name}):", generated_text)
+            if not action_match:
+                return
+            return "Ok"
+        except AttributeError:
+            return (
+                f"Format Error. Action be in the format: {self.prompt_format_example}"
+            )

plancraft/environment/search.py CHANGED Viewed

@@ -46,8 +46,14 @@ class GoldSearchActionHandler(ActionHandlerBase):
         """
         Parse the raw model response to a SearchAction
         """
-        action_match = re.search(f"({self.action_name}):", generated_text)
-        if not action_match:
-            return
-        search_target = re.search(r"search: (\w+)", generated_text).group(1)
-        return gold_search_recipe(search_target)
+        try:
+            action_match = re.search(f"({self.action_name}):", generated_text)
+            if not action_match:
+                return
+            search_target = re.search(r"search: *(\w+)", generated_text).group(1)
+            return gold_search_recipe(search_target)
+        except AttributeError:
+            return (
+                f"Format Error. Action be in the format: {self.prompt_format_example}"
+            )

plancraft/evaluator.py CHANGED Viewed

@@ -176,12 +176,14 @@ class Evaluator:
         example: PlancraftExample,
         model: PlancraftBaseModel,
     ) -> dict:
-        """Given the loaded model and an example from Plancraft
-        run the episode until success or termination."""
+        """
+        Given the loaded model and an example from Plancraft
+        run the episode until success or termination.
+        """
         # start environment
         environment = PlancraftEnvironment(
-            inventory=example.slotted_inventory,
+            inventory=deepcopy(example.slotted_inventory),
             resolution=self.resolution,
         )
@@ -252,6 +254,135 @@ class Evaluator:
             "images": history.images,
         }
+    def batch_eval_examples(
+        self,
+        examples: list[PlancraftExample],
+        model,
+    ) -> list:
+        # Initialize environments and histories
+        environments = [
+            PlancraftEnvironment(
+                inventory=deepcopy(examples[i].slotted_inventory),
+                resolution=self.resolution,
+            )
+            for i in range(len(examples))
+        ]
+        histories = [
+            History(
+                actions=self.actions,
+                use_multimodal_content_format=self.use_multimodal_content_format,
+                use_images=self.use_images,
+                use_text_inventory=self.use_text_inventory,
+                resolution=self.resolution,
+                few_shot=self.few_shot,
+                system_prompt=deepcopy(self.system_prompt),
+                prompt_examples=deepcopy(self.prompt_examples),
+                prompt_images=deepcopy(self.prompt_images),
+            )
+            for _ in range(len(examples))
+        ]
+        # Track which environments are still active
+        active_mask = [True for _ in range(len(examples))]
+        results = [None for _ in range(len(examples))]
+        steps_taken = [0 for _ in range(len(examples))]
+        actions = [None for _ in range(len(examples))]
+        while any(active_mask) and all(steps < self.max_steps for steps in steps_taken):
+            # Get observations for all active environments
+            observations = []
+            active_indices = []
+            for i, (env, action, active) in enumerate(
+                zip(environments, actions, active_mask)
+            ):
+                if not active:
+                    continue
+                if isinstance(action, StopAction):
+                    # Handle stop action
+                    active_mask[i] = False
+                    results[i] = {
+                        "success": examples[i].impossible,
+                        "recipe_type": examples[i].recipe_type,
+                        "complexity": examples[i].complexity_split,
+                        "number_of_steps": steps_taken[i],
+                        "model_trace": histories[i].trace(),
+                        "example_id": examples[i].id,
+                        "images": histories[i].images,
+                    }
+                    logger.info("STOP")
+                    continue
+                active_indices.append(i)
+                if isinstance(action, str):
+                    # Handle message action
+                    obs = env.step()
+                    obs["target"] = examples[i].target
+                    obs["message"] = action
+                else:
+                    # Handle environment action
+                    obs = env.step(action)
+                    obs["target"] = examples[i].target
+                    obs["message"] = self.convert_observation_to_message(
+                        obs, model=model
+                    )
+                    # Check if done
+                    if self.check_done(obs["inventory"], examples[i].target):
+                        active_mask[i] = False
+                        results[i] = {
+                            "success": True,
+                            "recipe_type": examples[i].recipe_type,
+                            "complexity": examples[i].complexity_split,
+                            "number_of_steps": steps_taken[i],
+                            "model_trace": histories[i].trace(),
+                            "example_id": examples[i].id,
+                            "images": histories[i].images,
+                        }
+                        continue
+                observations.append(obs)
+                histories[i].add_observation_to_history(obs)
+                histories[i].add_message_to_history(content=obs["message"], role="user")
+                steps_taken[i] += 1
+            if not observations:
+                break
+            # Batch predict actions for active environments
+            active_histories = [histories[i] for i in active_indices]
+            raw_actions = model.batch_step(
+                observations, dialogue_histories=active_histories
+            )
+            # Process actions for each active environment
+            for idx, raw_action in zip(active_indices, raw_actions):
+                logger.info(f"{histories[idx].num_steps}, {raw_action}")
+                histories[idx].add_message_to_history(
+                    content=raw_action, role="assistant"
+                )
+                actions[idx] = self.parse_raw_model_response(
+                    raw_action,
+                    observation=observations[active_indices.index(idx)],
+                    history=histories[idx],
+                )
+        # Fill in results for environments that didn't finish
+        for i, result in enumerate(results):
+            if result is None:
+                results[i] = {
+                    "success": False,
+                    "recipe_type": examples[i].recipe_type,
+                    "complexity": examples[i].complexity_split,
+                    "number_of_steps": steps_taken[i],
+                    "model_trace": histories[i].trace(),
+                    "example_id": examples[i].id,
+                    "images": histories[i].images,
+                }
+        return results
     def eval_all_examples(self, model, progress_bar=False) -> list:
         results = []
         pbar = tqdm(

plancraft/models/dummy.py CHANGED Viewed

@@ -40,3 +40,6 @@ class DummyModel(PlancraftBaseModel):
     def step(self, observation: dict, **kwargs) -> str:
         return str(self.random_select(observation))
+    def batch_step(self, observations: list[dict], **kwargs) -> list:
+        return [self.step(observation) for observation in observations]

plancraft/models/oracle.py CHANGED Viewed

@@ -38,3 +38,12 @@ class OracleModel(PlancraftBaseModel):
         action = self.subplans.pop(0)
         return action
+    def batch_step(self, observations: list[dict], **kwargs) -> list:
+        # Need to fully isolate state between examples
+        actions = []
+        for observation in observations:
+            self.reset()
+            action = self.step(observation)
+            actions.append(action)
+        return actions

{plancraft-0.3.16.dist-info → plancraft-0.3.18.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: plancraft
-Version: 0.3.16
+Version: 0.3.18
 Summary: Plancraft: an evaluation dataset for planning with LLM agents
 License: MIT License

{plancraft-0.3.16.dist-info → plancraft-0.3.18.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 plancraft/config.py,sha256=ShsFRlJ7plsl3ToD9fiO_4LDQuXdbjNV6Xp6o3Yk2Yg,4315
-plancraft/evaluator.py,sha256=q7khX8FrMeb5QOgYZba-24jC7ZXp83VU7sa1H1kKS08,11061
+plancraft/evaluator.py,sha256=v8itX8buduqTZdR39gtLwdhKGEnSX3rJv9Yd13EzNgQ,16395
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
 plancraft/utils.py,sha256=67UUDMSv8TqX_I0fL5-yG_vkHvTZlnhSLkktWAg5p34,5712
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
@@ -11,14 +11,14 @@ plancraft/data/val.json,sha256=IToAiaqUNQi_xhX1bzmInuskLaT7C2ryQjP-CZkzL24,13044
 plancraft/data/val.small.easy.json,sha256=9zEmqepjXG2NIp88xnFqOCkwsUsku3HEwHoQGxgTr6U,190252
 plancraft/data/val.small.json,sha256=76E9EFaljDQyAokg97e-IblvcOe6KbrdKkXvRxhhkgo,237653
 plancraft/environment/__init__.py,sha256=XFsFny4lH195AwAmL-WeCaF9ZCMgc7IgXIwhQ8FTdgE,505
-plancraft/environment/actions.py,sha256=AQxFaK4YW53mPwhuPhHrDF9wENSVjPHSWk0v77I1thw,9460
+plancraft/environment/actions.py,sha256=VhPSRr0b1ySxb106TcBFdb3MdycxWwQGzqDnWQagm-8,10007
 plancraft/environment/env.py,sha256=A4532st7JFBYBF_Nh0CEEi3ZTLJAeaB3t9PAIVSemj0,16390
 plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
 plancraft/environment/planner.py,sha256=uIOJjIoyT_4pxeWeTKb8BkLJyKZG0-AMoEOkZs6Ua9A,19340
 plancraft/environment/prompts.py,sha256=8QXclX0ygpL02uZichE1AVkbdn_0HGteD5bzo0FZGOU,6947
 plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
 plancraft/environment/sampler.py,sha256=79hLpTU0ajvMPoBsvSe8tE88x31c8Vlczb3tJZJcau0,7441
-plancraft/environment/search.py,sha256=Dmdvj04kMvPlwvoWSc2261LTXV8RbMpS4FODV1YoZKs,1847
+plancraft/environment/search.py,sha256=kk6t-MkpFGTL7I38GQ6H21BjW9qJLSNGMbJqvZhr1LE,2035
 plancraft/environment/assets/constants.json,sha256=kyOIOh82CTTMMGEIS60k5k6M-6fkEmYDoGAnvi3Zx5k,1379016
 plancraft/environment/assets/minecraft_font.ttf,sha256=AzoK9cgggXwjFPHtIO7uz-YaDrminl3nvB-VsaTvTAk,60992
 plancraft/environment/assets/table.png,sha256=IKIViZKAPyR4FWnS0JP9AZ19vIEO3qoS5-YRGAO1ow8,5430
@@ -1915,12 +1915,12 @@ plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,
 plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
 plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
 plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
-plancraft/models/dummy.py,sha256=856oEX6NquXSIIfQLTEFFeB8ib7VUUs5cB0TVHAiFvI,1248
+plancraft/models/dummy.py,sha256=3Nsnw12s_n5mWMuxUTaPCuJIzPp0vLHWKE827iKY5o0,1391
 plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
-plancraft/models/oracle.py,sha256=tMp9mTwD70T3qohj-LZhJFjHYWyiVHDh8gu27asVimI,1342
+plancraft/models/oracle.py,sha256=f-0KWlBuHy6wcxmDsxM3MQ_QwfBstzfbA26mlk1MgLA,1657
 plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.3.16.dist-info/METADATA,sha256=FIfCMBzCVuDWFCf5cPPKXLdm1EHbgKRpQ2eT5khTMN0,11148
-plancraft-0.3.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-plancraft-0.3.16.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
-plancraft-0.3.16.dist-info/RECORD,,
+plancraft-0.3.18.dist-info/METADATA,sha256=p_Ln_3jx77ygBZG6yjuLhVs883PysUXUCi1sK67QvJs,11148
+plancraft-0.3.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.3.18.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.3.18.dist-info/RECORD,,

{plancraft-0.3.16.dist-info → plancraft-0.3.18.dist-info}/WHEEL RENAMED Viewed

File without changes

{plancraft-0.3.16.dist-info → plancraft-0.3.18.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

plancraft 0.3.16__py3-none-any.whl → 0.3.18__py3-none-any.whl

plancraft 0.3.16__py3-none-any.whl → 0.3.18__py3-none-any.whl