PyPI - plancraft - Versions diffs - 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl - Mend

plancraft 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

plancraft/evaluator.py CHANGED Viewed

@@ -213,11 +213,9 @@ class Evaluator:
                 num_non_env_actions += 1
             # action is environment action
             else:
-                # add action to history
                 if isinstance(action, str):
                     observation = self.environment.step()
                 else:
-                    self.history.add_action_to_history(action)
                     observation = self.environment.step(action)
                 # convert inventory observation to text message
@@ -229,6 +227,9 @@ class Evaluator:
                 # check if the episode is done
                 success = self.check_done(observation["inventory"], example.target)
+            # exit if success
+            if success:
+                break
             # add observation to history
             self.history.add_observation_to_history(observation)
@@ -236,11 +237,6 @@ class Evaluator:
             self.history.add_message_to_history(
                 content=observation["message"], role="user"
             )
-            # exit if success
-            if success:
-                break
             # predict next action
             raw_action = self.model.step(observation, dialogue_history=self.history)
             # add message to history
@@ -256,7 +252,6 @@ class Evaluator:
             "number_of_steps": self.history.num_steps,
             "model_trace": self.history.trace(),
             "example_id": example.id,
-            "impossible": example.impossible,
         }
     def eval_all_examples(self, progress_bar=False) -> list:

plancraft/models/dummy.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import random
-from plancraft.config import EvalConfig
 from plancraft.environment.actions import (
     MoveAction,
 )
@@ -12,7 +11,7 @@ class DummyModel(PlancraftBaseModel):
     Dummy model returns actions that do random action
     """
-    def __init__(self, cfg: EvalConfig):
+    def __init__(self, cfg=None):
         pass
     def reset(self):

plancraft/utils.py CHANGED Viewed

@@ -7,11 +7,7 @@ from typing import Optional
 import torch
 from loguru import logger
-from plancraft.environment.actions import (
-    ActionHandlerBase,
-    MoveAction,
-    SmeltAction,
-)
+from plancraft.environment.actions import ActionHandlerBase
 from plancraft.environment.prompts import (
     get_prompt_example,
     get_system_prompt,
@@ -47,7 +43,6 @@ class History:
         self.use_text_inventory = use_text_inventory
         self.resolution = resolution  # low, medium, high
-        self.action_history = []
         self.inventory_history = []
         self.inventory_counters = []
@@ -108,10 +103,6 @@ class History:
             else:
                 self.dialogue_history.append({"role": role, "content": content})
-    def add_action_to_history(self, action: SmeltAction | MoveAction):
-        if isinstance(action, SmeltAction) or isinstance(action, MoveAction):
-            self.action_history.append(action.model_dump())
     def add_inventory_to_history(self, inventory: dict):
         self.inventory_history.append(inventory)
         # count inventory
@@ -148,7 +139,6 @@ class History:
         self.images = copy(self.prompt_images)
         self.initial_dialogue_length = len(self.dialogue_history)
-        self.action_history = []
         self.inventory_history = []
         self.inventory_counters = []
@@ -159,14 +149,13 @@ class History:
             "dialogue_history": copy(
                 self.dialogue_history[self.initial_dialogue_length :]
             ),
-            "action_history": copy(self.action_history),
             "inventory_history": copy(self.inventory_history),
             "tokens_used": copy(self.tokens_used),
         }
     @property
     def num_steps(self):
-        return len(self.action_history)
+        return (len(self.dialogue_history) - self.initial_dialogue_length) // 2
     def check_stuck(self, max_steps_no_change: int = 10) -> bool:
         """

{plancraft-0.3.4.dist-info → plancraft-0.3.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: plancraft
-Version: 0.3.4
+Version: 0.3.6
 Summary: Plancraft: an evaluation dataset for planning with LLM agents
 License: MIT License
@@ -72,6 +72,13 @@ You can install the package by running the following command:
 pip install plancraft
 ```
+Or:
+```bash
+uv add plancraft
+```
 ![gif-example3](docs/images/train_images/TRAIN0010.gif)
 ![gif-example1](docs/images/train_images/TRAIN1133.gif)
 ![gif-example2](docs/images/train_images/TRAIN0383.gif)
@@ -117,17 +124,14 @@ The package also provides an `Evaluator` class that can be used to evaluate the
 ```python
 from plancraft.evaluator import Evaluator
-from plancraft.config import EvalConfig
 def main():
-    # Create the config
-    config = EvalConfig(...)
     # create model -- Note you can create your own model by subclassing PlancraftBaseModel
-    model = get_model(config)
+    model = get_model("dummy")
     # Create the evaluator
-    evaluator = Evaluator(config, model=model)
+    evaluator = Evaluator(run_name="dummy", model=model)
     # Evaluate the agent
-    evaluator.eval_all_seeds()
+    evaluator.eval_all_examples()
 ```
 The evaluator class handles the environment loop and model interaction. The environment is created based on the configuration and the examples are loaded from the dataset. The `Evaluator` uses the dataset examples and initializes the environment with the example's inventory. It is also responsible for early stopping and verifying that the target object has been crafted. Finally, it also saves the results of the evaluation and the images generated during the evaluation.
@@ -159,7 +163,6 @@ while not history.check_stuck() and history.num_steps < max_steps:
             # Handle invalid case (exceeded non-env action limit)
             observation = environment.step()
         else:
-            history.add_action_to_history(action)  # Add action to history
             observation = environment.step(action)
         # Convert observation to message and reset non-env counter
@@ -170,19 +173,16 @@ while not history.check_stuck() and history.num_steps < max_steps:
         # Check if episode is complete
         success = check_done(observation["inventory"], example.target)
-    # Update history with observation and message
-    history.add_observation_to_history(observation)
-    history.add_message_to_history(content=observation["message"], role="user")
     if success:  # Exit loop if success
         break
+    # Update history with observation and message
+    history.add_observation_to_history(observation)
+    history.add_message_to_history(content=observation["message"], role="user")
     # Model predicts next action
     raw_action = model.step(observation, dialogue_history=history)
     # Update history with predicted action
     history.add_message_to_history(content=raw_action, role="assistant")
     # Parse raw action into a structured format
     action = parse_raw_model_response(raw_action)
@@ -194,7 +194,6 @@ return {
     "number_of_steps": history.num_steps,
     "model_trace": history.trace(),
     "example_id": example.id,
-    "impossible": example.impossible,
 }
 ```

{plancraft-0.3.4.dist-info → plancraft-0.3.6.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 plancraft/config.py,sha256=Ppkps-E8xDNYEP9prOVxW2zEG9MpWVzcLJi4tmGLjuQ,4285
-plancraft/evaluator.py,sha256=adGmrn3GMQd5KSfFGQZxHjisQbvoxvEv1W1CPxZnFi8,11061
+plancraft/evaluator.py,sha256=dTsE3FiQTJc094TmBvfBvefOpGSYcePIGVT36OEIClU,10910
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
-plancraft/utils.py,sha256=rYiqLUaEqjdUG-nqeHmeVG3PaExAlYiBGXH5qzLZPhs,7224
+plancraft/utils.py,sha256=phaHzbIS85YZrBPaGG9TStHY8ZBKR1LKfuN1exfVy1U,6889
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
 plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
 plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -1915,12 +1915,12 @@ plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,
 plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
 plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
 plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
-plancraft/models/dummy.py,sha256=HVuX5Y9CPNDP8Ne4BNTe2qyWdxyhIgvPIIV3OhXxzD8,1062
+plancraft/models/dummy.py,sha256=jBxke6VNpyYh_HBcFxCx64djO5F3wr5GbbnC0XePZ20,1015
 plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
 plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
 plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.3.4.dist-info/METADATA,sha256=W14g4fJ1y6zALGre8NKFRZXu9cVCrQS9i-24akOIWSw,11306
-plancraft-0.3.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-plancraft-0.3.4.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
-plancraft-0.3.4.dist-info/RECORD,,
+plancraft-0.3.6.dist-info/METADATA,sha256=MVnklft3zz21a9085j-H-_VYAPJREH2-1ojS08ICbzk,11147
+plancraft-0.3.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.3.6.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.3.6.dist-info/RECORD,,

{plancraft-0.3.4.dist-info → plancraft-0.3.6.dist-info}/WHEEL RENAMED Viewed

File without changes

{plancraft-0.3.4.dist-info → plancraft-0.3.6.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

plancraft 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

plancraft 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl