plancraft 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -59,7 +59,8 @@ SEARCH_STEPS = [
59
59
 
60
60
  def get_system_prompt(
61
61
  handlers: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
62
- ):
62
+ use_multimodal_content_format=False,
63
+ ) -> dict:
63
64
  action_names = [handler.action_name for handler in handlers]
64
65
  assert "move" in action_names, "MoveActionHandler should be one of the handlers"
65
66
  assert "smelt" in action_names, "SmeltActionHandler should be one of the handlers"
@@ -72,7 +73,17 @@ def get_system_prompt(
72
73
  for handler in handlers:
73
74
  output_format += f"\n\t- {handler.prompt_format_example}"
74
75
 
75
- return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
76
+ system_prompt_text = f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
77
+
78
+ if use_multimodal_content_format:
79
+ return {
80
+ "role": "system",
81
+ "content": [{"text": system_prompt_text, "type": "text"}],
82
+ }
83
+ return {
84
+ "role": "system",
85
+ "content": system_prompt_text,
86
+ }
76
87
 
77
88
 
78
89
  def get_prompt_example(
plancraft/evaluator.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import os
3
+ from typing import Optional
3
4
 
4
5
  import imageio
5
6
  from loguru import logger
@@ -8,18 +9,18 @@ from tqdm import tqdm
8
9
  import wandb
9
10
  from plancraft.config import PlancraftExample
10
11
  from plancraft.environment.actions import (
11
- StopAction,
12
12
  ActionHandlerBase,
13
13
  MoveActionHandler,
14
14
  SmeltActionHandler,
15
+ StopAction,
15
16
  )
16
17
  from plancraft.environment.env import (
17
18
  PlancraftEnvironment,
18
19
  get_objective_str,
19
20
  target_and_inventory_to_text_obs,
20
21
  )
21
- from plancraft.utils import History
22
22
  from plancraft.models.base import PlancraftBaseModel
23
+ from plancraft.utils import History
23
24
 
24
25
 
25
26
  class Evaluator:
@@ -48,6 +49,10 @@ class Evaluator:
48
49
  use_images: bool = False,
49
50
  use_text_inventory: bool = False,
50
51
  use_fasterrcnn: bool = False,
52
+ system_prompt: Optional[dict] = None,
53
+ prompt_examples: list[dict] = [],
54
+ prompt_images: list[str] = [],
55
+ few_shot: bool = True,
51
56
  ):
52
57
  self.run_name = run_name
53
58
  self.use_multimodal_content_format = use_multimodal_content_format
@@ -77,6 +82,10 @@ class Evaluator:
77
82
  use_images=use_images,
78
83
  use_text_inventory=use_text_inventory,
79
84
  resolution=resolution,
85
+ few_shot=few_shot,
86
+ system_prompt=system_prompt,
87
+ prompt_examples=prompt_examples,
88
+ prompt_images=prompt_images,
80
89
  )
81
90
 
82
91
  # load model
@@ -204,11 +213,9 @@ class Evaluator:
204
213
  num_non_env_actions += 1
205
214
  # action is environment action
206
215
  else:
207
- # add action to history
208
216
  if isinstance(action, str):
209
217
  observation = self.environment.step()
210
218
  else:
211
- self.history.add_action_to_history(action)
212
219
  observation = self.environment.step(action)
213
220
 
214
221
  # convert inventory observation to text message
@@ -220,6 +227,9 @@ class Evaluator:
220
227
 
221
228
  # check if the episode is done
222
229
  success = self.check_done(observation["inventory"], example.target)
230
+ # exit if success
231
+ if success:
232
+ break
223
233
 
224
234
  # add observation to history
225
235
  self.history.add_observation_to_history(observation)
@@ -227,11 +237,6 @@ class Evaluator:
227
237
  self.history.add_message_to_history(
228
238
  content=observation["message"], role="user"
229
239
  )
230
-
231
- # exit if success
232
- if success:
233
- break
234
-
235
240
  # predict next action
236
241
  raw_action = self.model.step(observation, dialogue_history=self.history)
237
242
  # add message to history
@@ -247,7 +252,6 @@ class Evaluator:
247
252
  "number_of_steps": self.history.num_steps,
248
253
  "model_trace": self.history.trace(),
249
254
  "example_id": example.id,
250
- "impossible": example.impossible,
251
255
  }
252
256
 
253
257
  def eval_all_examples(self, progress_bar=False) -> list:
plancraft/models/dummy.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import random
2
2
 
3
- from plancraft.config import EvalConfig
4
3
  from plancraft.environment.actions import (
5
4
  MoveAction,
6
5
  )
@@ -12,7 +11,7 @@ class DummyModel(PlancraftBaseModel):
12
11
  Dummy model returns actions that do random action
13
12
  """
14
13
 
15
- def __init__(self, cfg: EvalConfig):
14
+ def __init__(self, cfg=None):
16
15
  pass
17
16
 
18
17
  def reset(self):
plancraft/utils.py CHANGED
@@ -2,18 +2,15 @@ import glob
2
2
  import pathlib
3
3
  from collections import Counter
4
4
  from copy import copy
5
+ from typing import Optional
5
6
 
6
7
  import torch
7
8
  from loguru import logger
8
9
 
9
- from plancraft.environment.actions import (
10
- ActionHandlerBase,
11
- MoveAction,
12
- SmeltAction,
13
- )
10
+ from plancraft.environment.actions import ActionHandlerBase
14
11
  from plancraft.environment.prompts import (
15
- get_system_prompt,
16
12
  get_prompt_example,
13
+ get_system_prompt,
17
14
  load_prompt_images,
18
15
  )
19
16
 
@@ -35,6 +32,9 @@ class History:
35
32
  use_images=False,
36
33
  use_text_inventory=False,
37
34
  resolution="high",
35
+ system_prompt: Optional[dict] = None,
36
+ prompt_examples: list[dict] = [],
37
+ prompt_images: list[str] = [],
38
38
  ):
39
39
  self.action_handlers = actions
40
40
  self.use_multimodal_content_format = use_multimodal_content_format
@@ -49,31 +49,30 @@ class History:
49
49
 
50
50
  self.tokens_used = 0
51
51
 
52
+ # use system prompt if provided
53
+ if system_prompt:
54
+ self.system_prompt_dialogue = system_prompt
55
+ else:
56
+ # generate system prompt
57
+ self.system_prompt_dialogue = get_system_prompt(
58
+ handlers=self.action_handlers,
59
+ use_multimodal_content_format=self.use_multimodal_content_format,
60
+ )
61
+
52
62
  # set up dialogue history with few-shot prompt
63
+ self.prompt_examples = prompt_examples
64
+ self.prompt_images = prompt_images
53
65
  self.set_up_few_shot_prompt()
54
- self.system_prompt_dialogue = self.system_prompt()
55
66
 
56
67
  self.dialogue_history = copy(self.prompt_examples)
57
68
  self.images = copy(self.prompt_images)
58
69
  self.initial_dialogue_length = len(self.dialogue_history)
59
70
 
60
- def system_prompt(self):
61
- # kept separate from dialogue history because certain models deal with system prompt differently
62
- system_prompt_text = get_system_prompt(handlers=self.action_handlers)
63
- if self.use_multimodal_content_format:
64
- return {
65
- "role": "system",
66
- "content": [{"text": system_prompt_text, "type": "text"}],
67
- }
68
- return {
69
- "role": "system",
70
- "content": system_prompt_text,
71
- }
72
-
73
71
  def set_up_few_shot_prompt(self):
74
- self.prompt_examples = []
75
- self.prompt_images = []
76
-
72
+ # if either prompt_examples or prompt_images are provided, skip
73
+ if self.prompt_examples or self.prompt_images:
74
+ return
75
+ # if few-shot is not enabled, skip
77
76
  if self.few_shot:
78
77
  self.prompt_examples = get_prompt_example(
79
78
  self.action_handlers,
@@ -105,10 +104,6 @@ class History:
105
104
  else:
106
105
  self.dialogue_history.append({"role": role, "content": content})
107
106
 
108
- def add_action_to_history(self, action: SmeltAction | MoveAction):
109
- if isinstance(action, SmeltAction) or isinstance(action, MoveAction):
110
- self.action_history.append(action.model_dump())
111
-
112
107
  def add_inventory_to_history(self, inventory: dict):
113
108
  self.inventory_history.append(inventory)
114
109
  # count inventory
@@ -145,7 +140,6 @@ class History:
145
140
  self.images = copy(self.prompt_images)
146
141
  self.initial_dialogue_length = len(self.dialogue_history)
147
142
 
148
- self.action_history = []
149
143
  self.inventory_history = []
150
144
  self.inventory_counters = []
151
145
 
@@ -156,7 +150,6 @@ class History:
156
150
  "dialogue_history": copy(
157
151
  self.dialogue_history[self.initial_dialogue_length :]
158
152
  ),
159
- "action_history": copy(self.action_history),
160
153
  "inventory_history": copy(self.inventory_history),
161
154
  "tokens_used": copy(self.tokens_used),
162
155
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plancraft
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: Plancraft: an evaluation dataset for planning with LLM agents
5
5
  License: MIT License
6
6
 
@@ -72,6 +72,13 @@ You can install the package by running the following command:
72
72
  pip install plancraft
73
73
  ```
74
74
 
75
+ Or:
76
+
77
+ ```bash
78
+ uv add plancraft
79
+ ```
80
+
81
+
75
82
  ![gif-example3](docs/images/train_images/TRAIN0010.gif)
76
83
  ![gif-example1](docs/images/train_images/TRAIN1133.gif)
77
84
  ![gif-example2](docs/images/train_images/TRAIN0383.gif)
@@ -117,17 +124,14 @@ The package also provides an `Evaluator` class that can be used to evaluate the
117
124
 
118
125
  ```python
119
126
  from plancraft.evaluator import Evaluator
120
- from plancraft.config import EvalConfig
121
127
 
122
128
  def main():
123
- # Create the config
124
- config = EvalConfig(...)
125
129
  # create model -- Note you can create your own model by subclassing PlancraftBaseModel
126
- model = get_model(config)
130
+ model = get_model("dummy")
127
131
  # Create the evaluator
128
- evaluator = Evaluator(config, model=model)
132
+ evaluator = Evaluator(run_name="dummy", model=model)
129
133
  # Evaluate the agent
130
- evaluator.eval_all_seeds()
134
+ evaluator.eval_all_examples()
131
135
  ```
132
136
 
133
137
  The evaluator class handles the environment loop and model interaction. The environment is created based on the configuration and the examples are loaded from the dataset. The `Evaluator` uses the dataset examples and initializes the environment with the example's inventory. It is also responsible for early stopping and verifying the target object has been crafted. Finally, it also saves the results of the evaluation and the images generated during the evaluation.
@@ -159,7 +163,6 @@ while not history.check_stuck() and history.num_steps < max_steps:
159
163
  # Handle invalid case (exceeded non-env action limit)
160
164
  observation = environment.step()
161
165
  else:
162
- history.add_action_to_history(action) # Add action to history
163
166
  observation = environment.step(action)
164
167
 
165
168
  # Convert observation to message and reset non-env counter
@@ -170,19 +173,16 @@ while not history.check_stuck() and history.num_steps < max_steps:
170
173
  # Check if episode is complete
171
174
  success = check_done(observation["inventory"], example.target)
172
175
 
173
- # Update history with observation and message
174
- history.add_observation_to_history(observation)
175
- history.add_message_to_history(content=observation["message"], role="user")
176
-
177
176
  if success: # Exit loop if success
178
177
  break
179
178
 
179
+ # Update history with observation and message
180
+ history.add_observation_to_history(observation)
181
+ history.add_message_to_history(content=observation["message"], role="user")
180
182
  # Model predicts next action
181
183
  raw_action = model.step(observation, dialogue_history=history)
182
-
183
184
  # Update history with predicted action
184
185
  history.add_message_to_history(content=raw_action, role="assistant")
185
-
186
186
  # Parse raw action into a structured format
187
187
  action = parse_raw_model_response(raw_action)
188
188
 
@@ -194,7 +194,6 @@ return {
194
194
  "number_of_steps": history.num_steps,
195
195
  "model_trace": history.trace(),
196
196
  "example_id": example.id,
197
- "impossible": example.impossible,
198
197
  }
199
198
  ```
200
199
 
@@ -1,8 +1,8 @@
1
1
  plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  plancraft/config.py,sha256=Ppkps-E8xDNYEP9prOVxW2zEG9MpWVzcLJi4tmGLjuQ,4285
3
- plancraft/evaluator.py,sha256=7PjdITOTUCtjPywFOOd9vVhl5UDKZuFF7rc3mjUOn0Q,10717
3
+ plancraft/evaluator.py,sha256=dTsE3FiQTJc094TmBvfBvefOpGSYcePIGVT36OEIClU,10910
4
4
  plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
5
- plancraft/utils.py,sha256=7VWKVlDhoMacRypRRSKM1K3hwwJ0nHR3zyx9jZH1C1g,7042
5
+ plancraft/utils.py,sha256=8bO8wrblmIW1aXEJre7ALGbL6GvuFrY38aZDdA_8W-g,6882
6
6
  plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
7
7
  plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
8
8
  plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -15,7 +15,7 @@ plancraft/environment/actions.py,sha256=D9QqBW7yWsbWCjxNyWp61Xtb0c6EtyXk3PZ1I8SR
15
15
  plancraft/environment/env.py,sha256=F5xo1eAJ9MeuoE2IpG_LtbaE0BGd66URPB_rehAWIiU,16372
16
16
  plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
17
17
  plancraft/environment/planner.py,sha256=eJExz3OxSzurIEdH9LOtMwFH9ApqMQ3CokVhmbV6Px0,3953
18
- plancraft/environment/prompts.py,sha256=OKxiv02NIhRk5FZJUEDRLkVWVMc-aXKJi7i7X61uUmk,6633
18
+ plancraft/environment/prompts.py,sha256=8QXclX0ygpL02uZichE1AVkbdn_0HGteD5bzo0FZGOU,6947
19
19
  plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
20
20
  plancraft/environment/sampler.py,sha256=IZT-XjmWSZrs0zDyRTMjYytXxewdwYf5YGGdKsR5ll4,7643
21
21
  plancraft/environment/search.py,sha256=uFHpLvW40rMKOxDabcyWrpOrhKLDZqAJOF_jew4_WXk,1837
@@ -1915,12 +1915,12 @@ plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,
1915
1915
  plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
1916
1916
  plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
1917
1917
  plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
1918
- plancraft/models/dummy.py,sha256=HVuX5Y9CPNDP8Ne4BNTe2qyWdxyhIgvPIIV3OhXxzD8,1062
1918
+ plancraft/models/dummy.py,sha256=jBxke6VNpyYh_HBcFxCx64djO5F3wr5GbbnC0XePZ20,1015
1919
1919
  plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
1920
1920
  plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
1921
1921
  plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
1922
1922
  plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
1923
- plancraft-0.3.3.dist-info/METADATA,sha256=UMIYGLhvaJO8CJyOj4hka_5OO2T728yqhzhX0rMQqfQ,11306
1924
- plancraft-0.3.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
- plancraft-0.3.3.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
- plancraft-0.3.3.dist-info/RECORD,,
1923
+ plancraft-0.3.5.dist-info/METADATA,sha256=QxQSXPXF162We8KwESaZ-nn94gqfz_5PQaXNDWkvV1Y,11147
1924
+ plancraft-0.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
+ plancraft-0.3.5.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
+ plancraft-0.3.5.dist-info/RECORD,,