plancraft 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
plancraft/evaluator.py CHANGED
@@ -213,11 +213,9 @@ class Evaluator:
213
213
  num_non_env_actions += 1
214
214
  # action is environment action
215
215
  else:
216
- # add action to history
217
216
  if isinstance(action, str):
218
217
  observation = self.environment.step()
219
218
  else:
220
- self.history.add_action_to_history(action)
221
219
  observation = self.environment.step(action)
222
220
 
223
221
  # convert inventory observation to text message
@@ -229,6 +227,9 @@ class Evaluator:
229
227
 
230
228
  # check if the episode is done
231
229
  success = self.check_done(observation["inventory"], example.target)
230
+ # exit if success
231
+ if success:
232
+ break
232
233
 
233
234
  # add observation to history
234
235
  self.history.add_observation_to_history(observation)
@@ -236,11 +237,6 @@ class Evaluator:
236
237
  self.history.add_message_to_history(
237
238
  content=observation["message"], role="user"
238
239
  )
239
-
240
- # exit if success
241
- if success:
242
- break
243
-
244
240
  # predict next action
245
241
  raw_action = self.model.step(observation, dialogue_history=self.history)
246
242
  # add message to history
@@ -256,7 +252,6 @@ class Evaluator:
256
252
  "number_of_steps": self.history.num_steps,
257
253
  "model_trace": self.history.trace(),
258
254
  "example_id": example.id,
259
- "impossible": example.impossible,
260
255
  }
261
256
 
262
257
  def eval_all_examples(self, progress_bar=False) -> list:
plancraft/models/dummy.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import random
2
2
 
3
- from plancraft.config import EvalConfig
4
3
  from plancraft.environment.actions import (
5
4
  MoveAction,
6
5
  )
@@ -12,7 +11,7 @@ class DummyModel(PlancraftBaseModel):
12
11
  Dummy model returns actions that do random action
13
12
  """
14
13
 
15
- def __init__(self, cfg: EvalConfig):
14
+ def __init__(self, cfg=None):
16
15
  pass
17
16
 
18
17
  def reset(self):
plancraft/utils.py CHANGED
@@ -7,11 +7,7 @@ from typing import Optional
7
7
  import torch
8
8
  from loguru import logger
9
9
 
10
- from plancraft.environment.actions import (
11
- ActionHandlerBase,
12
- MoveAction,
13
- SmeltAction,
14
- )
10
+ from plancraft.environment.actions import ActionHandlerBase
15
11
  from plancraft.environment.prompts import (
16
12
  get_prompt_example,
17
13
  get_system_prompt,
@@ -108,10 +104,6 @@ class History:
108
104
  else:
109
105
  self.dialogue_history.append({"role": role, "content": content})
110
106
 
111
- def add_action_to_history(self, action: SmeltAction | MoveAction):
112
- if isinstance(action, SmeltAction) or isinstance(action, MoveAction):
113
- self.action_history.append(action.model_dump())
114
-
115
107
  def add_inventory_to_history(self, inventory: dict):
116
108
  self.inventory_history.append(inventory)
117
109
  # count inventory
@@ -148,7 +140,6 @@ class History:
148
140
  self.images = copy(self.prompt_images)
149
141
  self.initial_dialogue_length = len(self.dialogue_history)
150
142
 
151
- self.action_history = []
152
143
  self.inventory_history = []
153
144
  self.inventory_counters = []
154
145
 
@@ -159,7 +150,6 @@ class History:
159
150
  "dialogue_history": copy(
160
151
  self.dialogue_history[self.initial_dialogue_length :]
161
152
  ),
162
- "action_history": copy(self.action_history),
163
153
  "inventory_history": copy(self.inventory_history),
164
154
  "tokens_used": copy(self.tokens_used),
165
155
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plancraft
3
- Version: 0.3.4
3
+ Version: 0.3.5
4
4
  Summary: Plancraft: an evaluation dataset for planning with LLM agents
5
5
  License: MIT License
6
6
 
@@ -72,6 +72,13 @@ You can install the package by running the following command:
72
72
  pip install plancraft
73
73
  ```
74
74
 
75
+ Or:
76
+
77
+ ```bash
78
+ uv add plancraft
79
+ ```
80
+
81
+
75
82
  ![gif-example3](docs/images/train_images/TRAIN0010.gif)
76
83
  ![gif-example1](docs/images/train_images/TRAIN1133.gif)
77
84
  ![gif-example2](docs/images/train_images/TRAIN0383.gif)
@@ -117,17 +124,14 @@ The package also provides an `Evaluator` class that can be used to evaluate the
117
124
 
118
125
  ```python
119
126
  from plancraft.evaluator import Evaluator
120
- from plancraft.config import EvalConfig
121
127
 
122
128
  def main():
123
- # Create the config
124
- config = EvalConfig(...)
125
129
  # create model -- Note you can create your own model by subclassing PlancraftBaseModel
126
- model = get_model(config)
130
+ model = get_model("dummy")
127
131
  # Create the evaluator
128
- evaluator = Evaluator(config, model=model)
132
+ evaluator = Evaluator(run_name="dummy", model=model)
129
133
  # Evaluate the agent
130
- evaluator.eval_all_seeds()
134
+ evaluator.eval_all_examples()
131
135
  ```
132
136
 
133
137
  The evaluator class handles the environment loop and model interaction. The environment is created based on the configuration and the examples are loaded from the dataset. The `Evaluator` uses the dataset examples and initializes the environment with the example's inventory. It is also responsible for early stopping and verifying the target object has been crafted. Finally, it also saves the results of the evaluation and the images generated during the evaluation.
@@ -159,7 +163,6 @@ while not history.check_stuck() and history.num_steps < max_steps:
159
163
  # Handle invalid case (exceeded non-env action limit)
160
164
  observation = environment.step()
161
165
  else:
162
- history.add_action_to_history(action) # Add action to history
163
166
  observation = environment.step(action)
164
167
 
165
168
  # Convert observation to message and reset non-env counter
@@ -170,19 +173,16 @@ while not history.check_stuck() and history.num_steps < max_steps:
170
173
  # Check if episode is complete
171
174
  success = check_done(observation["inventory"], example.target)
172
175
 
173
- # Update history with observation and message
174
- history.add_observation_to_history(observation)
175
- history.add_message_to_history(content=observation["message"], role="user")
176
-
177
176
  if success: # Exit loop if success
178
177
  break
179
178
 
179
+ # Update history with observation and message
180
+ history.add_observation_to_history(observation)
181
+ history.add_message_to_history(content=observation["message"], role="user")
180
182
  # Model predicts next action
181
183
  raw_action = model.step(observation, dialogue_history=history)
182
-
183
184
  # Update history with predicted action
184
185
  history.add_message_to_history(content=raw_action, role="assistant")
185
-
186
186
  # Parse raw action into a structured format
187
187
  action = parse_raw_model_response(raw_action)
188
188
 
@@ -194,7 +194,6 @@ return {
194
194
  "number_of_steps": history.num_steps,
195
195
  "model_trace": history.trace(),
196
196
  "example_id": example.id,
197
- "impossible": example.impossible,
198
197
  }
199
198
  ```
200
199
 
@@ -1,8 +1,8 @@
1
1
  plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  plancraft/config.py,sha256=Ppkps-E8xDNYEP9prOVxW2zEG9MpWVzcLJi4tmGLjuQ,4285
3
- plancraft/evaluator.py,sha256=adGmrn3GMQd5KSfFGQZxHjisQbvoxvEv1W1CPxZnFi8,11061
3
+ plancraft/evaluator.py,sha256=dTsE3FiQTJc094TmBvfBvefOpGSYcePIGVT36OEIClU,10910
4
4
  plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
5
- plancraft/utils.py,sha256=rYiqLUaEqjdUG-nqeHmeVG3PaExAlYiBGXH5qzLZPhs,7224
5
+ plancraft/utils.py,sha256=8bO8wrblmIW1aXEJre7ALGbL6GvuFrY38aZDdA_8W-g,6882
6
6
  plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
7
7
  plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
8
8
  plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -1915,12 +1915,12 @@ plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,
1915
1915
  plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
1916
1916
  plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
1917
1917
  plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
1918
- plancraft/models/dummy.py,sha256=HVuX5Y9CPNDP8Ne4BNTe2qyWdxyhIgvPIIV3OhXxzD8,1062
1918
+ plancraft/models/dummy.py,sha256=jBxke6VNpyYh_HBcFxCx64djO5F3wr5GbbnC0XePZ20,1015
1919
1919
  plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
1920
1920
  plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
1921
1921
  plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
1922
1922
  plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
1923
- plancraft-0.3.4.dist-info/METADATA,sha256=W14g4fJ1y6zALGre8NKFRZXu9cVCrQS9i-24akOIWSw,11306
1924
- plancraft-0.3.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
- plancraft-0.3.4.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
- plancraft-0.3.4.dist-info/RECORD,,
1923
+ plancraft-0.3.5.dist-info/METADATA,sha256=QxQSXPXF162We8KwESaZ-nn94gqfz_5PQaXNDWkvV1Y,11147
1924
+ plancraft-0.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
+ plancraft-0.3.5.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
+ plancraft-0.3.5.dist-info/RECORD,,