plancraft 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- plancraft/evaluator.py +3 -8
- plancraft/models/dummy.py +1 -2
- plancraft/utils.py +2 -13
- {plancraft-0.3.4.dist-info → plancraft-0.3.6.dist-info}/METADATA +14 -15
- {plancraft-0.3.4.dist-info → plancraft-0.3.6.dist-info}/RECORD +7 -7
- {plancraft-0.3.4.dist-info → plancraft-0.3.6.dist-info}/WHEEL +0 -0
- {plancraft-0.3.4.dist-info → plancraft-0.3.6.dist-info}/licenses/LICENSE +0 -0
plancraft/evaluator.py
CHANGED
@@ -213,11 +213,9 @@ class Evaluator:
|
|
213
213
|
num_non_env_actions += 1
|
214
214
|
# action is environment action
|
215
215
|
else:
|
216
|
-
# add action to history
|
217
216
|
if isinstance(action, str):
|
218
217
|
observation = self.environment.step()
|
219
218
|
else:
|
220
|
-
self.history.add_action_to_history(action)
|
221
219
|
observation = self.environment.step(action)
|
222
220
|
|
223
221
|
# convert inventory observation to text message
|
@@ -229,6 +227,9 @@ class Evaluator:
|
|
229
227
|
|
230
228
|
# check if the episode is done
|
231
229
|
success = self.check_done(observation["inventory"], example.target)
|
230
|
+
# exit if success
|
231
|
+
if success:
|
232
|
+
break
|
232
233
|
|
233
234
|
# add observation to history
|
234
235
|
self.history.add_observation_to_history(observation)
|
@@ -236,11 +237,6 @@ class Evaluator:
|
|
236
237
|
self.history.add_message_to_history(
|
237
238
|
content=observation["message"], role="user"
|
238
239
|
)
|
239
|
-
|
240
|
-
# exit if success
|
241
|
-
if success:
|
242
|
-
break
|
243
|
-
|
244
240
|
# predict next action
|
245
241
|
raw_action = self.model.step(observation, dialogue_history=self.history)
|
246
242
|
# add message to history
|
@@ -256,7 +252,6 @@ class Evaluator:
|
|
256
252
|
"number_of_steps": self.history.num_steps,
|
257
253
|
"model_trace": self.history.trace(),
|
258
254
|
"example_id": example.id,
|
259
|
-
"impossible": example.impossible,
|
260
255
|
}
|
261
256
|
|
262
257
|
def eval_all_examples(self, progress_bar=False) -> list:
|
plancraft/models/dummy.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
import random
|
2
2
|
|
3
|
-
from plancraft.config import EvalConfig
|
4
3
|
from plancraft.environment.actions import (
|
5
4
|
MoveAction,
|
6
5
|
)
|
@@ -12,7 +11,7 @@ class DummyModel(PlancraftBaseModel):
|
|
12
11
|
Dummy model returns actions that do random action
|
13
12
|
"""
|
14
13
|
|
15
|
-
def __init__(self, cfg
|
14
|
+
def __init__(self, cfg=None):
|
16
15
|
pass
|
17
16
|
|
18
17
|
def reset(self):
|
plancraft/utils.py
CHANGED
@@ -7,11 +7,7 @@ from typing import Optional
|
|
7
7
|
import torch
|
8
8
|
from loguru import logger
|
9
9
|
|
10
|
-
from plancraft.environment.actions import
|
11
|
-
ActionHandlerBase,
|
12
|
-
MoveAction,
|
13
|
-
SmeltAction,
|
14
|
-
)
|
10
|
+
from plancraft.environment.actions import ActionHandlerBase
|
15
11
|
from plancraft.environment.prompts import (
|
16
12
|
get_prompt_example,
|
17
13
|
get_system_prompt,
|
@@ -47,7 +43,6 @@ class History:
|
|
47
43
|
self.use_text_inventory = use_text_inventory
|
48
44
|
self.resolution = resolution # low, medium, high
|
49
45
|
|
50
|
-
self.action_history = []
|
51
46
|
self.inventory_history = []
|
52
47
|
self.inventory_counters = []
|
53
48
|
|
@@ -108,10 +103,6 @@ class History:
|
|
108
103
|
else:
|
109
104
|
self.dialogue_history.append({"role": role, "content": content})
|
110
105
|
|
111
|
-
def add_action_to_history(self, action: SmeltAction | MoveAction):
|
112
|
-
if isinstance(action, SmeltAction) or isinstance(action, MoveAction):
|
113
|
-
self.action_history.append(action.model_dump())
|
114
|
-
|
115
106
|
def add_inventory_to_history(self, inventory: dict):
|
116
107
|
self.inventory_history.append(inventory)
|
117
108
|
# count inventory
|
@@ -148,7 +139,6 @@ class History:
|
|
148
139
|
self.images = copy(self.prompt_images)
|
149
140
|
self.initial_dialogue_length = len(self.dialogue_history)
|
150
141
|
|
151
|
-
self.action_history = []
|
152
142
|
self.inventory_history = []
|
153
143
|
self.inventory_counters = []
|
154
144
|
|
@@ -159,14 +149,13 @@ class History:
|
|
159
149
|
"dialogue_history": copy(
|
160
150
|
self.dialogue_history[self.initial_dialogue_length :]
|
161
151
|
),
|
162
|
-
"action_history": copy(self.action_history),
|
163
152
|
"inventory_history": copy(self.inventory_history),
|
164
153
|
"tokens_used": copy(self.tokens_used),
|
165
154
|
}
|
166
155
|
|
167
156
|
@property
|
168
157
|
def num_steps(self):
|
169
|
-
return len(self.
|
158
|
+
return (len(self.dialogue_history) - self.initial_dialogue_length) // 2
|
170
159
|
|
171
160
|
def check_stuck(self, max_steps_no_change: int = 10) -> bool:
|
172
161
|
"""
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: plancraft
|
3
|
-
Version: 0.3.4
|
3
|
+
Version: 0.3.6
|
4
4
|
Summary: Plancraft: an evaluation dataset for planning with LLM agents
|
5
5
|
License: MIT License
|
6
6
|
|
@@ -72,6 +72,13 @@ You can install the package by running the following command:
|
|
72
72
|
pip install plancraft
|
73
73
|
```
|
74
74
|
|
75
|
+
Or:
|
76
|
+
|
77
|
+
```bash
|
78
|
+
uv add plancraft
|
79
|
+
```
|
80
|
+
|
81
|
+
|
75
82
|
![gif-example3](docs/images/train_images/TRAIN0010.gif)
|
76
83
|
![gif-example1](docs/images/train_images/TRAIN1133.gif)
|
77
84
|
![gif-example2](docs/images/train_images/TRAIN0383.gif)
|
@@ -117,17 +124,14 @@ The package also provides an `Evaluator` class that can be used to evaluate the
|
|
117
124
|
|
118
125
|
```python
|
119
126
|
from plancraft.evaluator import Evaluator
|
120
|
-
from plancraft.config import EvalConfig
|
121
127
|
|
122
128
|
def main():
|
123
|
-
# Create the config
|
124
|
-
config = EvalConfig(...)
|
125
129
|
# create model -- Note you can create your own model by subclassing PlancraftBaseModel
|
126
|
-
model = get_model(
|
130
|
+
model = get_model("dummy")
|
127
131
|
# Create the evaluator
|
128
|
-
evaluator = Evaluator(
|
132
|
+
evaluator = Evaluator(run_name="dummy", model=model)
|
129
133
|
# Evaluate the agent
|
130
|
-
evaluator.
|
134
|
+
evaluator.eval_all_examples()
|
131
135
|
```
|
132
136
|
|
133
137
|
The evaluator class handles the environment loop and model interaction. The environment is created based on the configuration and the examples are loaded from the dataset. The `Evaluator` uses the dataset examples and initializes the environment with the example's inventory. It is also responsible for early stopping and verifying that the target object has been crafted. Finally, it also saves the results of the evaluation and the images generated during the evaluation.
|
@@ -159,7 +163,6 @@ while not history.check_stuck() and history.num_steps < max_steps:
|
|
159
163
|
# Handle invalid case (exceeded non-env action limit)
|
160
164
|
observation = environment.step()
|
161
165
|
else:
|
162
|
-
history.add_action_to_history(action) # Add action to history
|
163
166
|
observation = environment.step(action)
|
164
167
|
|
165
168
|
# Convert observation to message and reset non-env counter
|
@@ -170,19 +173,16 @@ while not history.check_stuck() and history.num_steps < max_steps:
|
|
170
173
|
# Check if episode is complete
|
171
174
|
success = check_done(observation["inventory"], example.target)
|
172
175
|
|
173
|
-
# Update history with observation and message
|
174
|
-
history.add_observation_to_history(observation)
|
175
|
-
history.add_message_to_history(content=observation["message"], role="user")
|
176
|
-
|
177
176
|
if success: # Exit loop if success
|
178
177
|
break
|
179
178
|
|
179
|
+
# Update history with observation and message
|
180
|
+
history.add_observation_to_history(observation)
|
181
|
+
history.add_message_to_history(content=observation["message"], role="user")
|
180
182
|
# Model predicts next action
|
181
183
|
raw_action = model.step(observation, dialogue_history=history)
|
182
|
-
|
183
184
|
# Update history with predicted action
|
184
185
|
history.add_message_to_history(content=raw_action, role="assistant")
|
185
|
-
|
186
186
|
# Parse raw action into a structured format
|
187
187
|
action = parse_raw_model_response(raw_action)
|
188
188
|
|
@@ -194,7 +194,6 @@ return {
|
|
194
194
|
"number_of_steps": history.num_steps,
|
195
195
|
"model_trace": history.trace(),
|
196
196
|
"example_id": example.id,
|
197
|
-
"impossible": example.impossible,
|
198
197
|
}
|
199
198
|
```
|
200
199
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
plancraft/config.py,sha256=Ppkps-E8xDNYEP9prOVxW2zEG9MpWVzcLJi4tmGLjuQ,4285
|
3
|
-
plancraft/evaluator.py,sha256=
|
3
|
+
plancraft/evaluator.py,sha256=dTsE3FiQTJc094TmBvfBvefOpGSYcePIGVT36OEIClU,10910
|
4
4
|
plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
|
5
|
-
plancraft/utils.py,sha256=
|
5
|
+
plancraft/utils.py,sha256=phaHzbIS85YZrBPaGG9TStHY8ZBKR1LKfuN1exfVy1U,6889
|
6
6
|
plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
|
7
7
|
plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
|
8
8
|
plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
|
@@ -1915,12 +1915,12 @@ plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,
|
|
1915
1915
|
plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
|
1916
1916
|
plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
|
1917
1917
|
plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
|
1918
|
-
plancraft/models/dummy.py,sha256=
|
1918
|
+
plancraft/models/dummy.py,sha256=jBxke6VNpyYh_HBcFxCx64djO5F3wr5GbbnC0XePZ20,1015
|
1919
1919
|
plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
|
1920
1920
|
plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
|
1921
1921
|
plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
|
1922
1922
|
plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
|
1923
|
-
plancraft-0.3.
|
1924
|
-
plancraft-0.3.
|
1925
|
-
plancraft-0.3.
|
1926
|
-
plancraft-0.3.
|
1923
|
+
plancraft-0.3.6.dist-info/METADATA,sha256=MVnklft3zz21a9085j-H-_VYAPJREH2-1ojS08ICbzk,11147
|
1924
|
+
plancraft-0.3.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
1925
|
+
plancraft-0.3.6.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
|
1926
|
+
plancraft-0.3.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|