plancraft 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plancraft/environment/sampler.py +1 -1
- plancraft/evaluator.py +61 -53
- plancraft/models/dummy.py +7 -2
- plancraft/utils.py +0 -3
- {plancraft-0.3.11.dist-info → plancraft-0.3.13.dist-info}/METADATA +1 -1
- {plancraft-0.3.11.dist-info → plancraft-0.3.13.dist-info}/RECORD +8 -8
- {plancraft-0.3.11.dist-info → plancraft-0.3.13.dist-info}/WHEEL +0 -0
- {plancraft-0.3.11.dist-info → plancraft-0.3.13.dist-info}/licenses/LICENSE +0 -0
plancraft/environment/sampler.py
CHANGED
@@ -39,7 +39,7 @@ def assign_to_slots(inventory: dict[str, int]) -> list[dict]:
|
|
39
39
|
for item, total_count in inventory.items():
|
40
40
|
while total_count > 0:
|
41
41
|
if len(available_slots) == 0:
|
42
|
-
logger.
|
42
|
+
logger.warning("Not enough slots available")
|
43
43
|
break
|
44
44
|
slot = available_slots.pop()
|
45
45
|
count_in_slot = min(total_count, MAX_STACK_SIZE[item])
|
plancraft/evaluator.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
3
|
from typing import Optional
|
4
|
+
from copy import deepcopy
|
4
5
|
|
5
6
|
import imageio
|
6
7
|
from loguru import logger
|
@@ -38,7 +39,6 @@ class Evaluator:
|
|
38
39
|
def __init__(
|
39
40
|
self,
|
40
41
|
run_name: str,
|
41
|
-
model: PlancraftBaseModel,
|
42
42
|
actions: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
|
43
43
|
output_dir: str = "output",
|
44
44
|
split: str = "val.small",
|
@@ -61,6 +61,13 @@ class Evaluator:
|
|
61
61
|
self.use_fasterrcnn = use_fasterrcnn
|
62
62
|
self.max_steps = max_steps
|
63
63
|
self.resume = resume
|
64
|
+
self.resolution = resolution
|
65
|
+
|
66
|
+
# history args
|
67
|
+
self.system_prompt = system_prompt
|
68
|
+
self.prompt_examples = prompt_examples
|
69
|
+
self.prompt_images = prompt_images
|
70
|
+
self.few_shot = few_shot
|
64
71
|
|
65
72
|
self.output_dir = f"{output_dir}/{run_name}/{split}"
|
66
73
|
self.generation_number = 0
|
@@ -69,28 +76,6 @@ class Evaluator:
|
|
69
76
|
# load all examples
|
70
77
|
self.examples: list[PlancraftExample] = self.load_dataset(split)
|
71
78
|
|
72
|
-
# start environment
|
73
|
-
self.environment = PlancraftEnvironment(
|
74
|
-
inventory=[],
|
75
|
-
resolution=resolution,
|
76
|
-
)
|
77
|
-
|
78
|
-
# initialise history/dialogue tracking
|
79
|
-
self.history = History(
|
80
|
-
actions=actions,
|
81
|
-
use_multimodal_content_format=use_multimodal_content_format,
|
82
|
-
use_images=use_images,
|
83
|
-
use_text_inventory=use_text_inventory,
|
84
|
-
resolution=resolution,
|
85
|
-
few_shot=few_shot,
|
86
|
-
system_prompt=system_prompt,
|
87
|
-
prompt_examples=prompt_examples,
|
88
|
-
prompt_images=prompt_images,
|
89
|
-
)
|
90
|
-
|
91
|
-
# load model
|
92
|
-
self.model = model
|
93
|
-
|
94
79
|
def save_results_dict(self, example: PlancraftExample, results_dict: dict):
|
95
80
|
output_dir = f"{self.output_dir}/{self.generation_number}"
|
96
81
|
os.makedirs(output_dir, exist_ok=True)
|
@@ -124,14 +109,6 @@ class Evaluator:
|
|
124
109
|
dataset = json.load(f)
|
125
110
|
return [PlancraftExample(**example) for example in dataset]
|
126
111
|
|
127
|
-
def reset(
|
128
|
-
self,
|
129
|
-
example: PlancraftExample,
|
130
|
-
):
|
131
|
-
self.environment.reset(new_inventory=example.slotted_inventory)
|
132
|
-
self.model.reset()
|
133
|
-
self.history.reset()
|
134
|
-
|
135
112
|
def check_done(self, inventory: dict, target: str):
|
136
113
|
"""
|
137
114
|
Check that target object is obtained
|
@@ -142,14 +119,16 @@ class Evaluator:
|
|
142
119
|
return True
|
143
120
|
return False
|
144
121
|
|
145
|
-
def parse_raw_model_response(
|
122
|
+
def parse_raw_model_response(
|
123
|
+
self, generated_text: str, observation=None, history=None
|
124
|
+
) -> str:
|
146
125
|
"""
|
147
126
|
Given a message and set of action handlers, parse the content to return the action
|
148
127
|
or a message if the action is not valid/requires message response
|
149
128
|
"""
|
150
129
|
for handler in self.actions:
|
151
130
|
match_output = handler.match(
|
152
|
-
generated_text, observation=observation, history=
|
131
|
+
generated_text, observation=observation, history=history
|
153
132
|
)
|
154
133
|
if match_output:
|
155
134
|
return match_output
|
@@ -159,6 +138,7 @@ class Evaluator:
|
|
159
138
|
def convert_observation_to_message(
|
160
139
|
self,
|
161
140
|
observation: dict,
|
141
|
+
model: PlancraftBaseModel = None,
|
162
142
|
) -> str | dict:
|
163
143
|
"""
|
164
144
|
Convert an environment observation to the message format used by an LLM chat model
|
@@ -170,8 +150,9 @@ class Evaluator:
|
|
170
150
|
- use_images: bool - Whether to append an image to the message content - must be used with use_multimodal_content_format.
|
171
151
|
"""
|
172
152
|
if self.use_fasterrcnn:
|
153
|
+
assert model is not None, "Model must be provided to convert image to text"
|
173
154
|
# convert image to inventory using fasterrcnn
|
174
|
-
inventory =
|
155
|
+
inventory = model.bbox_model.get_inventory(observation["image"].copy())
|
175
156
|
text_content = target_and_inventory_to_text_obs(
|
176
157
|
observation["target"], inventory
|
177
158
|
)
|
@@ -190,15 +171,38 @@ class Evaluator:
|
|
190
171
|
content_list.append({"type": "image"})
|
191
172
|
return {"content": content_list}
|
192
173
|
|
193
|
-
def eval_example(
|
174
|
+
def eval_example(
|
175
|
+
self,
|
176
|
+
example: PlancraftExample,
|
177
|
+
model: PlancraftBaseModel,
|
178
|
+
) -> dict:
|
194
179
|
"""Given the loaded model and an example from Plancraft
|
195
180
|
run the episode until success or termination."""
|
181
|
+
|
182
|
+
# start environment
|
183
|
+
environment = PlancraftEnvironment(
|
184
|
+
inventory=example.slotted_inventory,
|
185
|
+
resolution=self.resolution,
|
186
|
+
)
|
187
|
+
|
188
|
+
# initialise history/dialogue tracking
|
189
|
+
history = History(
|
190
|
+
actions=self.actions,
|
191
|
+
use_multimodal_content_format=self.use_multimodal_content_format,
|
192
|
+
use_images=self.use_images,
|
193
|
+
use_text_inventory=self.use_text_inventory,
|
194
|
+
resolution=self.resolution,
|
195
|
+
few_shot=self.few_shot,
|
196
|
+
system_prompt=deepcopy(self.system_prompt),
|
197
|
+
prompt_examples=deepcopy(self.prompt_examples),
|
198
|
+
prompt_images=deepcopy(self.prompt_images),
|
199
|
+
)
|
200
|
+
|
196
201
|
success = False
|
197
|
-
self.reset(example)
|
198
202
|
action = None
|
199
203
|
|
200
204
|
# run episode until stuck or until max steps is reached
|
201
|
-
while
|
205
|
+
while history.num_steps < self.max_steps:
|
202
206
|
# if the action is stop then we end the episode
|
203
207
|
if isinstance(action, StopAction):
|
204
208
|
# if the action is stop and task is impossible then success
|
@@ -207,16 +211,16 @@ class Evaluator:
|
|
207
211
|
break
|
208
212
|
# action is external tool then it is str
|
209
213
|
if isinstance(action, str):
|
210
|
-
observation =
|
214
|
+
observation = environment.step()
|
211
215
|
observation["target"] = example.target
|
212
216
|
observation["message"] = action
|
213
217
|
# action is environment action
|
214
218
|
else:
|
215
|
-
observation =
|
219
|
+
observation = environment.step(action)
|
216
220
|
# convert inventory observation to text message
|
217
221
|
observation["target"] = example.target
|
218
222
|
observation["message"] = self.convert_observation_to_message(
|
219
|
-
observation
|
223
|
+
observation, model=model
|
220
224
|
)
|
221
225
|
# check if the episode is done
|
222
226
|
success = self.check_done(observation["inventory"], example.target)
|
@@ -225,29 +229,30 @@ class Evaluator:
|
|
225
229
|
break
|
226
230
|
|
227
231
|
# add observation to history
|
228
|
-
|
232
|
+
history.add_observation_to_history(observation)
|
229
233
|
# add observation message to history
|
230
|
-
|
231
|
-
content=observation["message"], role="user"
|
232
|
-
)
|
234
|
+
history.add_message_to_history(content=observation["message"], role="user")
|
233
235
|
# predict next action
|
234
|
-
raw_action =
|
236
|
+
raw_action = model.step(observation, dialogue_history=history)
|
235
237
|
# add message to history
|
236
|
-
|
238
|
+
history.add_message_to_history(content=raw_action, role="assistant")
|
237
239
|
# parse the raw action
|
238
|
-
action = self.parse_raw_model_response(
|
240
|
+
action = self.parse_raw_model_response(
|
241
|
+
raw_action, observation=observation, history=history
|
242
|
+
)
|
239
243
|
|
240
244
|
# save results and reset
|
241
245
|
return {
|
242
246
|
"success": success,
|
243
247
|
"recipe_type": example.recipe_type,
|
244
248
|
"complexity": example.complexity_split,
|
245
|
-
"number_of_steps":
|
246
|
-
"model_trace":
|
249
|
+
"number_of_steps": history.num_steps,
|
250
|
+
"model_trace": history.trace(),
|
247
251
|
"example_id": example.id,
|
252
|
+
"images": history.images,
|
248
253
|
}
|
249
254
|
|
250
|
-
def eval_all_examples(self, progress_bar=False) -> list:
|
255
|
+
def eval_all_examples(self, model, progress_bar=False) -> list:
|
251
256
|
results = []
|
252
257
|
pbar = tqdm(
|
253
258
|
total=len(self.examples),
|
@@ -256,7 +261,6 @@ class Evaluator:
|
|
256
261
|
correct = 0
|
257
262
|
count = 0
|
258
263
|
for example in self.examples:
|
259
|
-
logger.debug(f"Running example {example.id}")
|
260
264
|
if resume_result := self.load_results_dict(example):
|
261
265
|
pbar.update(self.max_steps)
|
262
266
|
results.append(resume_result)
|
@@ -268,10 +272,14 @@ class Evaluator:
|
|
268
272
|
]:
|
269
273
|
continue
|
270
274
|
|
271
|
-
result = self.eval_example(example)
|
275
|
+
result = self.eval_example(example, model=model)
|
276
|
+
model.reset()
|
277
|
+
|
278
|
+
# save images and results
|
279
|
+
self.save_images(example, result["images"])
|
280
|
+
del result["images"]
|
272
281
|
results.append(result)
|
273
282
|
self.save_results_dict(example, result)
|
274
|
-
self.save_images(example, self.history.images)
|
275
283
|
|
276
284
|
correct += int(result["success"])
|
277
285
|
count += 1
|
plancraft/models/dummy.py
CHANGED
@@ -18,14 +18,19 @@ class DummyModel(PlancraftBaseModel):
|
|
18
18
|
pass
|
19
19
|
|
20
20
|
def random_select(self, observation):
|
21
|
-
# randomly pick an item from the inventory
|
21
|
+
# randomly pick an item that has quantity 1 from the inventory
|
22
22
|
item_indices = set()
|
23
23
|
for slot, item in observation["inventory"].items():
|
24
|
-
if item["quantity"]
|
24
|
+
if item["quantity"] == 1:
|
25
25
|
item_indices.add(slot)
|
26
26
|
all_slots_to = set(range(1, 46))
|
27
27
|
empty_slots = all_slots_to - item_indices
|
28
28
|
|
29
|
+
# if not item with quantity == 1, randomly pick any item
|
30
|
+
if len(item_indices) == 0:
|
31
|
+
item_indices = set(observation["inventory"].keys())
|
32
|
+
|
33
|
+
# move the item to a random empty slot
|
29
34
|
random_slot_from = random.choice(list(item_indices))
|
30
35
|
random_slot_to = random.choice(list(empty_slots))
|
31
36
|
|
plancraft/utils.py
CHANGED
@@ -81,9 +81,6 @@ class History:
|
|
81
81
|
self.prompt_images = load_prompt_images(resolution=self.resolution)
|
82
82
|
|
83
83
|
def add_message_to_history(self, content: str | dict, role="user"):
|
84
|
-
if role == "assistant":
|
85
|
-
logger.info(content)
|
86
|
-
|
87
84
|
if isinstance(content, dict):
|
88
85
|
assert "content" in content, "content key not found in message"
|
89
86
|
content["role"] = role
|
{plancraft-0.3.11.dist-info → plancraft-0.3.13.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
plancraft/config.py,sha256=ShsFRlJ7plsl3ToD9fiO_4LDQuXdbjNV6Xp6o3Yk2Yg,4315
|
3
|
-
plancraft/evaluator.py,sha256=
|
3
|
+
plancraft/evaluator.py,sha256=q7khX8FrMeb5QOgYZba-24jC7ZXp83VU7sa1H1kKS08,11061
|
4
4
|
plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
|
5
|
-
plancraft/utils.py,sha256=
|
5
|
+
plancraft/utils.py,sha256=67UUDMSv8TqX_I0fL5-yG_vkHvTZlnhSLkktWAg5p34,5712
|
6
6
|
plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
|
7
7
|
plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
|
8
8
|
plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
|
@@ -17,7 +17,7 @@ plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82
|
|
17
17
|
plancraft/environment/planner.py,sha256=eJExz3OxSzurIEdH9LOtMwFH9ApqMQ3CokVhmbV6Px0,3953
|
18
18
|
plancraft/environment/prompts.py,sha256=8QXclX0ygpL02uZichE1AVkbdn_0HGteD5bzo0FZGOU,6947
|
19
19
|
plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
|
20
|
-
plancraft/environment/sampler.py,sha256=
|
20
|
+
plancraft/environment/sampler.py,sha256=lTSiGfmrew0G7ewOWtz6dtt58Mj0rAg6PW8BIbBegXA,7646
|
21
21
|
plancraft/environment/search.py,sha256=Dmdvj04kMvPlwvoWSc2261LTXV8RbMpS4FODV1YoZKs,1847
|
22
22
|
plancraft/environment/assets/constants.json,sha256=kyOIOh82CTTMMGEIS60k5k6M-6fkEmYDoGAnvi3Zx5k,1379016
|
23
23
|
plancraft/environment/assets/minecraft_font.ttf,sha256=AzoK9cgggXwjFPHtIO7uz-YaDrminl3nvB-VsaTvTAk,60992
|
@@ -1915,12 +1915,12 @@ plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,
|
|
1915
1915
|
plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
|
1916
1916
|
plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
|
1917
1917
|
plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
|
1918
|
-
plancraft/models/dummy.py,sha256=
|
1918
|
+
plancraft/models/dummy.py,sha256=856oEX6NquXSIIfQLTEFFeB8ib7VUUs5cB0TVHAiFvI,1248
|
1919
1919
|
plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
|
1920
1920
|
plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
|
1921
1921
|
plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
|
1922
1922
|
plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
|
1923
|
-
plancraft-0.3.
|
1924
|
-
plancraft-0.3.
|
1925
|
-
plancraft-0.3.
|
1926
|
-
plancraft-0.3.
|
1923
|
+
plancraft-0.3.13.dist-info/METADATA,sha256=7dISD2bnB8aAMG7uvQZDJZq4aKBu7gGIRaLLeTiQMvk,11148
|
1924
|
+
plancraft-0.3.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
1925
|
+
plancraft-0.3.13.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
|
1926
|
+
plancraft-0.3.13.dist-info/RECORD,,
|
File without changes
|
File without changes
|