plancraft 0.3.30__py3-none-any.whl → 0.3.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plancraft/evaluator.py +89 -162
- plancraft/models/act.py +0 -3
- plancraft/models/base.py +0 -7
- plancraft/models/dummy.py +0 -6
- plancraft/models/oracle.py +0 -6
- {plancraft-0.3.30.dist-info → plancraft-0.3.32.dist-info}/METADATA +1 -1
- {plancraft-0.3.30.dist-info → plancraft-0.3.32.dist-info}/RECORD +9 -9
- {plancraft-0.3.30.dist-info → plancraft-0.3.32.dist-info}/WHEEL +0 -0
- {plancraft-0.3.30.dist-info → plancraft-0.3.32.dist-info}/licenses/LICENSE +0 -0
plancraft/evaluator.py
CHANGED
@@ -170,96 +170,70 @@ class Evaluator:
             content_list.append({"type": "image"})
         return {"content": content_list}
 
-    def eval_example(
-        self,
-        example: PlancraftExample,
-        model: PlancraftBaseModel,
-    ) -> dict:
-        """
-        Given the loaded model and an example from Plancraft
-        run the episode until success or termination.
-        """
-
-        # start environment
+    def _init_environment(self, example: PlancraftExample) -> tuple:
+        """Initialize environment and history for an example"""
         environment = PlancraftEnvironment(
             inventory=deepcopy(example.slotted_inventory),
             resolution=self.resolution,
         )
-
-        # initialise history/dialogue tracking
         history = self.create_history()
-
-
-
-
-
+        obs = environment.step()
+        obs["target"] = example.target
+        obs["message"] = self.convert_observation_to_message(obs)
+        return environment, history, obs
+
+    def _process_model_output(
+        self, raw_action, observation: dict, history: HistoryBase
+    ) -> tuple:
+        """Process model output and update history"""
+        if isinstance(raw_action, PlancraftModelOutput):
+            history.add_message_to_history(
+                content=raw_action.action,
+                role="assistant",
+                **(raw_action.kwargs or {}),
+            )
+            raw_action = raw_action.action
+        else:
+            history.add_message_to_history(content=raw_action, role="assistant")
+
+        action = self.parse_raw_model_response(
+            raw_action,
+            observation=observation,
+            history=history,
         )
+        return action
 
+    def _execute_action(
+        self, action, example: PlancraftExample, environment, model=None
+    ) -> tuple[dict, bool]:
+        """Execute action and return next observation and success status"""
         success = False
-        # run episode until stuck or until max steps is reached
-        while history.num_steps < self.max_steps:
-            # add observation to history
-            history.add_observation_to_history(observation)
-            history.add_message_to_history(content=observation["message"], role="user")
-            # predict next action
-            raw_action = model.step(observation, dialogue_history=history)
-
-            # if the model returns a PlancraftModelOutput, extract the action
-            if isinstance(raw_action, PlancraftModelOutput):
-                history.add_message_to_history(
-                    content=raw_action.action,
-                    role="assistant",
-                    **(raw_action.kwargs or {}),
-                )
-                raw_action = raw_action.action
-            elif isinstance(raw_action, str):
-                history.add_message_to_history(content=raw_action, role="assistant")
-            else:
-                raise ValueError(
-                    f"model.step() output must be a string or PlancraftModelOutput, got {type(raw_action)}"
-                )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            observation
-            # action is environment action
-            else:
-                observation = environment.step(action)
-                observation["target"] = example.target
-                observation["message"] = self.convert_observation_to_message(
-                    observation, model=model
-                )
-                # check if the episode is done
-                success = self.check_done(observation["inventory"], example.target)
-
-            # update model with success or failure
-            # observation is the next state after the action (s1)
-            # history is the dialogue history
-            # -- the last message contains the action taken (a0)
-            # -- the second to last message is the observation (s0)
-            # success is whether the episode is sucessful (r)
-            model.update(
-                observation=observation, history=history, success=success, action=action
+        # stop action
+        if isinstance(action, StopAction):
+            observation = None
+            # success is True if example was truly impossible
+            success = example.impossible
+        # if action is a string, it is a message response
+        elif isinstance(action, str):
+            observation = environment.step()
+            observation["target"] = example.target
+            observation["message"] = action
+        # execute action and check if target is obtained
+        else:
+            observation = environment.step(action)
+            observation["target"] = example.target
+            observation["message"] = self.convert_observation_to_message(
+                observation, model=model
             )
+            success = self.check_done(observation["inventory"], example.target)
 
-
-            if success or isinstance(action, StopAction):
-                break
+        return observation, success
 
-
+    def _create_result(
+        self, example: PlancraftExample, success: bool, history: HistoryBase
+    ) -> dict:
+        """Create result dictionary for an example"""
         return {
             "success": success,
             "recipe_type": example.recipe_type,
@@ -270,20 +244,36 @@ class Evaluator:
             "images": history.images,
         }
 
+    def eval_example(
+        self,
+        example: PlancraftExample,
+        model: PlancraftBaseModel,
+    ) -> dict:
+        environment, history, observation = self._init_environment(example)
+        success = False
+
+        while history.num_steps < self.max_steps:
+            history.add_observation_to_history(observation)
+            history.add_message_to_history(content=observation["message"], role="user")
+
+            raw_action = model.step(observation, dialogue_history=history)
+            action = self._process_model_output(raw_action, observation, history)
+
+            observation, success = self._execute_action(
+                action, example, environment, model
+            )
+            if success or isinstance(action, StopAction):
+                break
+
+        return self._create_result(example, success, history)
+
     def batch_eval_examples(
         self,
         examples: list[PlancraftExample],
         model,
         batch_size: int = 4,
+        callback_fn: Optional[callable] = None,
     ) -> list:
-        """
-        Processes examples in batches with dynamic replacement from a queue.
-
-        Args:
-            examples: List of examples to process
-            model: Model to use for evaluation
-            batch_size: Maximum number of concurrent environments
-        """
         pending_examples = deque(examples)
         active_examples = []
         active_environments = []
@@ -294,21 +284,13 @@ class Evaluator:
         # Initialize first batch
         while len(active_examples) < batch_size and pending_examples:
             example = pending_examples.popleft()
-            env = PlancraftEnvironment(
-                inventory=deepcopy(example.slotted_inventory),
-                resolution=self.resolution,
-            )
-            history = self.create_history()
-            obs = env.step()
-            obs["target"] = example.target
-            obs["message"] = self.convert_observation_to_message(obs, model=model)
+            env, history, obs = self._init_environment(example)
 
             active_examples.append(example)
             active_environments.append(env)
             active_histories.append(history)
             active_observations.append(obs)
 
-        # Process until all examples are done
         while active_examples:
             # Add observations to histories
             for i in range(len(active_examples)):
@@ -317,12 +299,10 @@ class Evaluator:
                     content=active_observations[i]["message"], role="user"
                 )
 
-            # Get model predictions for current batch
             raw_actions = model.batch_step(
                 active_observations, dialogue_histories=active_histories
             )
 
-            # Process each active environment
             completed_indices = []
             successes = []
             actions = []
@@ -330,71 +310,28 @@ class Evaluator:
             for i, (example, raw_action) in enumerate(
                 zip(active_examples, raw_actions)
             ):
-
-
-                    active_histories[i].add_message_to_history(
-                        content=raw_action.action,
-                        role="assistant",
-                        **(raw_action.kwargs or {}),
-                    )
-                    raw_action = raw_action.action
-                else:
-                    active_histories[i].add_message_to_history(
-                        content=raw_action, role="assistant"
-                    )
-
-                # Parse and execute action
-                action = self.parse_raw_model_response(
-                    raw_action,
-                    observation=active_observations[i],
-                    history=active_histories[i],
+                action = self._process_model_output(
+                    raw_action, active_observations[i], active_histories[i]
                 )
                 actions.append(action)
-                success = False
-
-                if isinstance(action, StopAction):
-                    success = example.impossible
-                    active_observations[i] = None
-                elif isinstance(action, str):
-                    obs = active_environments[i].step()
-                    obs["target"] = example.target
-                    obs["message"] = action
-                    active_observations[i] = obs
-                else:
-                    obs = active_environments[i].step(action)
-                    obs["target"] = example.target
-                    obs["message"] = self.convert_observation_to_message(
-                        obs, model=model
-                    )
-                    active_observations[i] = obs
-                    success = self.check_done(obs["inventory"], example.target)
 
+                obs, success = self._execute_action(
+                    action, example, active_environments[i], model
+                )
+                active_observations[i] = obs
                 successes.append(success)
 
-                # Check if environment is done
                 if (
                     success
                     or isinstance(action, StopAction)
                     or active_histories[i].num_steps >= self.max_steps
                 ):
-                    results[example.id] = {
-
-
-                        "complexity": example.complexity_split,
-                        "number_of_steps": active_histories[i].num_steps,
-                        "model_trace": active_histories[i].trace(),
-                        "example_id": example.id,
-                        "images": active_histories[i].images,
-                    }
+                    results[example.id] = self._create_result(
+                        example, success, active_histories[i]
+                    )
                     completed_indices.append(i)
-
-
-            model.batch_update(
-                observations=active_observations,
-                histories=active_histories,
-                successes=successes,
-                actions=actions,
-            )
+                    if callback_fn:
+                        callback_fn(example=example, results=results[example.id])
 
             # Remove completed environments and replace with new ones
             for i in reversed(completed_indices):
@@ -403,19 +340,9 @@ class Evaluator:
                 active_histories.pop(i)
                 active_observations.pop(i)
 
-                # Add new environment if there are pending examples
                 if pending_examples:
                     example = pending_examples.popleft()
-                    env = PlancraftEnvironment(
-                        inventory=deepcopy(example.slotted_inventory),
-                        resolution=self.resolution,
-                    )
-                    history = self.create_history()
-                    obs = env.step()
-                    obs["target"] = example.target
-                    obs["message"] = self.convert_observation_to_message(
-                        obs, model=model
-                    )
+                    env, history, obs = self._init_environment(example)
 
                     active_examples.append(example)
                     active_environments.append(env)
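The net effect of this refactor is that `eval_example` and `batch_eval_examples` now share the `_init_environment`, `_process_model_output`, `_execute_action`, and `_create_result` helpers, the `model.update`/`model.batch_update` calls are gone, and the batch loop gains an optional `callback_fn` invoked with `example=` and `results=` keyword arguments as each episode finishes. Below is a minimal usage sketch of the new hook, assuming an `Evaluator` instance, a model, and a list of examples already exist; only the `callback_fn` keyword and the result keys come from this diff.

```python
# Sketch: per-example callback with batch_eval_examples in 0.3.32.
# `evaluator`, `model`, and `examples` are assumed to have been built elsewhere.
def log_result(example, results):
    # Called once per finished episode with the dict built by _create_result.
    status = "success" if results["success"] else "failure"
    print(f"{example.id}: {status} after {results['number_of_steps']} steps")

evaluator.batch_eval_examples(
    examples=examples,
    model=model,
    batch_size=4,
    callback_fn=log_result,  # new optional hook; omit it to keep the old behaviour
)
```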
plancraft/models/act.py
CHANGED
plancraft/models/base.py
CHANGED
@@ -33,10 +33,3 @@ class PlancraftBaseModel(abc.ABC):
         Reset the model state - ready for a new episode
         """
         raise NotImplementedError()
-
-    @abc.abstractmethod
-    def update(self, **kwargs) -> None:
-        """
-        Update the model state based on the dialogue history
-        """
-        raise NotImplementedError()
plancraft/models/dummy.py
CHANGED
@@ -45,9 +45,3 @@ class DummyModel(PlancraftBaseModel):
         self, observations: list[dict], **kwargs
     ) -> list[PlancraftModelOutput]:
         return [self.step(observation) for observation in observations]
-
-    def update(self, **kwargs):
-        pass
-
-    def batch_update(self, **kwargs):
-        pass
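With the abstract `update` removed from `PlancraftBaseModel` and `update`/`batch_update` dropped from `DummyModel`, a model no longer needs any learning hooks to run under the evaluator. A minimal sketch of a custom model under that assumption follows; the import path, the `PlancraftModelOutput(action=...)` constructor, and the exact set of remaining required methods (`step`, `batch_step`, `reset`) are inferred from the surrounding diffs rather than stated in them.

```python
# Sketch of a minimal 0.3.32 model: no update()/batch_update() required.
# Import location and constructor are assumptions based on usage in the diffs above.
from plancraft.models.base import PlancraftBaseModel, PlancraftModelOutput


class SayOnlyModel(PlancraftBaseModel):
    """Toy model that always replies with a chat message."""

    def step(self, observation: dict, **kwargs) -> PlancraftModelOutput:
        # The evaluator reads .action (and optional .kwargs) from this output.
        return PlancraftModelOutput(action="I am still thinking")

    def batch_step(self, observations: list[dict], **kwargs) -> list[PlancraftModelOutput]:
        return [self.step(obs) for obs in observations]

    def reset(self):
        # Reset model state, ready for a new episode.
        pass
```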
plancraft/models/oracle.py
CHANGED
{plancraft-0.3.30.dist-info → plancraft-0.3.32.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 plancraft/config.py,sha256=ShsFRlJ7plsl3ToD9fiO_4LDQuXdbjNV6Xp6o3Yk2Yg,4315
-plancraft/evaluator.py,sha256=
+plancraft/evaluator.py,sha256=mxzvbGpEDkiKW8u79QgYz5Q4wnZvkQSXiAvi0OVu4Qs,14754
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
 plancraft/utils.py,sha256=VhnxMihh6pRhNjQTK5HDc0FYWmF9_EcQyRP_a7fbIZA,7156
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
@@ -1912,15 +1912,15 @@ plancraft/environment/tags/wooden_stairs.json,sha256=GCr2_5UGPMYZECqQ_5NYSvbwuwt
 plancraft/environment/tags/wooden_trapdoors.json,sha256=DbjfwoHJL8VuYWV61A1uDqW7LJsGlOP4eoxcGIQVYr4,303
 plancraft/environment/tags/wool.json,sha256=Z59l4mdPztVZBFaglJ4mV9H2OnyCVzhqQRi2dduak78,496
 plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,510
-plancraft/models/act.py,sha256=
-plancraft/models/base.py,sha256=
+plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
+plancraft/models/base.py,sha256=S8EdkqWpn8nE1WcrqDoA4Hx4p52qEttGxnqjIPWvl3Q,852
 plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
-plancraft/models/dummy.py,sha256=
+plancraft/models/dummy.py,sha256=_NUTviv5ye6KGzODRt0Zykk8shsek0QBqWCeZW3ldSQ,1495
 plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
-plancraft/models/oracle.py,sha256=
+plancraft/models/oracle.py,sha256=f-0KWlBuHy6wcxmDsxM3MQ_QwfBstzfbA26mlk1MgLA,1657
 plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.3.
-plancraft-0.3.
-plancraft-0.3.
-plancraft-0.3.
+plancraft-0.3.32.dist-info/METADATA,sha256=vRc_HMJhCvX4LnEPLHIbgKaJCbQP4Gq0qb4xITGFkYQ,11148
+plancraft-0.3.32.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.3.32.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.3.32.dist-info/RECORD,,
{plancraft-0.3.30.dist-info → plancraft-0.3.32.dist-info}/WHEEL
File without changes
{plancraft-0.3.30.dist-info → plancraft-0.3.32.dist-info}/licenses/LICENSE
File without changes