plancraft 0.3.30__py3-none-any.whl → 0.3.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plancraft/evaluator.py CHANGED
@@ -245,16 +245,6 @@ class Evaluator:
245
245
  # check if the episode is done
246
246
  success = self.check_done(observation["inventory"], example.target)
247
247
 
248
- # update model with success or failure
249
- # observation is the next state after the action (s1)
250
- # history is the dialogue history
251
- # -- the last message contains the action taken (a0)
252
- # -- the second to last message is the observation (s0)
253
- # success is whether the episode is successful (r)
254
- model.update(
255
- observation=observation, history=history, success=success, action=action
256
- )
257
-
258
248
  # exit if success
259
249
  if success or isinstance(action, StopAction):
260
250
  break
@@ -275,6 +265,7 @@ class Evaluator:
275
265
  examples: list[PlancraftExample],
276
266
  model,
277
267
  batch_size: int = 4,
268
+ callback_fn: Optional[callable] = None,
278
269
  ) -> list:
279
270
  """
280
271
  Processes examples in batches with dynamic replacement from a queue.
@@ -283,6 +274,7 @@ class Evaluator:
283
274
  examples: List of examples to process
284
275
  model: Model to use for evaluation
285
276
  batch_size: Maximum number of concurrent environments
277
+ callback_fn: Optional callback function to call after each result
286
278
  """
287
279
  pending_examples = deque(examples)
288
280
  active_examples = []
@@ -387,14 +379,8 @@ class Evaluator:
387
379
  "images": active_histories[i].images,
388
380
  }
389
381
  completed_indices.append(i)
390
-
391
- # Update model
392
- model.batch_update(
393
- observations=active_observations,
394
- histories=active_histories,
395
- successes=successes,
396
- actions=actions,
397
- )
382
+ if callback_fn:
383
+ callback_fn(results[example.id])
398
384
 
399
385
  # Remove completed environments and replace with new ones
400
386
  for i in reversed(completed_indices):
plancraft/models/act.py CHANGED
@@ -72,6 +72,3 @@ class ActModel(PlancraftBaseModel):
72
72
  dialogue_history.tokens_used += action_token_used
73
73
  # return raw action message
74
74
  return action_messages[0].split("\n")[0].strip()
75
-
76
- def update(self, **kwargs):
77
- pass
plancraft/models/base.py CHANGED
@@ -33,10 +33,3 @@ class PlancraftBaseModel(abc.ABC):
33
33
  Reset the model state - ready for a new episode
34
34
  """
35
35
  raise NotImplementedError()
36
-
37
- @abc.abstractmethod
38
- def update(self, **kwargs) -> None:
39
- """
40
- Update the model state based on the dialogue history
41
- """
42
- raise NotImplementedError()
plancraft/models/dummy.py CHANGED
@@ -45,9 +45,3 @@ class DummyModel(PlancraftBaseModel):
45
45
  self, observations: list[dict], **kwargs
46
46
  ) -> list[PlancraftModelOutput]:
47
47
  return [self.step(observation) for observation in observations]
48
-
49
- def update(self, **kwargs):
50
- pass
51
-
52
- def batch_update(self, **kwargs):
53
- pass
@@ -47,9 +47,3 @@ class OracleModel(PlancraftBaseModel):
47
47
  action = self.step(observation)
48
48
  actions.append(action)
49
49
  return actions
50
-
51
- def update(self, **kwargs):
52
- pass
53
-
54
- def batch_update(self, **kwargs):
55
- pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plancraft
3
- Version: 0.3.30
3
+ Version: 0.3.31
4
4
  Summary: Plancraft: an evaluation dataset for planning with LLM agents
5
5
  License: MIT License
6
6
 
@@ -1,6 +1,6 @@
1
1
  plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  plancraft/config.py,sha256=ShsFRlJ7plsl3ToD9fiO_4LDQuXdbjNV6Xp6o3Yk2Yg,4315
3
- plancraft/evaluator.py,sha256=VFJnfitixU2Y4RTxp0lDALoCSFMMwMJPgSQC0Y0tmH8,18121
3
+ plancraft/evaluator.py,sha256=VteLAT_rPogw8NYZos7jEuuakyfE_3CsFuv6A39Geyw,17614
4
4
  plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
5
5
  plancraft/utils.py,sha256=VhnxMihh6pRhNjQTK5HDc0FYWmF9_EcQyRP_a7fbIZA,7156
6
6
  plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
@@ -1912,15 +1912,15 @@ plancraft/environment/tags/wooden_stairs.json,sha256=GCr2_5UGPMYZECqQ_5NYSvbwuwt
1912
1912
  plancraft/environment/tags/wooden_trapdoors.json,sha256=DbjfwoHJL8VuYWV61A1uDqW7LJsGlOP4eoxcGIQVYr4,303
1913
1913
  plancraft/environment/tags/wool.json,sha256=Z59l4mdPztVZBFaglJ4mV9H2OnyCVzhqQRi2dduak78,496
1914
1914
  plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,510
1915
- plancraft/models/act.py,sha256=_OZo9a_6R0wajdR7axZarjI3IJP7glFrWeDIrbcHDmw,2737
1916
- plancraft/models/base.py,sha256=Krm6MdOjU-qlps1WSX7pxdnqXLiyI3qsI9Na7Xk8r1c,1038
1915
+ plancraft/models/act.py,sha256=6Xb8rylg3OngOraVFgduH_hQR62VcoyTeFntN4q3hsQ,2691
1916
+ plancraft/models/base.py,sha256=S8EdkqWpn8nE1WcrqDoA4Hx4p52qEttGxnqjIPWvl3Q,852
1917
1917
  plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
1918
- plancraft/models/dummy.py,sha256=UWbW3bjrQr_0UYYrNf_D0jWpUq6e50vAp21F0zi8iFM,1593
1918
+ plancraft/models/dummy.py,sha256=_NUTviv5ye6KGzODRt0Zykk8shsek0QBqWCeZW3ldSQ,1495
1919
1919
  plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
1920
- plancraft/models/oracle.py,sha256=jmt_kBBNXt0VWUX7q6OHkJoRZWItCMy4qGH5qbLSc1c,1755
1920
+ plancraft/models/oracle.py,sha256=f-0KWlBuHy6wcxmDsxM3MQ_QwfBstzfbA26mlk1MgLA,1657
1921
1921
  plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
1922
1922
  plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
1923
- plancraft-0.3.30.dist-info/METADATA,sha256=tltUHYqXhfDXfsQGU5NLhEp6TjR41g6X0OWFn5dpttg,11148
1924
- plancraft-0.3.30.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
- plancraft-0.3.30.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
- plancraft-0.3.30.dist-info/RECORD,,
1923
+ plancraft-0.3.31.dist-info/METADATA,sha256=gU6j3SQEGdXIeW1pab_Pz6hspDhl_g0vaPIkIXRScYo,11148
1924
+ plancraft-0.3.31.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
+ plancraft-0.3.31.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
+ plancraft-0.3.31.dist-info/RECORD,,