plancraft 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
plancraft/environment/actions.py CHANGED
@@ -70,7 +70,7 @@ class ActionHandlerBase(abc.ABC):
70
70
  raise NotImplementedError()
71
71
 
72
72
  @abc.abstractmethod
73
- def match(self, generated_text: str):
73
+ def match(self, generated_text: str, **kwargs) -> Optional[BaseModel | str]:
74
74
  """
75
75
  Match the generated text to the action/tool
76
76
  """
@@ -204,7 +204,7 @@ class MoveActionHandler(ActionHandlerBase):
204
204
  def action_name(self) -> str:
205
205
  return "move"
206
206
 
207
- def match(self, generated_text: str) -> Optional[MoveAction | str]:
207
+ def match(self, generated_text: str, **kwargs) -> Optional[MoveAction | str]:
208
208
  """
209
209
  Parse the raw model response to a MoveAction
210
210
  """
@@ -238,7 +238,7 @@ class SmeltActionHandler(ActionHandlerBase):
238
238
  def action_name(self) -> str:
239
239
  return "smelt"
240
240
 
241
- def match(self, generated_text: str) -> Optional[SmeltAction | str]:
241
+ def match(self, generated_text: str, **kwargs) -> Optional[SmeltAction | str]:
242
242
  """
243
243
  Parse the raw model response to a SmeltAction
244
244
  """
@@ -272,7 +272,7 @@ class ImpossibleActionHandler(ActionHandlerBase):
272
272
  def action_name(self) -> str:
273
273
  return "impossible"
274
274
 
275
- def match(self, generated_text) -> Optional[StopAction]:
275
+ def match(self, generated_text, **kwargs) -> Optional[StopAction]:
276
276
  """
277
277
  Parse the raw model response to a StopAction
278
278
  """
@@ -296,7 +296,7 @@ class ThinkActionHandler(ActionHandlerBase):
296
296
  def action_name(self) -> str:
297
297
  return "think"
298
298
 
299
- def match(self, generated_text) -> Optional[str]:
299
+ def match(self, generated_text, **kwargs) -> Optional[str]:
300
300
  """
301
301
  Parse the raw model response to a ThinkAction
302
302
  """
plancraft/environment/search.py CHANGED
@@ -42,7 +42,7 @@ class GoldSearchActionHandler(ActionHandlerBase):
42
42
  def action_name(self) -> str:
43
43
  return "search"
44
44
 
45
- def match(self, generated_text) -> Optional[str]:
45
+ def match(self, generated_text, **kwargs) -> Optional[str]:
46
46
  """
47
47
  Parse the raw model response to a SearchAction
48
48
  """
plancraft/evaluator.py CHANGED
@@ -142,13 +142,15 @@ class Evaluator:
142
142
  return True
143
143
  return False
144
144
 
145
- def parse_raw_model_response(self, generated_text: str):
145
+ def parse_raw_model_response(self, generated_text: str, observation=None) -> str:
146
146
  """
147
147
  Given a message and set of action handlers, parse the content to return the action
148
148
  or a message if the action is not valid/requires message response
149
149
  """
150
150
  for handler in self.actions:
151
- match_output = handler.match(generated_text)
151
+ match_output = handler.match(
152
+ generated_text, observation=observation, history=self.history
153
+ )
152
154
  if match_output:
153
155
  return match_output
154
156
  action_names = [handler.action_name for handler in self.actions]
@@ -242,7 +244,7 @@ class Evaluator:
242
244
  # add message to history
243
245
  self.history.add_message_to_history(content=raw_action, role="assistant")
244
246
  # parse the raw action
245
- action = self.parse_raw_model_response(raw_action)
247
+ action = self.parse_raw_model_response(raw_action, observation=observation)
246
248
 
247
249
  # save results and reset
248
250
  return {
plancraft/utils.py CHANGED
@@ -43,7 +43,6 @@ class History:
43
43
  self.use_text_inventory = use_text_inventory
44
44
  self.resolution = resolution # low, medium, high
45
45
 
46
- self.action_history = []
47
46
  self.inventory_history = []
48
47
  self.inventory_counters = []
49
48
 
@@ -156,7 +155,7 @@ class History:
156
155
 
157
156
  @property
158
157
  def num_steps(self):
159
- return len(self.action_history)
158
+ return (len(self.dialogue_history) - self.initial_dialogue_length) // 2
160
159
 
161
160
  def check_stuck(self, max_steps_no_change: int = 10) -> bool:
162
161
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plancraft
3
- Version: 0.3.5
3
+ Version: 0.3.7
4
4
  Summary: Plancraft: an evaluation dataset for planning with LLM agents
5
5
  License: MIT License
6
6
 
@@ -1,8 +1,8 @@
1
1
  plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  plancraft/config.py,sha256=Ppkps-E8xDNYEP9prOVxW2zEG9MpWVzcLJi4tmGLjuQ,4285
3
- plancraft/evaluator.py,sha256=dTsE3FiQTJc094TmBvfBvefOpGSYcePIGVT36OEIClU,10910
3
+ plancraft/evaluator.py,sha256=zWk3k1EiqGOIJkSWGL5Qk27xqwLowUMwRRVc6dm0gGo,11037
4
4
  plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
5
- plancraft/utils.py,sha256=8bO8wrblmIW1aXEJre7ALGbL6GvuFrY38aZDdA_8W-g,6882
5
+ plancraft/utils.py,sha256=phaHzbIS85YZrBPaGG9TStHY8ZBKR1LKfuN1exfVy1U,6889
6
6
  plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
7
7
  plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
8
8
  plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -11,14 +11,14 @@ plancraft/data/val.json,sha256=IToAiaqUNQi_xhX1bzmInuskLaT7C2ryQjP-CZkzL24,13044
11
11
  plancraft/data/val.small.easy.json,sha256=9zEmqepjXG2NIp88xnFqOCkwsUsku3HEwHoQGxgTr6U,190252
12
12
  plancraft/data/val.small.json,sha256=76E9EFaljDQyAokg97e-IblvcOe6KbrdKkXvRxhhkgo,237653
13
13
  plancraft/environment/__init__.py,sha256=XFsFny4lH195AwAmL-WeCaF9ZCMgc7IgXIwhQ8FTdgE,505
14
- plancraft/environment/actions.py,sha256=D9QqBW7yWsbWCjxNyWp61Xtb0c6EtyXk3PZ1I8SRoBQ,9381
14
+ plancraft/environment/actions.py,sha256=AQxFaK4YW53mPwhuPhHrDF9wENSVjPHSWk0v77I1thw,9460
15
15
  plancraft/environment/env.py,sha256=F5xo1eAJ9MeuoE2IpG_LtbaE0BGd66URPB_rehAWIiU,16372
16
16
  plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
17
17
  plancraft/environment/planner.py,sha256=eJExz3OxSzurIEdH9LOtMwFH9ApqMQ3CokVhmbV6Px0,3953
18
18
  plancraft/environment/prompts.py,sha256=8QXclX0ygpL02uZichE1AVkbdn_0HGteD5bzo0FZGOU,6947
19
19
  plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
20
20
  plancraft/environment/sampler.py,sha256=IZT-XjmWSZrs0zDyRTMjYytXxewdwYf5YGGdKsR5ll4,7643
21
- plancraft/environment/search.py,sha256=uFHpLvW40rMKOxDabcyWrpOrhKLDZqAJOF_jew4_WXk,1837
21
+ plancraft/environment/search.py,sha256=Dmdvj04kMvPlwvoWSc2261LTXV8RbMpS4FODV1YoZKs,1847
22
22
  plancraft/environment/assets/constants.json,sha256=kyOIOh82CTTMMGEIS60k5k6M-6fkEmYDoGAnvi3Zx5k,1379016
23
23
  plancraft/environment/assets/minecraft_font.ttf,sha256=AzoK9cgggXwjFPHtIO7uz-YaDrminl3nvB-VsaTvTAk,60992
24
24
  plancraft/environment/assets/table.png,sha256=IKIViZKAPyR4FWnS0JP9AZ19vIEO3qoS5-YRGAO1ow8,5430
@@ -1920,7 +1920,7 @@ plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5w
1920
1920
  plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
1921
1921
  plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
1922
1922
  plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
1923
- plancraft-0.3.5.dist-info/METADATA,sha256=QxQSXPXF162We8KwESaZ-nn94gqfz_5PQaXNDWkvV1Y,11147
1924
- plancraft-0.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
- plancraft-0.3.5.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
- plancraft-0.3.5.dist-info/RECORD,,
1923
+ plancraft-0.3.7.dist-info/METADATA,sha256=fZOUxkStOAD_MOeokSk-MNcsFIf5m2DmrJnALuNrA9Q,11147
1924
+ plancraft-0.3.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
+ plancraft-0.3.7.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
+ plancraft-0.3.7.dist-info/RECORD,,