plancraft 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plancraft/config.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from typing import Literal, Optional, Union
2
2
 
3
- from pydantic import BaseModel, model_validator
3
+ from pydantic import BaseModel
4
4
  from pydantic_settings import BaseSettings, SettingsConfigDict
5
5
 
6
6
  from plancraft.environment.recipes import RECIPES
@@ -40,15 +40,6 @@ class PlancraftConfig(BaseModel):
40
40
  False # whether to use multimodal content format
41
41
  )
42
42
 
43
- @model_validator(mode="after")
44
- def validate(self):
45
- assert set(
46
- self.valid_actions
47
- ).issubset(
48
- {"move", "smelt", "think", "search", "impossible"}
49
- ), "valid_actions should be subset of {'move', 'smelt', 'think', 'search', 'impossible'}"
50
- return self
51
-
52
43
 
53
44
  class WandbConfig(BaseModel):
54
45
  project: str
@@ -0,0 +1,21 @@
1
+ from .actions import (
2
+ ImpossibleActionHandler,
3
+ MoveActionHandler,
4
+ SmeltActionHandler,
5
+ ThinkActionHandler,
6
+ convert_from_slot_index,
7
+ convert_to_slot_index,
8
+ )
9
+ from .env import PlancraftEnvironment
10
+ from .search import GoldSearchActionHandler
11
+
12
+ __all__ = [
13
+ "ImpossibleActionHandler",
14
+ "MoveActionHandler",
15
+ "SmeltActionHandler",
16
+ "ThinkActionHandler",
17
+ "PlancraftEnvironment",
18
+ "GoldSearchActionHandler",
19
+ "convert_from_slot_index",
20
+ "convert_to_slot_index",
21
+ ]
@@ -1,3 +1,7 @@
1
+ import abc
2
+ import re
3
+ from typing import Optional
4
+
1
5
  from pydantic import BaseModel, field_validator, model_validator
2
6
 
3
7
 
@@ -40,6 +44,39 @@ def convert_from_slot_index(slot_index: int) -> str:
40
44
  return f"[I{slot_index-9}]"
41
45
 
42
46
 
47
class ActionHandlerBase(abc.ABC):
    """
    Abstract interface for an action (tool) the model is allowed to use.

    A concrete handler supplies the text used to advertise the action in the
    prompt and a `match` method that recognises the action in generated text.
    """

    @property
    @abc.abstractmethod
    def action_name(self) -> str:
        """
        Name of the action, as the model is expected to write it
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def prompt_description(self) -> str:
        """
        Description of the action shown to the model in the prompt
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def prompt_format_example(self) -> str:
        """
        Example of the output format the model should follow for this action
        """
        raise NotImplementedError

    @abc.abstractmethod
    def match(self, generated_text: str):
        """
        Try to recognise this action/tool in the generated text
        """
        raise NotImplementedError
78
+
79
+
43
80
  class MoveAction(BaseModel):
44
81
  """ "Moves an item from one slot to another"""
45
82
 
@@ -152,3 +189,118 @@ class StopAction(BaseModel):
152
189
 
153
190
  # when symbolic action is true, can either move objects around or smelt
154
191
  SymbolicAction = MoveAction | SmeltAction
192
+
193
+
194
class MoveActionHandler(ActionHandlerBase):
    """Handler for the `move` action."""

    @property
    def action_name(self) -> str:
        return "move"

    @property
    def prompt_description(self) -> str:
        return "Transfer a specific quantity of an item from one slot to another"

    @property
    def prompt_format_example(self) -> str:
        return "`move: from [Source] to [Target] with quantity N`"

    def match(self, generated_text: str) -> Optional[MoveAction | str]:
        """
        Turn the raw model response into a MoveAction.

        Returns None when the text does not contain `move:`, and a
        "Format Error: ..." string when an AttributeError is raised while
        extracting the fields or building the action.
        """
        if re.search(f"({self.action_name}):", generated_text) is None:
            return None

        # patterns are evaluated in this order: source, target, quantity
        field_patterns = {
            "slot_from": r" from (\[[ABCI]?\d+\])",
            "slot_to": r" to (\[[ABCI]?\d+\])",
            "quantity": r"with quantity (\d+)",
        }
        try:
            # a failed search yields None, so `.group` raises AttributeError
            fields = {
                field: re.search(pattern, generated_text).group(1)
                for field, pattern in field_patterns.items()
            }
            return MoveAction(**fields)
        except AttributeError as e:
            return f"Format Error: {e}"
226
+
227
+
228
class SmeltActionHandler(ActionHandlerBase):
    """Handler for the `smelt` action."""

    @property
    def action_name(self) -> str:
        return "smelt"

    @property
    def prompt_description(self) -> str:
        return "Smelt an item in a furnace and moves the output to a specific slot"

    @property
    def prompt_format_example(self) -> str:
        return "`smelt: from [Source] to [Target] with quantity N`"

    def match(self, generated_text: str) -> Optional[SmeltAction | str]:
        """
        Turn the raw model response into a SmeltAction.

        Returns None when the text does not contain `smelt:`, and a
        "Format Error: ..." string when an AttributeError is raised while
        extracting the fields or building the action.
        """
        keyword_found = re.search(f"({self.action_name}):", generated_text)
        if not keyword_found:
            return None

        def _first_group(pattern: str) -> str:
            # raises AttributeError when the pattern is absent (search -> None)
            return re.search(pattern, generated_text).group(1)

        try:
            source = _first_group(r" from (\[[ABCI]?\d+\])")
            target = _first_group(r" to (\[[ABCI]?\d+\])")
            amount = _first_group(r"with quantity (\d+)")
            return SmeltAction(slot_from=source, slot_to=target, quantity=amount)
        except AttributeError as e:
            return f"Format Error: {e}"
260
+
261
+
262
class ImpossibleActionHandler(ActionHandlerBase):
    """Handler for the `impossible` action, which stops the task."""

    @property
    def prompt_description(self) -> str:
        return "Stop task if it is certain that it is impossible with given inventory"

    @property
    def prompt_format_example(self) -> str:
        return "`impossible: <reason>`"

    @property
    def action_name(self) -> str:
        return "impossible"

    def match(self, generated_text) -> Optional[StopAction]:
        """
        Parse the raw model response to a StopAction

        Returns None when the text does not contain `impossible:`. Otherwise
        returns a StopAction whose reason is the rest of that line (possibly
        an empty string).
        """
        action_match = re.search(f"({self.action_name}):", generated_text)
        if not action_match:
            return None

        # Preferred form: keyword followed by a single space, then the reason.
        reason_match = re.search(r"impossible: (.*)", generated_text)
        if reason_match:
            return StopAction(reason=reason_match.group(1))

        # The keyword was found but never followed by a space (e.g.
        # "impossible:no wood" or a bare "impossible:"). Previously this path
        # called `.group` on None and raised AttributeError; take whatever
        # follows the colon on that line instead.
        reason = re.search(r"impossible:(.*)", generated_text).group(1).strip()
        return StopAction(reason=reason)
284
+
285
+
286
class ThinkActionHandler(ActionHandlerBase):
    """Handler for the `think` action."""

    @property
    def action_name(self) -> str:
        return "think"

    @property
    def prompt_description(self) -> str:
        return "Generate thoughts to help you decide on the next action"

    @property
    def prompt_format_example(self) -> str:
        return "`think: <thought message>`"

    def match(self, generated_text) -> Optional[str]:
        """
        Acknowledge a think step: "Ok" when the text contains `think:`,
        otherwise None.
        """
        found = re.search(f"({self.action_name}):", generated_text)
        return "Ok" if found else None
@@ -1,6 +1,5 @@
1
1
  import glob
2
2
  import os
3
- from collections import defaultdict
4
3
  from typing import Literal, Optional
5
4
 
6
5
  import numpy as np
@@ -323,6 +322,9 @@ class PlancraftEnvironment:
323
322
  # not enough
324
323
  if self.slot_empty(slot_from) or self.state[slot_from]["quantity"] < quantity:
325
324
  return
325
+ # if craft slot - must take all
326
+ if slot_from == 0 and self.state[slot_from]["quantity"] != quantity:
327
+ return
326
328
 
327
329
  item = self.state[slot_from]
328
330
 
@@ -2,31 +2,11 @@ import numpy as np
2
2
 
3
3
  from plancraft.environment.env import PlancraftEnvironment
4
4
  from plancraft.environment.search import gold_search_recipe
5
-
6
- VALID_ACTIONS = ["move", "smelt", "think", "search", "impossible"]
7
-
8
- ACTIONS_DESCRIPTIONS = {
9
- "move": {
10
- "description": "Transfer a specific quantity of an item from one slot to another",
11
- "format": "`move: from [Source] to [Target] with quantity N`",
12
- },
13
- "smelt": {
14
- "description": "Smelt an item in a furnace and moves the output to a specific slot",
15
- "format": "`smelt: from [Source] to [Target] with quantity N`",
16
- },
17
- "think": {
18
- "description": "Generate thoughts to help you decide on the next action",
19
- "format": "`think: <thought message>`",
20
- },
21
- "search": {
22
- "description": "Search for a recipe to craft a specific item",
23
- "format": "`search: <recipe name>`",
24
- },
25
- "impossible": {
26
- "description": "Stop task if it is certain that it is impossible with given inventory",
27
- "format": "`impossible: <reason>`",
28
- },
29
- }
5
+ from plancraft.environment.actions import (
6
+ ActionHandlerBase,
7
+ MoveActionHandler,
8
+ SmeltActionHandler,
9
+ )
30
10
 
31
11
  BASE_SYSTEM_PROMPT = """You are crafting in Minecraft. You need to decide on the next action.
32
12
 
@@ -48,23 +28,6 @@ Constraints:
48
28
  - If an item is not in slot [0] then the recipe is incorrect
49
29
  - You need to move items from [0] to a free inventory slot to complete the crafting process"""
50
30
 
51
-
52
- def get_system_prompt(actions: list[str]):
53
- assert set(actions).issubset(VALID_ACTIONS), f"Invalid actions: {actions}"
54
- assert "move" in actions, "move should be one of the actions"
55
- assert "smelt" in actions, "smelt should be one of the actions"
56
-
57
- descriptions = ""
58
- for action in actions:
59
- descriptions += f"\n\t- {action}: {ACTIONS_DESCRIPTIONS[action]['description']}"
60
-
61
- output_format = ""
62
- for action in actions:
63
- output_format += f"\n\t- {ACTIONS_DESCRIPTIONS[action]['format']}"
64
-
65
- return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
66
-
67
-
68
31
  CRAFTING_STEPS = [
69
32
  "Craft an item of type: andesite\ninventory:\n - diorite [I18] quantity 1\n - cobblestone [I30] quantity 1",
70
33
  "Craft an item of type: andesite\ninventory:\n - diorite [B1] quantity 1\n - cobblestone [I30] quantity 1",
@@ -94,8 +57,26 @@ SEARCH_STEPS = [
94
57
  ]
95
58
 
96
59
 
60
def get_system_prompt(
    handlers: "list[ActionHandlerBase] | None" = None,
):
    """
    Build the system prompt listing the available actions and their formats.

    Args:
        handlers: action handlers to advertise, in the order they should be
            listed. Defaults to a fresh [MoveActionHandler(), SmeltActionHandler()].

    Returns:
        The base system prompt followed by an "Actions:" section, a "Format"
        section and the base system prompt example.

    Raises:
        AssertionError: if no handler is named "move" or none is named "smelt".
    """
    # Build the default per call instead of sharing one list (and its handler
    # instances) created at import time as a mutable default argument.
    if handlers is None:
        handlers = [MoveActionHandler(), SmeltActionHandler()]

    action_names = [handler.action_name for handler in handlers]
    # NOTE: kept as asserts so callers still see AssertionError; these checks
    # are skipped when Python runs with -O.
    assert "move" in action_names, "MoveActionHandler should be one of the handlers"
    assert "smelt" in action_names, "SmeltActionHandler should be one of the handlers"

    descriptions = "".join(
        f"\n\t- {handler.action_name}: {handler.prompt_description}"
        for handler in handlers
    )
    output_format = "".join(
        f"\n\t- {handler.prompt_format_example}" for handler in handlers
    )

    return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
76
+
77
+
97
78
  def get_prompt_example(
98
- actions: list[str],
79
+ handlers: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
99
80
  use_text_inventory=True,
100
81
  use_multimodal_content_format=False,
101
82
  use_images=False,
@@ -103,10 +84,9 @@ def get_prompt_example(
103
84
  """
104
85
  Generates a few-shot prompt for the crafting task
105
86
  """
106
-
107
- assert set(actions).issubset(VALID_ACTIONS), f"Invalid actions: {actions}"
108
- assert "move" in actions, "move should be one of the actions"
109
- assert "smelt" in actions, "smelt should be one of the actions"
87
+ handler_names = [handler.action_name for handler in handlers]
88
+ assert "move" in handler_names, "move should be one of the actions"
89
+ assert "smelt" in handler_names, "smelt should be one of the actions"
110
90
 
111
91
  if use_images:
112
92
  assert (
@@ -120,12 +100,12 @@ def get_prompt_example(
120
100
  text = text.split("\ninventory:\n")[0]
121
101
 
122
102
  example_dialogue.append({"role": "user", "content": text})
123
- if "search" in actions and SEARCH_STEPS[i]:
103
+ if "search" in handler_names and SEARCH_STEPS[i]:
124
104
  example_dialogue.append({"role": "assistant", "content": SEARCH_STEPS[i]})
125
105
  search_target = text.split("seach: ")[-1].strip()
126
106
  search_response = gold_search_recipe(search_target)
127
107
  example_dialogue.append({"role": "user", "content": search_response})
128
- if "think" in actions:
108
+ if "think" in handler_names:
129
109
  example_dialogue.append({"role": "assistant", "content": THINK_STEPS[i]})
130
110
  example_dialogue.append({"role": "user", "content": "Ok"})
131
111
  example_dialogue.append({"role": "assistant", "content": BASE_ACTION_STEPS[i]})
@@ -1,4 +1,7 @@
1
- from plancraft.environment.actions import convert_from_slot_index
1
+ import re
2
+ from typing import Optional
3
+
4
+ from plancraft.environment.actions import convert_from_slot_index, ActionHandlerBase
2
5
  from plancraft.environment.recipes import RECIPES
3
6
 
4
7
 
@@ -24,3 +27,27 @@ def gold_search_recipe(recipe_name: str) -> str:
24
27
  recipe_instructions = f"smelt {r.ingredient}\n"
25
28
  out_string += f"recipe {i+1}:\n{recipe_instructions}"
26
29
  return out_string
30
+
31
+
32
class GoldSearchActionHandler(ActionHandlerBase):
    """Handler for the `search` action, answered with gold_search_recipe."""

    @property
    def prompt_description(self) -> str:
        return "Search for recipes to craft a specific item"

    @property
    def prompt_format_example(self) -> str:
        return "`search: <recipe name>`"

    @property
    def action_name(self) -> str:
        return "search"

    def match(self, generated_text) -> Optional[str]:
        """
        Parse the raw model response to a SearchAction

        Returns None when the text does not contain `search:`. Returns a
        "Format Error: ..." message when the keyword is present but no recipe
        name follows it, and otherwise the output of gold_search_recipe for
        the requested name.
        """
        action_match = re.search(f"({self.action_name}):", generated_text)
        if not action_match:
            return None

        target_match = re.search(r"search: (\w+)", generated_text)
        if target_match is None:
            # e.g. "search:" with nothing after it, or "search: <name>" copied
            # from the format example. Previously this called `.group` on
            # None and raised AttributeError instead of giving feedback.
            return f"Format Error: expected {self.prompt_format_example}"
        return gold_search_recipe(target_match.group(1))
plancraft/evaluator.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import json
2
2
  import os
3
3
  import random
4
- import re
5
4
  import string
6
5
  import time
7
6
 
@@ -12,15 +11,19 @@ from tqdm import tqdm
12
11
 
13
12
  import wandb
14
13
  from plancraft.config import EvalConfig, PlancraftExample
15
- from plancraft.environment.actions import MoveAction, SmeltAction, StopAction
14
+ from plancraft.environment.actions import (
15
+ StopAction,
16
+ ActionHandlerBase,
17
+ MoveActionHandler,
18
+ SmeltActionHandler,
19
+ )
16
20
  from plancraft.environment.env import (
17
21
  PlancraftEnvironment,
18
22
  get_objective_str,
19
23
  target_and_inventory_to_text_obs,
20
24
  )
21
- from plancraft.environment.search import gold_search_recipe
22
- from plancraft.models import get_model
23
25
  from plancraft.utils import History
26
+ from plancraft.models.base import PlancraftBaseModel
24
27
 
25
28
 
26
29
  class Evaluator:
@@ -35,12 +38,18 @@ class Evaluator:
35
38
  Finally, it also saves the results of the evaluation and the images generated during the evaluation.
36
39
  """
37
40
 
38
- def __init__(self, cfg: EvalConfig):
41
+ def __init__(
42
+ self,
43
+ cfg: EvalConfig,
44
+ run_name: str,
45
+ model: PlancraftBaseModel,
46
+ actions: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
47
+ ):
39
48
  self.cfg = cfg
40
- self.output_dir = (
41
- f"{cfg.plancraft.output_dir}/{self.evaluator_name()}/{cfg.plancraft.split}"
42
- )
49
+ self.run_name = run_name
50
+ self.output_dir = f"{cfg.plancraft.output_dir}/{run_name}/{cfg.plancraft.split}"
43
51
  self.generation_number = 0
52
+ self.actions = actions
44
53
 
45
54
  # load all examples
46
55
  self.examples: list[PlancraftExample] = self.load_dataset(cfg.plancraft.split)
@@ -53,7 +62,7 @@ class Evaluator:
53
62
 
54
63
  # initialise history/dialogue tracking
55
64
  self.history = History(
56
- valid_actions=cfg.plancraft.valid_actions,
65
+ actions=actions,
57
66
  use_multimodal_content_format=cfg.plancraft.use_multimodal_content_format,
58
67
  use_images=cfg.plancraft.use_images,
59
68
  use_text_inventory=cfg.plancraft.use_text_inventory,
@@ -61,45 +70,7 @@ class Evaluator:
61
70
  )
62
71
 
63
72
  # load model
64
- self.model = get_model(cfg)
65
-
66
- def evaluator_name(self) -> str:
67
- if self.cfg.plancraft.use_text_inventory and self.cfg.plancraft.use_images:
68
- name_str = "both"
69
- elif self.cfg.plancraft.use_images:
70
- name_str = "images"
71
- elif self.cfg.plancraft.use_text_inventory:
72
- name_str = "text"
73
- else:
74
- raise ValueError(
75
- "At least one of use_text_inventory or use_images should be True"
76
- )
77
-
78
- if self.cfg.plancraft.use_fasterrcnn:
79
- name_str += "_fasterrcnn"
80
-
81
- model_name = self.cfg.plancraft.model.split("/")[-1]
82
- if self.cfg.plancraft.adapter != "":
83
- model_name = self.cfg.plancraft.adapter.split("/")[-1]
84
-
85
- mode = self.cfg.plancraft.mode
86
- if mode in ["dummy", "oracle"]:
87
- return f"{mode}_{name_str}"
88
-
89
- valid_actions_to_str = {
90
- "move": "m",
91
- "smelt": "s",
92
- "think": "t",
93
- "search": "se",
94
- "impossible": "i",
95
- }
96
- actions = "|".join(
97
- [
98
- valid_actions_to_str[action]
99
- for action in self.cfg.plancraft.valid_actions
100
- ]
101
- )
102
- return f"{self.cfg.plancraft.mode}_{name_str}_{model_name}_{actions}"
73
+ self.model = model
103
74
 
104
75
  def save_results_dict(self, example: PlancraftExample, results_dict: dict):
105
76
  output_dir = f"{self.output_dir}/{self.generation_number}"
@@ -152,48 +123,17 @@ class Evaluator:
152
123
  return True
153
124
  return False
154
125
 
155
- def parse_raw_model_response(
156
- self, content: str
157
- ) -> str | MoveAction | SmeltAction | StopAction:
126
+ def parse_raw_model_response(self, generated_text: str):
158
127
  """
159
- Given a message and set of valid actions, parse the content to return the action
128
+ Given a message and set of action handlers, parse the content to return the action
160
129
  or a message if the action is not valid/requires message response
161
130
  """
162
-
163
- action_match = re.search(
164
- f"({'|'.join(self.cfg.plancraft.valid_actions)}):", content
165
- )
166
- if action_match:
167
- action = action_match.group(1)
168
- if action == "think":
169
- return "Ok"
170
- elif action == "impossible":
171
- reason = re.search(r"impossible: (.*)", content).group(1)
172
- return StopAction(reason=reason)
173
- elif action == "search":
174
- search_target = re.search(r"search: (\w+)", content).group(1)
175
- return gold_search_recipe(search_target)
176
- else:
177
- try:
178
- slot_from = re.search(r" from (\[[ABCI]?\d+\])", content).group(1)
179
- slot_to = re.search(r" to (\[[ABCI]?\d+\])", content).group(1)
180
- quantity = re.search(r"with quantity (\d+)", content).group(1)
181
- if action == "move":
182
- action = MoveAction(
183
- slot_from=slot_from,
184
- slot_to=slot_to,
185
- quantity=quantity,
186
- )
187
- else:
188
- action = SmeltAction(
189
- slot_from=slot_from,
190
- slot_to=slot_to,
191
- quantity=quantity,
192
- )
193
- return action
194
- except AttributeError as e:
195
- return f"Format Error: {e}"
196
- return f"Only select actions from the following: {', '.join(self.cfg.plancraft.valid_actions)}"
131
+ for handler in self.actions:
132
+ match_output = handler.match(generated_text)
133
+ if match_output:
134
+ return match_output
135
+ action_names = [handler.action_name for handler in self.actions]
136
+ return f"Only select actions from the following: {', '.join(action_names)}"
197
137
 
198
138
  def convert_observation_to_message(
199
139
  self,
@@ -230,11 +170,8 @@ class Evaluator:
230
170
  return {"content": content_list}
231
171
 
232
172
  def eval_example(self, example: PlancraftExample) -> dict:
233
- """
234
- Given the loaded model and an example from Plancraft
235
- run the episode until success or termination.
236
- Termination can happen from: early stopping (stuck) / max_steps / stop_action
237
- """
173
+ """Given the loaded model and an example from Plancraft
174
+ run the episode until success or termination."""
238
175
  success = False
239
176
  num_non_env_actions = 0
240
177
  self.reset(example)
@@ -346,7 +283,7 @@ class Evaluator:
346
283
  f"Running evaluation over {len(self.examples)} examples {self.cfg.plancraft.num_generations} times."
347
284
  )
348
285
  run_name = (
349
- f"{self.evaluator_name()} {self.cfg.plancraft.split}".replace(" ", "_")
286
+ f"{self.run_name} {self.cfg.plancraft.split}".replace(" ", "_")
350
287
  .replace(".", "_")
351
288
  .strip()
352
289
  )
@@ -38,8 +38,6 @@ def find_free_inventory_slot(inventory: dict, from_slot: int) -> int:
38
38
  if slot == from_slot:
39
39
  continue
40
40
  item_type = item["type"]
41
- # if item["quantity"] == 0:
42
- # item_type = "air"
43
41
  if item_type not in type_to_slot:
44
42
  type_to_slot[item_type] = [slot]
45
43
  else:
@@ -57,12 +55,6 @@ def find_free_inventory_slot(inventory: dict, from_slot: int) -> int:
57
55
  <= MAX_STACK_SIZE[from_item_type]
58
56
  ):
59
57
  return slot
60
- # if there is a free slot with air
61
- # if "air" in type_to_slot:
62
- # for slot in type_to_slot["air"]:
63
- # if slot > 10:
64
- # return slot
65
-
66
58
  if len(empty_slots) > 0:
67
59
  return empty_slots.pop()
68
60
 
@@ -80,8 +72,6 @@ def get_inventory_counter(inventory: dict) -> Counter:
80
72
  for slot, item in inventory.items():
81
73
  if slot == 0:
82
74
  continue
83
- # if item["type"] == "air":
84
- # continue
85
75
  counter[item["type"]] += item["quantity"]
86
76
  return counter
87
77
 
@@ -170,7 +160,6 @@ class OracleModel(PlancraftBaseModel):
170
160
 
171
161
  if isinstance(plan_recipe, ShapelessRecipe):
172
162
  crafting_slot = 1
173
-
174
163
  # add each item to crafting slots
175
164
  for item, quantity in items_to_use_counter.items():
176
165
  n = 0
plancraft/utils.py CHANGED
@@ -7,6 +7,7 @@ import torch
7
7
  from loguru import logger
8
8
 
9
9
  from plancraft.environment.actions import (
10
+ ActionHandlerBase,
10
11
  MoveAction,
11
12
  SmeltAction,
12
13
  )
@@ -21,21 +22,21 @@ class History:
21
22
  """
22
23
  History class to keep track of dialogue, actions, inventory and images
23
24
  Args:
24
- valid_actions: list of valid actions
25
+ actions: list of action handlers (ActionHandlerBase instances)
25
26
  initial_dialogue: list of dialogue messages
26
27
  use_multimodal_content_format: whether to use multimodal content format (list of content with types)
27
28
  """
28
29
 
29
30
  def __init__(
30
31
  self,
31
- valid_actions: list[str] = ["move", "smelt"],
32
+ actions: list[ActionHandlerBase] = [],
32
33
  use_multimodal_content_format=False,
33
34
  few_shot=False,
34
35
  use_images=False,
35
36
  use_text_inventory=False,
36
37
  resolution="high",
37
38
  ):
38
- self.valid_actions = valid_actions
39
+ self.action_handlers = actions
39
40
  self.use_multimodal_content_format = use_multimodal_content_format
40
41
  self.few_shot = few_shot
41
42
  self.use_images = use_images
@@ -58,7 +59,7 @@ class History:
58
59
 
59
60
  def system_prompt(self):
60
61
  # kept separate from dialogue history because certain models deal with system prompt differently
61
- system_prompt_text = get_system_prompt(self.valid_actions)
62
+ system_prompt_text = get_system_prompt(handlers=self.action_handlers)
62
63
  if self.use_multimodal_content_format:
63
64
  return {
64
65
  "role": "system",
@@ -75,7 +76,7 @@ class History:
75
76
 
76
77
  if self.few_shot:
77
78
  self.prompt_examples = get_prompt_example(
78
- self.valid_actions,
79
+ self.action_handlers,
79
80
  use_text_inventory=self.use_text_inventory,
80
81
  use_multimodal_content_format=self.use_multimodal_content_format,
81
82
  use_images=self.use_images,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plancraft
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Plancraft: an evaluation dataset for planning with LLM agents
5
5
  License: MIT License
6
6
 
@@ -25,7 +25,6 @@ License: MIT License
25
25
  SOFTWARE.
26
26
  License-File: LICENSE
27
27
  Requires-Python: >=3.9
28
- Requires-Dist: accelerate>=1.1.1
29
28
  Requires-Dist: hydra-core>=1.3.2
30
29
  Requires-Dist: imageio>=2.36.0
31
30
  Requires-Dist: loguru
@@ -65,7 +64,7 @@ Description-Content-Type: text/markdown
65
64
 
66
65
  [Paper](https://arxiv.org/abs/2412.21033) | [Website](https://gautierdag.github.io/plancraft/)
67
66
 
68
- Plancraft is a minecraft environment and agent that innovates on planning LLM agents with a retriever
67
+ Plancraft is a minecraft environment and agent that innovates on planning LLM agents with an oracle RAG retriever.
69
68
 
70
69
  You can install the package by running the following command:
71
70
 
@@ -123,8 +122,10 @@ from plancraft.config import EvalConfig
123
122
  def main():
124
123
  # Create the config
125
124
  config = EvalConfig(...)
125
+ # create model -- Note you can create your own model by subclassing PlancraftBaseModel
126
+ model = get_model(config)
126
127
  # Create the evaluator
127
- evaluator = Evaluator(config)
128
+ evaluator = Evaluator(config, run_name="my_run", model=model)
128
129
  # Evaluate the agent
129
130
  evaluator.eval_all_seeds()
130
131
  ```
@@ -207,7 +208,7 @@ The observation returned by the `Evaluator` class is a dictionary with the follo
207
208
 
208
209
  To implement a model, you need to subclass the `PlancraftBaseModel` class and implement the `step` and `reset` method. See the `plancraft.models.dummy` module for an example of how to implement a basic model.
209
210
 
210
- You will also need to modify the `get_model` function in the `plancraft.models` module to return an instance of your model when the correct config is passed.
211
+ You should then be able to use the `Evaluator` class to evaluate it.
211
212
 
212
213
  ## Reproducing the Results tables in the paper
213
214
 
@@ -225,10 +226,13 @@ The image is available on [Docker Hub](https://hub.docker.com/r/gautierdag/planc
225
226
 
226
227
  ## To Do
227
228
 
229
+ Non-exhaustive list of things to do from highest to lowest priority:
230
+
231
+ - [ ] Add minecraft wiki scrape and non-oracle search for pages
232
+ - [ ] Improve planner to bring closer to optimal (the oracle planner does not consider future crafting steps when moving items -- see paper for more details)
228
233
  - [ ] Rerun image models with better bounding box model
229
234
  - [ ] Track bounding box accuracy
230
- - [ ] Improve planner to bring closer to optimal (the oracle planner does not consider future crafting steps when moving items -- see paper for more details)
231
- - [ ] Add minecraft wiki scrape and non-oracle search for pages
235
+ - [ ] Implement a version of the image environment entirely on cuda/pytorch rather than cpu
232
236
 
233
237
  ## PRs Welcomed
234
238
 
@@ -1,8 +1,8 @@
1
1
  plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- plancraft/config.py,sha256=hsEv_YFk4St0xb6uiT2zuWqgUw4-fZAC1jB_U6DM7HU,4544
3
- plancraft/evaluator.py,sha256=iPONcYvozqGpJ-Wr2EvtbwMamyiZDhNf3HJCvXBtsBk,16391
2
+ plancraft/config.py,sha256=HNHFDewz_0IF1EiPoS8B_ND5JfQvWjE4-0MbX-xvsRQ,4215
3
+ plancraft/evaluator.py,sha256=0J1Mk-n5Y_7L-WhuH6UpoMWhMnGtdFAGW-aqZDhuhLk,13844
4
4
  plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
5
- plancraft/utils.py,sha256=Fs9ZVaSjKxhSqn9UNNZx-dioB5Oyb7hO5c8QMkmW020,7011
5
+ plancraft/utils.py,sha256=7VWKVlDhoMacRypRRSKM1K3hwwJ0nHR3zyx9jZH1C1g,7042
6
6
  plancraft/data/test.json,sha256=7ozxAb-PzoaOMQbMMh52RvN0pQBor6aAUwMrtc2C-y0,1670677
7
7
  plancraft/data/test.small.easy.json,sha256=IsrnRUACUWUdq2_BKGw_H2GptstqmFw66y0Grwmrwj8,238854
8
8
  plancraft/data/test.small.json,sha256=RnPJJf_wLhdUQydrQo0H4KJvcD5PkSEVy5Bbi--Il2U,342843
@@ -10,15 +10,15 @@ plancraft/data/train.json,sha256=pdArGse10i6Dg5Oa56EJPH_fOmotVzv2q5LPJpmS_bQ,342
10
10
  plancraft/data/val.json,sha256=bfVFVQ_dmDSTCLojRkv1XIlct5zkwSg4AzsMp0gUUGI,1654481
11
11
  plancraft/data/val.small.easy.json,sha256=vgBotEu-mH8441jUyCN_6DZIRX1O5SpZatdmK-I7yNA,240202
12
12
  plancraft/data/val.small.json,sha256=WO7xerSWVOPcnLH1_MBiWwdHmqWP0DDGMhuF2RycBRo,300799
13
- plancraft/environment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- plancraft/environment/actions.py,sha256=r2BYSSoGt4GnNLzbWSVR99lfZ5oSkc0ENNBRqSwEzJ0,4829
15
- plancraft/environment/env.py,sha256=SY4ur1uLUApLmcGtgqDDcuiotFh5Cu8zLofWB6WLAh4,16190
13
+ plancraft/environment/__init__.py,sha256=XFsFny4lH195AwAmL-WeCaF9ZCMgc7IgXIwhQ8FTdgE,505
14
+ plancraft/environment/actions.py,sha256=D9QqBW7yWsbWCjxNyWp61Xtb0c6EtyXk3PZ1I8SRoBQ,9381
15
+ plancraft/environment/env.py,sha256=_VQewLUv8YpKLaNp9uye25lq4HFHd9ddTQr1Lqv4eOs,16290
16
16
  plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
17
17
  plancraft/environment/planner.py,sha256=eJExz3OxSzurIEdH9LOtMwFH9ApqMQ3CokVhmbV6Px0,3953
18
- plancraft/environment/prompts.py,sha256=qD7ezj-ASpl5XHGEsZ4UZkt5ubRRFgtXeCAFlRTjCok,7288
18
+ plancraft/environment/prompts.py,sha256=OKxiv02NIhRk5FZJUEDRLkVWVMc-aXKJi7i7X61uUmk,6633
19
19
  plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
20
20
  plancraft/environment/sampler.py,sha256=IZT-XjmWSZrs0zDyRTMjYytXxewdwYf5YGGdKsR5ll4,7643
21
- plancraft/environment/search.py,sha256=PUBrkgy9dgiZY8v1HyxVIxXW7n01xIQbdXXCfNHrYU4,1055
21
+ plancraft/environment/search.py,sha256=uFHpLvW40rMKOxDabcyWrpOrhKLDZqAJOF_jew4_WXk,1837
22
22
  plancraft/environment/assets/constants.json,sha256=kyOIOh82CTTMMGEIS60k5k6M-6fkEmYDoGAnvi3Zx5k,1379016
23
23
  plancraft/environment/assets/minecraft_font.ttf,sha256=AzoK9cgggXwjFPHtIO7uz-YaDrminl3nvB-VsaTvTAk,60992
24
24
  plancraft/environment/assets/table.png,sha256=IKIViZKAPyR4FWnS0JP9AZ19vIEO3qoS5-YRGAO1ow8,5430
@@ -1917,10 +1917,10 @@ plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
1917
1917
  plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
1918
1918
  plancraft/models/dummy.py,sha256=HVuX5Y9CPNDP8Ne4BNTe2qyWdxyhIgvPIIV3OhXxzD8,1062
1919
1919
  plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
1920
- plancraft/models/oracle.py,sha256=WkzupIoetppGzPst5kD0IRe_9VsQSRJzPB6N-_ULa-k,8750
1920
+ plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
1921
1921
  plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
1922
1922
  plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
1923
- plancraft-0.3.0.dist-info/METADATA,sha256=yPPTvrICB1iLuI3NquneaK6cPUuGH1w4Z8hxv5SGQnM,11119
1924
- plancraft-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
- plancraft-0.3.0.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
- plancraft-0.3.0.dist-info/RECORD,,
1923
+ plancraft-0.3.1.dist-info/METADATA,sha256=KKsWXHGTbWBXplk1E5F0b_AJvAAu7K91k5sR3eLtKM4,11306
1924
+ plancraft-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
+ plancraft-0.3.1.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
+ plancraft-0.3.1.dist-info/RECORD,,