plancraft 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
plancraft/config.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from typing import Literal, Optional, Union
2
2
 
3
- from pydantic import BaseModel, model_validator
3
+ from pydantic import BaseModel
4
4
  from pydantic_settings import BaseSettings, SettingsConfigDict
5
5
 
6
6
  from plancraft.environment.recipes import RECIPES
@@ -40,15 +40,6 @@ class PlancraftConfig(BaseModel):
40
40
  False # whether to use multimodal content format
41
41
  )
42
42
 
43
- @model_validator(mode="after")
44
- def validate(self):
45
- assert set(
46
- self.valid_actions
47
- ).issubset(
48
- {"move", "smelt", "think", "search", "impossible"}
49
- ), "valid_actions should be subset of {'move', 'smelt', 'think', 'search', 'impossible'}"
50
- return self
51
-
52
43
 
53
44
  class WandbConfig(BaseModel):
54
45
  project: str
@@ -0,0 +1,21 @@
1
+ from .actions import (
2
+ ImpossibleActionHandler,
3
+ MoveActionHandler,
4
+ SmeltActionHandler,
5
+ ThinkActionHandler,
6
+ convert_from_slot_index,
7
+ convert_to_slot_index,
8
+ )
9
+ from .env import PlancraftEnvironment
10
+ from .search import GoldSearchActionHandler
11
+
12
+ __all__ = [
13
+ "ImpossibleActionHandler",
14
+ "MoveActionHandler",
15
+ "SmeltActionHandler",
16
+ "ThinkActionHandler",
17
+ "PlancraftEnvironment",
18
+ "GoldSearchActionHandler",
19
+ "convert_from_slot_index",
20
+ "convert_to_slot_index",
21
+ ]
@@ -1,3 +1,7 @@
1
+ import abc
2
+ import re
3
+ from typing import Optional
4
+
1
5
  from pydantic import BaseModel, field_validator, model_validator
2
6
 
3
7
 
@@ -40,6 +44,39 @@ def convert_from_slot_index(slot_index: int) -> str:
40
44
  return f"[I{slot_index-9}]"
41
45
 
42
46
 
47
+ class ActionHandlerBase(abc.ABC):
48
+ @property
49
+ @abc.abstractmethod
50
+ def prompt_description(self) -> str:
51
+ """
52
+ Return the prompt description for the model
53
+ """
54
+ raise NotImplementedError()
55
+
56
+ @property
57
+ @abc.abstractmethod
58
+ def prompt_format_example(self) -> str:
59
+ """
60
+ Return the prompt format example for the model
61
+ """
62
+ raise NotImplementedError()
63
+
64
+ @property
65
+ @abc.abstractmethod
66
+ def action_name(self) -> str:
67
+ """
68
+ Return the action name for the model
69
+ """
70
+ raise NotImplementedError()
71
+
72
+ @abc.abstractmethod
73
+ def match(self, generated_text: str):
74
+ """
75
+ Match the generated text to the action/tool
76
+ """
77
+ raise NotImplementedError()
78
+
79
+
43
80
  class MoveAction(BaseModel):
44
81
  """ "Moves an item from one slot to another"""
45
82
 
@@ -152,3 +189,118 @@ class StopAction(BaseModel):
152
189
 
153
190
  # when symbolic action is true, can either move objects around or smelt
154
191
  SymbolicAction = MoveAction | SmeltAction
192
+
193
+
194
+ class MoveActionHandler(ActionHandlerBase):
195
+ @property
196
+ def prompt_description(self) -> str:
197
+ return "Transfer a specific quantity of an item from one slot to another"
198
+
199
+ @property
200
+ def prompt_format_example(self) -> str:
201
+ return "`move: from [Source] to [Target] with quantity N`"
202
+
203
+ @property
204
+ def action_name(self) -> str:
205
+ return "move"
206
+
207
+ def match(self, generated_text: str) -> Optional[MoveAction | str]:
208
+ """
209
+ Parse the raw model response to a MoveAction
210
+ """
211
+ action_match = re.search(f"({self.action_name}):", generated_text)
212
+ if not action_match:
213
+ return
214
+ try:
215
+ slot_from = re.search(r" from (\[[ABCI]?\d+\])", generated_text).group(1)
216
+ slot_to = re.search(r" to (\[[ABCI]?\d+\])", generated_text).group(1)
217
+ quantity = re.search(r"with quantity (\d+)", generated_text).group(1)
218
+ action = MoveAction(
219
+ slot_from=slot_from,
220
+ slot_to=slot_to,
221
+ quantity=quantity,
222
+ )
223
+ return action
224
+ except AttributeError as e:
225
+ return f"Format Error: {e}"
226
+
227
+
228
+ class SmeltActionHandler(ActionHandlerBase):
229
+ @property
230
+ def prompt_description(self) -> str:
231
+ return "Smelt an item in a furnace and moves the output to a specific slot"
232
+
233
+ @property
234
+ def prompt_format_example(self) -> str:
235
+ return "`smelt: from [Source] to [Target] with quantity N`"
236
+
237
+ @property
238
+ def action_name(self) -> str:
239
+ return "smelt"
240
+
241
+ def match(self, generated_text: str) -> Optional[SmeltAction | str]:
242
+ """
243
+ Parse the raw model response to a SmeltAction
244
+ """
245
+ action_match = re.search(f"({self.action_name}):", generated_text)
246
+ if not action_match:
247
+ return
248
+ try:
249
+ slot_from = re.search(r" from (\[[ABCI]?\d+\])", generated_text).group(1)
250
+ slot_to = re.search(r" to (\[[ABCI]?\d+\])", generated_text).group(1)
251
+ quantity = re.search(r"with quantity (\d+)", generated_text).group(1)
252
+ action = SmeltAction(
253
+ slot_from=slot_from,
254
+ slot_to=slot_to,
255
+ quantity=quantity,
256
+ )
257
+ return action
258
+ except AttributeError as e:
259
+ return f"Format Error: {e}"
260
+
261
+
262
+ class ImpossibleActionHandler(ActionHandlerBase):
263
+ @property
264
+ def prompt_description(self) -> str:
265
+ return "Stop task if it is certain that it is impossible with given inventory"
266
+
267
+ @property
268
+ def prompt_format_example(self) -> str:
269
+ return "`impossible: <reason>`"
270
+
271
+ @property
272
+ def action_name(self) -> str:
273
+ return "impossible"
274
+
275
+ def match(self, generated_text) -> Optional[StopAction]:
276
+ """
277
+ Parse the raw model response to a StopAction
278
+ """
279
+ action_match = re.search(f"({self.action_name}):", generated_text)
280
+ if not action_match:
281
+ return
282
+ reason = re.search(r"impossible: (.*)", generated_text).group(1)
283
+ return StopAction(reason=reason)
284
+
285
+
286
+ class ThinkActionHandler(ActionHandlerBase):
287
+ @property
288
+ def prompt_description(self) -> str:
289
+ return "Generate thoughts to help you decide on the next action"
290
+
291
+ @property
292
+ def prompt_format_example(self) -> str:
293
+ return "`think: <thought message>`"
294
+
295
+ @property
296
+ def action_name(self) -> str:
297
+ return "think"
298
+
299
+ def match(self, generated_text) -> Optional[str]:
300
+ """
301
+ Return "Ok" when the raw model response contains a think action
302
+ """
303
+ action_match = re.search(f"({self.action_name}):", generated_text)
304
+ if not action_match:
305
+ return
306
+ return "Ok"
@@ -1,6 +1,5 @@
1
1
  import glob
2
2
  import os
3
- from collections import defaultdict
4
3
  from typing import Literal, Optional
5
4
 
6
5
  import numpy as np
@@ -323,6 +322,9 @@ class PlancraftEnvironment:
323
322
  # not enough
324
323
  if self.slot_empty(slot_from) or self.state[slot_from]["quantity"] < quantity:
325
324
  return
325
+ # if craft slot - must take all
326
+ if slot_from == 0 and self.state[slot_from]["quantity"] != quantity:
327
+ return
326
328
 
327
329
  item = self.state[slot_from]
328
330
 
@@ -2,31 +2,11 @@ import numpy as np
2
2
 
3
3
  from plancraft.environment.env import PlancraftEnvironment
4
4
  from plancraft.environment.search import gold_search_recipe
5
-
6
- VALID_ACTIONS = ["move", "smelt", "think", "search", "impossible"]
7
-
8
- ACTIONS_DESCRIPTIONS = {
9
- "move": {
10
- "description": "Transfer a specific quantity of an item from one slot to another",
11
- "format": "`move: from [Source] to [Target] with quantity N`",
12
- },
13
- "smelt": {
14
- "description": "Smelt an item in a furnace and moves the output to a specific slot",
15
- "format": "`smelt: from [Source] to [Target] with quantity N`",
16
- },
17
- "think": {
18
- "description": "Generate thoughts to help you decide on the next action",
19
- "format": "`think: <thought message>`",
20
- },
21
- "search": {
22
- "description": "Search for a recipe to craft a specific item",
23
- "format": "`search: <recipe name>`",
24
- },
25
- "impossible": {
26
- "description": "Stop task if it is certain that it is impossible with given inventory",
27
- "format": "`impossible: <reason>`",
28
- },
29
- }
5
+ from plancraft.environment.actions import (
6
+ ActionHandlerBase,
7
+ MoveActionHandler,
8
+ SmeltActionHandler,
9
+ )
30
10
 
31
11
  BASE_SYSTEM_PROMPT = """You are crafting in Minecraft. You need to decide on the next action.
32
12
 
@@ -48,23 +28,6 @@ Constraints:
48
28
  - If an item is not in slot [0] then the recipe is incorrect
49
29
  - You need to move items from [0] to a free inventory slot to complete the crafting process"""
50
30
 
51
-
52
- def get_system_prompt(actions: list[str]):
53
- assert set(actions).issubset(VALID_ACTIONS), f"Invalid actions: {actions}"
54
- assert "move" in actions, "move should be one of the actions"
55
- assert "smelt" in actions, "smelt should be one of the actions"
56
-
57
- descriptions = ""
58
- for action in actions:
59
- descriptions += f"\n\t- {action}: {ACTIONS_DESCRIPTIONS[action]['description']}"
60
-
61
- output_format = ""
62
- for action in actions:
63
- output_format += f"\n\t- {ACTIONS_DESCRIPTIONS[action]['format']}"
64
-
65
- return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
66
-
67
-
68
31
  CRAFTING_STEPS = [
69
32
  "Craft an item of type: andesite\ninventory:\n - diorite [I18] quantity 1\n - cobblestone [I30] quantity 1",
70
33
  "Craft an item of type: andesite\ninventory:\n - diorite [B1] quantity 1\n - cobblestone [I30] quantity 1",
@@ -94,8 +57,26 @@ SEARCH_STEPS = [
94
57
  ]
95
58
 
96
59
 
60
+ def get_system_prompt(
61
+ handlers: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
62
+ ):
63
+ action_names = [handler.action_name for handler in handlers]
64
+ assert "move" in action_names, "MoveActionHandler should be one of the handlers"
65
+ assert "smelt" in action_names, "SmeltActionHandler should be one of the handlers"
66
+
67
+ descriptions = ""
68
+ for handler in handlers:
69
+ descriptions += f"\n\t- {handler.action_name}: {handler.prompt_description}"
70
+
71
+ output_format = ""
72
+ for handler in handlers:
73
+ output_format += f"\n\t- {handler.prompt_format_example}"
74
+
75
+ return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
76
+
77
+
97
78
  def get_prompt_example(
98
- actions: list[str],
79
+ handlers: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
99
80
  use_text_inventory=True,
100
81
  use_multimodal_content_format=False,
101
82
  use_images=False,
@@ -103,10 +84,9 @@ def get_prompt_example(
103
84
  """
104
85
  Generates a few-shot prompt for the crafting task
105
86
  """
106
-
107
- assert set(actions).issubset(VALID_ACTIONS), f"Invalid actions: {actions}"
108
- assert "move" in actions, "move should be one of the actions"
109
- assert "smelt" in actions, "smelt should be one of the actions"
87
+ handler_names = [handler.action_name for handler in handlers]
88
+ assert "move" in handler_names, "move should be one of the actions"
89
+ assert "smelt" in handler_names, "smelt should be one of the actions"
110
90
 
111
91
  if use_images:
112
92
  assert (
@@ -120,12 +100,12 @@ def get_prompt_example(
120
100
  text = text.split("\ninventory:\n")[0]
121
101
 
122
102
  example_dialogue.append({"role": "user", "content": text})
123
- if "search" in actions and SEARCH_STEPS[i]:
103
+ if "search" in handler_names and SEARCH_STEPS[i]:
124
104
  example_dialogue.append({"role": "assistant", "content": SEARCH_STEPS[i]})
125
105
  search_target = text.split("seach: ")[-1].strip()
126
106
  search_response = gold_search_recipe(search_target)
127
107
  example_dialogue.append({"role": "user", "content": search_response})
128
- if "think" in actions:
108
+ if "think" in handler_names:
129
109
  example_dialogue.append({"role": "assistant", "content": THINK_STEPS[i]})
130
110
  example_dialogue.append({"role": "user", "content": "Ok"})
131
111
  example_dialogue.append({"role": "assistant", "content": BASE_ACTION_STEPS[i]})
@@ -1,4 +1,7 @@
1
- from plancraft.environment.actions import convert_from_slot_index
1
+ import re
2
+ from typing import Optional
3
+
4
+ from plancraft.environment.actions import convert_from_slot_index, ActionHandlerBase
2
5
  from plancraft.environment.recipes import RECIPES
3
6
 
4
7
 
@@ -24,3 +27,27 @@ def gold_search_recipe(recipe_name: str) -> str:
24
27
  recipe_instructions = f"smelt {r.ingredient}\n"
25
28
  out_string += f"recipe {i+1}:\n{recipe_instructions}"
26
29
  return out_string
30
+
31
+
32
+ class GoldSearchActionHandler(ActionHandlerBase):
33
+ @property
34
+ def prompt_description(self) -> str:
35
+ return "Search for recipes to craft a specific item"
36
+
37
+ @property
38
+ def prompt_format_example(self) -> str:
39
+ return "`search: <recipe name>`"
40
+
41
+ @property
42
+ def action_name(self) -> str:
43
+ return "search"
44
+
45
+ def match(self, generated_text) -> Optional[str]:
46
+ """
47
+ Parse the raw model response and return the gold search result for the requested recipe
48
+ """
49
+ action_match = re.search(f"({self.action_name}):", generated_text)
50
+ if not action_match:
51
+ return
52
+ search_target = re.search(r"search: (\w+)", generated_text).group(1)
53
+ return gold_search_recipe(search_target)
plancraft/evaluator.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import json
2
2
  import os
3
3
  import random
4
- import re
5
4
  import string
6
5
  import time
7
6
 
@@ -12,15 +11,19 @@ from tqdm import tqdm
12
11
 
13
12
  import wandb
14
13
  from plancraft.config import EvalConfig, PlancraftExample
15
- from plancraft.environment.actions import MoveAction, SmeltAction, StopAction
14
+ from plancraft.environment.actions import (
15
+ StopAction,
16
+ ActionHandlerBase,
17
+ MoveActionHandler,
18
+ SmeltActionHandler,
19
+ )
16
20
  from plancraft.environment.env import (
17
21
  PlancraftEnvironment,
18
22
  get_objective_str,
19
23
  target_and_inventory_to_text_obs,
20
24
  )
21
- from plancraft.environment.search import gold_search_recipe
22
- from plancraft.models import get_model
23
25
  from plancraft.utils import History
26
+ from plancraft.models.base import PlancraftBaseModel
24
27
 
25
28
 
26
29
  class Evaluator:
@@ -35,12 +38,18 @@ class Evaluator:
35
38
  Finally, it also saves the results of the evaluation and the images generated during the evaluation.
36
39
  """
37
40
 
38
- def __init__(self, cfg: EvalConfig):
41
+ def __init__(
42
+ self,
43
+ cfg: EvalConfig,
44
+ run_name: str,
45
+ model: PlancraftBaseModel,
46
+ actions: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
47
+ ):
39
48
  self.cfg = cfg
40
- self.output_dir = (
41
- f"{cfg.plancraft.output_dir}/{self.evaluator_name()}/{cfg.plancraft.split}"
42
- )
49
+ self.run_name = run_name
50
+ self.output_dir = f"{cfg.plancraft.output_dir}/{run_name}/{cfg.plancraft.split}"
43
51
  self.generation_number = 0
52
+ self.actions = actions
44
53
 
45
54
  # load all examples
46
55
  self.examples: list[PlancraftExample] = self.load_dataset(cfg.plancraft.split)
@@ -53,7 +62,7 @@ class Evaluator:
53
62
 
54
63
  # initialise history/dialogue tracking
55
64
  self.history = History(
56
- valid_actions=cfg.plancraft.valid_actions,
65
+ actions=actions,
57
66
  use_multimodal_content_format=cfg.plancraft.use_multimodal_content_format,
58
67
  use_images=cfg.plancraft.use_images,
59
68
  use_text_inventory=cfg.plancraft.use_text_inventory,
@@ -61,45 +70,7 @@ class Evaluator:
61
70
  )
62
71
 
63
72
  # load model
64
- self.model = get_model(cfg)
65
-
66
- def evaluator_name(self) -> str:
67
- if self.cfg.plancraft.use_text_inventory and self.cfg.plancraft.use_images:
68
- name_str = "both"
69
- elif self.cfg.plancraft.use_images:
70
- name_str = "images"
71
- elif self.cfg.plancraft.use_text_inventory:
72
- name_str = "text"
73
- else:
74
- raise ValueError(
75
- "At least one of use_text_inventory or use_images should be True"
76
- )
77
-
78
- if self.cfg.plancraft.use_fasterrcnn:
79
- name_str += "_fasterrcnn"
80
-
81
- model_name = self.cfg.plancraft.model.split("/")[-1]
82
- if self.cfg.plancraft.adapter != "":
83
- model_name = self.cfg.plancraft.adapter.split("/")[-1]
84
-
85
- mode = self.cfg.plancraft.mode
86
- if mode in ["dummy", "oracle"]:
87
- return f"{mode}_{name_str}"
88
-
89
- valid_actions_to_str = {
90
- "move": "m",
91
- "smelt": "s",
92
- "think": "t",
93
- "search": "se",
94
- "impossible": "i",
95
- }
96
- actions = "|".join(
97
- [
98
- valid_actions_to_str[action]
99
- for action in self.cfg.plancraft.valid_actions
100
- ]
101
- )
102
- return f"{self.cfg.plancraft.mode}_{name_str}_{model_name}_{actions}"
73
+ self.model = model
103
74
 
104
75
  def save_results_dict(self, example: PlancraftExample, results_dict: dict):
105
76
  output_dir = f"{self.output_dir}/{self.generation_number}"
@@ -152,48 +123,17 @@ class Evaluator:
152
123
  return True
153
124
  return False
154
125
 
155
- def parse_raw_model_response(
156
- self, content: str
157
- ) -> str | MoveAction | SmeltAction | StopAction:
126
+ def parse_raw_model_response(self, generated_text: str):
158
127
  """
159
- Given a message and set of valid actions, parse the content to return the action
128
+ Given a message and set of action handlers, parse the content to return the action
160
129
  or a message if the action is not valid/requires message response
161
130
  """
162
-
163
- action_match = re.search(
164
- f"({'|'.join(self.cfg.plancraft.valid_actions)}):", content
165
- )
166
- if action_match:
167
- action = action_match.group(1)
168
- if action == "think":
169
- return "Ok"
170
- elif action == "impossible":
171
- reason = re.search(r"impossible: (.*)", content).group(1)
172
- return StopAction(reason=reason)
173
- elif action == "search":
174
- search_target = re.search(r"search: (\w+)", content).group(1)
175
- return gold_search_recipe(search_target)
176
- else:
177
- try:
178
- slot_from = re.search(r" from (\[[ABCI]?\d+\])", content).group(1)
179
- slot_to = re.search(r" to (\[[ABCI]?\d+\])", content).group(1)
180
- quantity = re.search(r"with quantity (\d+)", content).group(1)
181
- if action == "move":
182
- action = MoveAction(
183
- slot_from=slot_from,
184
- slot_to=slot_to,
185
- quantity=quantity,
186
- )
187
- else:
188
- action = SmeltAction(
189
- slot_from=slot_from,
190
- slot_to=slot_to,
191
- quantity=quantity,
192
- )
193
- return action
194
- except AttributeError as e:
195
- return f"Format Error: {e}"
196
- return f"Only select actions from the following: {', '.join(self.cfg.plancraft.valid_actions)}"
131
+ for handler in self.actions:
132
+ match_output = handler.match(generated_text)
133
+ if match_output:
134
+ return match_output
135
+ action_names = [handler.action_name for handler in self.actions]
136
+ return f"Only select actions from the following: {', '.join(action_names)}"
197
137
 
198
138
  def convert_observation_to_message(
199
139
  self,
@@ -230,11 +170,8 @@ class Evaluator:
230
170
  return {"content": content_list}
231
171
 
232
172
  def eval_example(self, example: PlancraftExample) -> dict:
233
- """
234
- Given the loaded model and an example from Plancraft
235
- run the episode until success or termination.
236
- Termination can happen from: early stopping (stuck) / max_steps / stop_action
237
- """
173
+ """Given the loaded model and an example from Plancraft
174
+ run the episode until success or termination."""
238
175
  success = False
239
176
  num_non_env_actions = 0
240
177
  self.reset(example)
@@ -346,7 +283,7 @@ class Evaluator:
346
283
  f"Running evaluation over {len(self.examples)} examples {self.cfg.plancraft.num_generations} times."
347
284
  )
348
285
  run_name = (
349
- f"{self.evaluator_name()} {self.cfg.plancraft.split}".replace(" ", "_")
286
+ f"{self.run_name} {self.cfg.plancraft.split}".replace(" ", "_")
350
287
  .replace(".", "_")
351
288
  .strip()
352
289
  )
@@ -38,8 +38,6 @@ def find_free_inventory_slot(inventory: dict, from_slot: int) -> int:
38
38
  if slot == from_slot:
39
39
  continue
40
40
  item_type = item["type"]
41
- # if item["quantity"] == 0:
42
- # item_type = "air"
43
41
  if item_type not in type_to_slot:
44
42
  type_to_slot[item_type] = [slot]
45
43
  else:
@@ -57,12 +55,6 @@ def find_free_inventory_slot(inventory: dict, from_slot: int) -> int:
57
55
  <= MAX_STACK_SIZE[from_item_type]
58
56
  ):
59
57
  return slot
60
- # if there is a free slot with air
61
- # if "air" in type_to_slot:
62
- # for slot in type_to_slot["air"]:
63
- # if slot > 10:
64
- # return slot
65
-
66
58
  if len(empty_slots) > 0:
67
59
  return empty_slots.pop()
68
60
 
@@ -80,8 +72,6 @@ def get_inventory_counter(inventory: dict) -> Counter:
80
72
  for slot, item in inventory.items():
81
73
  if slot == 0:
82
74
  continue
83
- # if item["type"] == "air":
84
- # continue
85
75
  counter[item["type"]] += item["quantity"]
86
76
  return counter
87
77
 
@@ -170,7 +160,6 @@ class OracleModel(PlancraftBaseModel):
170
160
 
171
161
  if isinstance(plan_recipe, ShapelessRecipe):
172
162
  crafting_slot = 1
173
-
174
163
  # add each item to crafting slots
175
164
  for item, quantity in items_to_use_counter.items():
176
165
  n = 0
plancraft/utils.py CHANGED
@@ -7,6 +7,7 @@ import torch
7
7
  from loguru import logger
8
8
 
9
9
  from plancraft.environment.actions import (
10
+ ActionHandlerBase,
10
11
  MoveAction,
11
12
  SmeltAction,
12
13
  )
@@ -21,21 +22,21 @@ class History:
21
22
  """
22
23
  History class to keep track of dialogue, actions, inventory and images
23
24
  Args:
24
- valid_actions: list of valid actions
25
+ actions: list of action handlers (ActionHandlerBase instances)
25
26
  initial_dialogue: list of dialogue messages
26
27
  use_multimodal_content_format: whether to use multimodal content format (list of content with types)
27
28
  """
28
29
 
29
30
  def __init__(
30
31
  self,
31
- valid_actions: list[str] = ["move", "smelt"],
32
+ actions: list[ActionHandlerBase] = [],
32
33
  use_multimodal_content_format=False,
33
34
  few_shot=False,
34
35
  use_images=False,
35
36
  use_text_inventory=False,
36
37
  resolution="high",
37
38
  ):
38
- self.valid_actions = valid_actions
39
+ self.action_handlers = actions
39
40
  self.use_multimodal_content_format = use_multimodal_content_format
40
41
  self.few_shot = few_shot
41
42
  self.use_images = use_images
@@ -58,7 +59,7 @@ class History:
58
59
 
59
60
  def system_prompt(self):
60
61
  # kept separate from dialogue history because certain models deal with system prompt differently
61
- system_prompt_text = get_system_prompt(self.valid_actions)
62
+ system_prompt_text = get_system_prompt(handlers=self.action_handlers)
62
63
  if self.use_multimodal_content_format:
63
64
  return {
64
65
  "role": "system",
@@ -75,7 +76,7 @@ class History:
75
76
 
76
77
  if self.few_shot:
77
78
  self.prompt_examples = get_prompt_example(
78
- self.valid_actions,
79
+ self.action_handlers,
79
80
  use_text_inventory=self.use_text_inventory,
80
81
  use_multimodal_content_format=self.use_multimodal_content_format,
81
82
  use_images=self.use_images,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plancraft
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Plancraft: an evaluation dataset for planning with LLM agents
5
5
  License: MIT License
6
6
 
@@ -25,7 +25,6 @@ License: MIT License
25
25
  SOFTWARE.
26
26
  License-File: LICENSE
27
27
  Requires-Python: >=3.9
28
- Requires-Dist: accelerate>=1.1.1
29
28
  Requires-Dist: hydra-core>=1.3.2
30
29
  Requires-Dist: imageio>=2.36.0
31
30
  Requires-Dist: loguru
@@ -65,7 +64,7 @@ Description-Content-Type: text/markdown
65
64
 
66
65
  [Paper](https://arxiv.org/abs/2412.21033) | [Website](https://gautierdag.github.io/plancraft/)
67
66
 
68
- Plancraft is a minecraft environment and agent that innovates on planning LLM agents with a retriever
67
+ Plancraft is a Minecraft environment and agent that innovates on planning LLM agents with an oracle RAG retriever.
69
68
 
70
69
  You can install the package by running the following command:
71
70
 
@@ -123,8 +122,10 @@ from plancraft.config import EvalConfig
123
122
  def main():
124
123
  # Create the config
125
124
  config = EvalConfig(...)
125
+ # create model -- Note you can create your own model by subclassing PlancraftBaseModel
126
+ model = get_model(config)
126
127
  # Create the evaluator
127
- evaluator = Evaluator(config)
128
+ evaluator = Evaluator(config, run_name="my_run", model=model)
128
129
  # Evaluate the agent
129
130
  evaluator.eval_all_seeds()
130
131
  ```
@@ -207,7 +208,7 @@ The observation returned by the `Evaluator` class is a dictionary with the follo
207
208
 
208
209
  To implement a model, you need to subclass the `PlancraftBaseModel` class and implement the `step` and `reset` method. See the `plancraft.models.dummy` module for an example of how to implement a basic model.
209
210
 
210
- You will also need to modify the `get_model` function in the `plancraft.models` module to return an instance of your model when the correct config is passed.
211
+ You should then be able to use the `Evaluator` class to evaluate it.
211
212
 
212
213
  ## Reproducing the Results tables in the paper
213
214
 
@@ -225,10 +226,13 @@ The image is available on [Docker Hub](https://hub.docker.com/r/gautierdag/planc
225
226
 
226
227
  ## To Do
227
228
 
229
+ Non-exhaustive list of things to do from highest to lowest priority:
230
+
231
+ - [ ] Add Minecraft wiki scrape and non-oracle search for pages
232
+ - [ ] Improve planner to bring closer to optimal (the oracle planner does not consider future crafting steps when moving items -- see paper for more details)
228
233
  - [ ] Rerun image models with better bounding box model
229
234
  - [ ] Track bounding box accuracy
230
- - [ ] Improve planner to bring closer to optimal (the oracle planner does not consider future crafting steps when moving items -- see paper for more details)
231
- - [ ] Add minecraft wiki scrape and non-oracle search for pages
235
+ - [ ] Implement a version of the image environment entirely on CUDA/PyTorch rather than CPU
232
236
 
233
237
  ## PRs Welcomed
234
238
 
@@ -1,8 +1,8 @@
1
1
  plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- plancraft/config.py,sha256=hsEv_YFk4St0xb6uiT2zuWqgUw4-fZAC1jB_U6DM7HU,4544
3
- plancraft/evaluator.py,sha256=iPONcYvozqGpJ-Wr2EvtbwMamyiZDhNf3HJCvXBtsBk,16391
2
+ plancraft/config.py,sha256=HNHFDewz_0IF1EiPoS8B_ND5JfQvWjE4-0MbX-xvsRQ,4215
3
+ plancraft/evaluator.py,sha256=0J1Mk-n5Y_7L-WhuH6UpoMWhMnGtdFAGW-aqZDhuhLk,13844
4
4
  plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
5
- plancraft/utils.py,sha256=Fs9ZVaSjKxhSqn9UNNZx-dioB5Oyb7hO5c8QMkmW020,7011
5
+ plancraft/utils.py,sha256=7VWKVlDhoMacRypRRSKM1K3hwwJ0nHR3zyx9jZH1C1g,7042
6
6
  plancraft/data/test.json,sha256=7ozxAb-PzoaOMQbMMh52RvN0pQBor6aAUwMrtc2C-y0,1670677
7
7
  plancraft/data/test.small.easy.json,sha256=IsrnRUACUWUdq2_BKGw_H2GptstqmFw66y0Grwmrwj8,238854
8
8
  plancraft/data/test.small.json,sha256=RnPJJf_wLhdUQydrQo0H4KJvcD5PkSEVy5Bbi--Il2U,342843
@@ -10,15 +10,15 @@ plancraft/data/train.json,sha256=pdArGse10i6Dg5Oa56EJPH_fOmotVzv2q5LPJpmS_bQ,342
10
10
  plancraft/data/val.json,sha256=bfVFVQ_dmDSTCLojRkv1XIlct5zkwSg4AzsMp0gUUGI,1654481
11
11
  plancraft/data/val.small.easy.json,sha256=vgBotEu-mH8441jUyCN_6DZIRX1O5SpZatdmK-I7yNA,240202
12
12
  plancraft/data/val.small.json,sha256=WO7xerSWVOPcnLH1_MBiWwdHmqWP0DDGMhuF2RycBRo,300799
13
- plancraft/environment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- plancraft/environment/actions.py,sha256=r2BYSSoGt4GnNLzbWSVR99lfZ5oSkc0ENNBRqSwEzJ0,4829
15
- plancraft/environment/env.py,sha256=SY4ur1uLUApLmcGtgqDDcuiotFh5Cu8zLofWB6WLAh4,16190
13
+ plancraft/environment/__init__.py,sha256=XFsFny4lH195AwAmL-WeCaF9ZCMgc7IgXIwhQ8FTdgE,505
14
+ plancraft/environment/actions.py,sha256=D9QqBW7yWsbWCjxNyWp61Xtb0c6EtyXk3PZ1I8SRoBQ,9381
15
+ plancraft/environment/env.py,sha256=_VQewLUv8YpKLaNp9uye25lq4HFHd9ddTQr1Lqv4eOs,16290
16
16
  plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
17
17
  plancraft/environment/planner.py,sha256=eJExz3OxSzurIEdH9LOtMwFH9ApqMQ3CokVhmbV6Px0,3953
18
- plancraft/environment/prompts.py,sha256=qD7ezj-ASpl5XHGEsZ4UZkt5ubRRFgtXeCAFlRTjCok,7288
18
+ plancraft/environment/prompts.py,sha256=OKxiv02NIhRk5FZJUEDRLkVWVMc-aXKJi7i7X61uUmk,6633
19
19
  plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
20
20
  plancraft/environment/sampler.py,sha256=IZT-XjmWSZrs0zDyRTMjYytXxewdwYf5YGGdKsR5ll4,7643
21
- plancraft/environment/search.py,sha256=PUBrkgy9dgiZY8v1HyxVIxXW7n01xIQbdXXCfNHrYU4,1055
21
+ plancraft/environment/search.py,sha256=uFHpLvW40rMKOxDabcyWrpOrhKLDZqAJOF_jew4_WXk,1837
22
22
  plancraft/environment/assets/constants.json,sha256=kyOIOh82CTTMMGEIS60k5k6M-6fkEmYDoGAnvi3Zx5k,1379016
23
23
  plancraft/environment/assets/minecraft_font.ttf,sha256=AzoK9cgggXwjFPHtIO7uz-YaDrminl3nvB-VsaTvTAk,60992
24
24
  plancraft/environment/assets/table.png,sha256=IKIViZKAPyR4FWnS0JP9AZ19vIEO3qoS5-YRGAO1ow8,5430
@@ -1917,10 +1917,10 @@ plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
1917
1917
  plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
1918
1918
  plancraft/models/dummy.py,sha256=HVuX5Y9CPNDP8Ne4BNTe2qyWdxyhIgvPIIV3OhXxzD8,1062
1919
1919
  plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
1920
- plancraft/models/oracle.py,sha256=WkzupIoetppGzPst5kD0IRe_9VsQSRJzPB6N-_ULa-k,8750
1920
+ plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
1921
1921
  plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
1922
1922
  plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
1923
- plancraft-0.3.0.dist-info/METADATA,sha256=yPPTvrICB1iLuI3NquneaK6cPUuGH1w4Z8hxv5SGQnM,11119
1924
- plancraft-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
- plancraft-0.3.0.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
- plancraft-0.3.0.dist-info/RECORD,,
1923
+ plancraft-0.3.1.dist-info/METADATA,sha256=KKsWXHGTbWBXplk1E5F0b_AJvAAu7K91k5sR3eLtKM4,11306
1924
+ plancraft-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1925
+ plancraft-0.3.1.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
1926
+ plancraft-0.3.1.dist-info/RECORD,,