plancraft 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plancraft/config.py +1 -10
- plancraft/environment/__init__.py +21 -0
- plancraft/environment/actions.py +152 -0
- plancraft/environment/env.py +3 -1
- plancraft/environment/prompts.py +29 -49
- plancraft/environment/search.py +28 -1
- plancraft/evaluator.py +30 -93
- plancraft/models/oracle.py +0 -11
- plancraft/utils.py +6 -5
- {plancraft-0.3.0.dist-info → plancraft-0.3.1.dist-info}/METADATA +11 -7
- {plancraft-0.3.0.dist-info → plancraft-0.3.1.dist-info}/RECORD +13 -13
- {plancraft-0.3.0.dist-info → plancraft-0.3.1.dist-info}/WHEEL +0 -0
- {plancraft-0.3.0.dist-info → plancraft-0.3.1.dist-info}/licenses/LICENSE +0 -0
plancraft/config.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from typing import Literal, Optional, Union
|
2
2
|
|
3
|
-
from pydantic import BaseModel
|
3
|
+
from pydantic import BaseModel
|
4
4
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
5
5
|
|
6
6
|
from plancraft.environment.recipes import RECIPES
|
@@ -40,15 +40,6 @@ class PlancraftConfig(BaseModel):
|
|
40
40
|
False # whether to use multimodal content format
|
41
41
|
)
|
42
42
|
|
43
|
-
@model_validator(mode="after")
|
44
|
-
def validate(self):
|
45
|
-
assert set(
|
46
|
-
self.valid_actions
|
47
|
-
).issubset(
|
48
|
-
{"move", "smelt", "think", "search", "impossible"}
|
49
|
-
), "valid_actions should be subset of {'move', 'smelt', 'think', 'search', 'impossible'}"
|
50
|
-
return self
|
51
|
-
|
52
43
|
|
53
44
|
class WandbConfig(BaseModel):
|
54
45
|
project: str
|
@@ -0,0 +1,21 @@
|
|
1
|
+
from .actions import (
|
2
|
+
ImpossibleActionHandler,
|
3
|
+
MoveActionHandler,
|
4
|
+
SmeltActionHandler,
|
5
|
+
ThinkActionHandler,
|
6
|
+
convert_from_slot_index,
|
7
|
+
convert_to_slot_index,
|
8
|
+
)
|
9
|
+
from .env import PlancraftEnvironment
|
10
|
+
from .search import GoldSearchActionHandler
|
11
|
+
|
12
|
+
__all__ = [
|
13
|
+
"ImpossibleActionHandler",
|
14
|
+
"MoveActionHandler",
|
15
|
+
"SmeltActionHandler",
|
16
|
+
"ThinkActionHandler",
|
17
|
+
"PlancraftEnvironment",
|
18
|
+
"GoldSearchActionHandler",
|
19
|
+
"convert_from_slot_index",
|
20
|
+
"convert_to_slot_index",
|
21
|
+
]
|
plancraft/environment/actions.py
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
import abc
|
2
|
+
import re
|
3
|
+
from typing import Optional
|
4
|
+
|
1
5
|
from pydantic import BaseModel, field_validator, model_validator
|
2
6
|
|
3
7
|
|
@@ -40,6 +44,39 @@ def convert_from_slot_index(slot_index: int) -> str:
|
|
40
44
|
return f"[I{slot_index-9}]"
|
41
45
|
|
42
46
|
|
47
|
+
class ActionHandlerBase(abc.ABC):
    """
    Abstract interface for a single action/tool the model may emit.

    Concrete handlers supply three read-only properties used to build the
    prompt, plus a ``match`` method that inspects the raw generated text.
    """

    @property
    @abc.abstractmethod
    def prompt_description(self) -> str:
        """
        Description of the action, as presented to the model in the prompt
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def prompt_format_example(self) -> str:
        """
        Example of the output format the model should follow for this action
        """
        raise NotImplementedError

    @property
    @abc.abstractmethod
    def action_name(self) -> str:
        """
        Name of the action
        """
        raise NotImplementedError

    @abc.abstractmethod
    def match(self, generated_text: str):
        """
        Try to recognise this action/tool in the text generated by the model
        """
        raise NotImplementedError
|
78
|
+
|
79
|
+
|
43
80
|
class MoveAction(BaseModel):
|
44
81
|
""" "Moves an item from one slot to another"""
|
45
82
|
|
@@ -152,3 +189,118 @@ class StopAction(BaseModel):
|
|
152
189
|
|
153
190
|
# when symbolic action is true, can either move objects around or smelt
|
154
191
|
SymbolicAction = MoveAction | SmeltAction
|
192
|
+
|
193
|
+
|
194
|
+
class MoveActionHandler(ActionHandlerBase):
    """
    Handler that recognises ``move:`` commands in the model output.
    """

    @property
    def prompt_description(self) -> str:
        return "Transfer a specific quantity of an item from one slot to another"

    @property
    def prompt_format_example(self) -> str:
        return "`move: from [Source] to [Target] with quantity N`"

    @property
    def action_name(self) -> str:
        return "move"

    def match(self, generated_text: str) -> Optional[MoveAction | str]:
        """
        Turn the raw model response into a MoveAction.

        Returns None when the text has no ``move:`` marker. When the marker is
        present, the source slot, target slot and quantity are extracted and
        passed (as the captured strings) to MoveAction. If any of the three
        pieces is missing, the failed lookup raises AttributeError and a
        "Format Error: ..." message is returned instead.
        """
        if re.search(f"({self.action_name}):", generated_text) is None:
            return None
        try:
            # slots look like [12] or [A1]/[B2]/[C3]/[I4]
            source = re.search(r" from (\[[ABCI]?\d+\])", generated_text).group(1)
            target = re.search(r" to (\[[ABCI]?\d+\])", generated_text).group(1)
            amount = re.search(r"with quantity (\d+)", generated_text).group(1)
            return MoveAction(slot_from=source, slot_to=target, quantity=amount)
        except AttributeError as e:
            return f"Format Error: {e}"
|
226
|
+
|
227
|
+
|
228
|
+
class SmeltActionHandler(ActionHandlerBase):
    """
    Handler that recognises ``smelt:`` commands in the model output.
    """

    @property
    def prompt_description(self) -> str:
        return "Smelt an item in a furnace and moves the output to a specific slot"

    @property
    def prompt_format_example(self) -> str:
        return "`smelt: from [Source] to [Target] with quantity N`"

    @property
    def action_name(self) -> str:
        return "smelt"

    def match(self, generated_text: str) -> Optional[SmeltAction | str]:
        """
        Turn the raw model response into a SmeltAction.

        Returns None when the text has no ``smelt:`` marker. When the marker is
        present, the source slot, target slot and quantity are extracted and
        passed (as the captured strings) to SmeltAction. If any of the three
        pieces is missing, the failed lookup raises AttributeError and a
        "Format Error: ..." message is returned instead.
        """
        if re.search(f"({self.action_name}):", generated_text) is None:
            return None
        try:
            # slots look like [12] or [A1]/[B2]/[C3]/[I4]
            source = re.search(r" from (\[[ABCI]?\d+\])", generated_text).group(1)
            target = re.search(r" to (\[[ABCI]?\d+\])", generated_text).group(1)
            amount = re.search(r"with quantity (\d+)", generated_text).group(1)
            return SmeltAction(slot_from=source, slot_to=target, quantity=amount)
        except AttributeError as e:
            return f"Format Error: {e}"
|
260
|
+
|
261
|
+
|
262
|
+
class ImpossibleActionHandler(ActionHandlerBase):
    """
    Handler that recognises ``impossible:`` in the model output and turns it
    into a StopAction carrying the stated reason.
    """

    @property
    def prompt_description(self) -> str:
        return "Stop task if it is certain that it is impossible with given inventory"

    @property
    def prompt_format_example(self) -> str:
        return "`impossible: <reason>`"

    @property
    def action_name(self) -> str:
        return "impossible"

    def match(self, generated_text) -> Optional[StopAction]:
        """
        Parse the raw model response to a StopAction

        Returns None when the text has no ``impossible:`` marker. Otherwise
        returns a StopAction whose reason is the rest of the line following
        the marker (possibly an empty string).
        """
        action_match = re.search(f"({self.action_name}):", generated_text)
        if not action_match:
            return None
        # Preferred form: marker followed by a single space, then the reason.
        reason_match = re.search(r"impossible: (.*)", generated_text)
        if reason_match is not None:
            reason = reason_match.group(1)
        else:
            # The marker is present but no space follows the colon (e.g.
            # "impossible:" or "impossible:no wood"). This used to raise an
            # uncaught AttributeError; take whatever follows the colon on
            # that line instead. The pattern is guaranteed to match because
            # action_match already found "impossible:".
            reason = re.search(r"impossible:(.*)", generated_text).group(1).strip()
        return StopAction(reason=reason)
|
284
|
+
|
285
|
+
|
286
|
+
class ThinkActionHandler(ActionHandlerBase):
    """
    Handler that recognises ``think:`` in the model output. A thought has no
    effect of its own; it is simply acknowledged.
    """

    @property
    def prompt_description(self) -> str:
        return "Generate thoughts to help you decide on the next action"

    @property
    def prompt_format_example(self) -> str:
        return "`think: <thought message>`"

    @property
    def action_name(self) -> str:
        return "think"

    def match(self, generated_text) -> Optional[str]:
        """
        Return "Ok" if the raw model response contains a ``think:`` marker,
        otherwise None.
        """
        found = re.search(f"({self.action_name}):", generated_text) is not None
        return "Ok" if found else None
|
plancraft/environment/env.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
import glob
|
2
2
|
import os
|
3
|
-
from collections import defaultdict
|
4
3
|
from typing import Literal, Optional
|
5
4
|
|
6
5
|
import numpy as np
|
@@ -323,6 +322,9 @@ class PlancraftEnvironment:
|
|
323
322
|
# not enough
|
324
323
|
if self.slot_empty(slot_from) or self.state[slot_from]["quantity"] < quantity:
|
325
324
|
return
|
325
|
+
# if craft slot - must take all
|
326
|
+
if slot_from == 0 and self.state[slot_from]["quantity"] != quantity:
|
327
|
+
return
|
326
328
|
|
327
329
|
item = self.state[slot_from]
|
328
330
|
|
plancraft/environment/prompts.py
CHANGED
@@ -2,31 +2,11 @@ import numpy as np
|
|
2
2
|
|
3
3
|
from plancraft.environment.env import PlancraftEnvironment
|
4
4
|
from plancraft.environment.search import gold_search_recipe
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
"description": "Transfer a specific quantity of an item from one slot to another",
|
11
|
-
"format": "`move: from [Source] to [Target] with quantity N`",
|
12
|
-
},
|
13
|
-
"smelt": {
|
14
|
-
"description": "Smelt an item in a furnace and moves the output to a specific slot",
|
15
|
-
"format": "`smelt: from [Source] to [Target] with quantity N`",
|
16
|
-
},
|
17
|
-
"think": {
|
18
|
-
"description": "Generate thoughts to help you decide on the next action",
|
19
|
-
"format": "`think: <thought message>`",
|
20
|
-
},
|
21
|
-
"search": {
|
22
|
-
"description": "Search for a recipe to craft a specific item",
|
23
|
-
"format": "`search: <recipe name>`",
|
24
|
-
},
|
25
|
-
"impossible": {
|
26
|
-
"description": "Stop task if it is certain that it is impossible with given inventory",
|
27
|
-
"format": "`impossible: <reason>`",
|
28
|
-
},
|
29
|
-
}
|
5
|
+
from plancraft.environment.actions import (
|
6
|
+
ActionHandlerBase,
|
7
|
+
MoveActionHandler,
|
8
|
+
SmeltActionHandler,
|
9
|
+
)
|
30
10
|
|
31
11
|
BASE_SYSTEM_PROMPT = """You are crafting in Minecraft. You need to decide on the next action.
|
32
12
|
|
@@ -48,23 +28,6 @@ Constraints:
|
|
48
28
|
- If an item is not in slot [0] then the recipe is incorrect
|
49
29
|
- You need to move items from [0] to a free inventory slot to complete the crafting process"""
|
50
30
|
|
51
|
-
|
52
|
-
def get_system_prompt(actions: list[str]):
|
53
|
-
assert set(actions).issubset(VALID_ACTIONS), f"Invalid actions: {actions}"
|
54
|
-
assert "move" in actions, "move should be one of the actions"
|
55
|
-
assert "smelt" in actions, "smelt should be one of the actions"
|
56
|
-
|
57
|
-
descriptions = ""
|
58
|
-
for action in actions:
|
59
|
-
descriptions += f"\n\t- {action}: {ACTIONS_DESCRIPTIONS[action]['description']}"
|
60
|
-
|
61
|
-
output_format = ""
|
62
|
-
for action in actions:
|
63
|
-
output_format += f"\n\t- {ACTIONS_DESCRIPTIONS[action]['format']}"
|
64
|
-
|
65
|
-
return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
|
66
|
-
|
67
|
-
|
68
31
|
CRAFTING_STEPS = [
|
69
32
|
"Craft an item of type: andesite\ninventory:\n - diorite [I18] quantity 1\n - cobblestone [I30] quantity 1",
|
70
33
|
"Craft an item of type: andesite\ninventory:\n - diorite [B1] quantity 1\n - cobblestone [I30] quantity 1",
|
@@ -94,8 +57,26 @@ SEARCH_STEPS = [
|
|
94
57
|
]
|
95
58
|
|
96
59
|
|
60
|
+
def get_system_prompt(
    handlers: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
):
    """
    Build the system prompt text from the given action handlers.

    The result is BASE_SYSTEM_PROMPT, followed by an "Actions:" section with
    one "name: description" bullet per handler, a "Format" section with one
    format-example bullet per handler, and BASE_SYSTEM_PROMPT_EXAMPLE.
    Handlers named "move" and "smelt" must both be present (asserted).
    """
    available = [h.action_name for h in handlers]
    assert "move" in available, "MoveActionHandler should be one of the handlers"
    assert "smelt" in available, "SmeltActionHandler should be one of the handlers"

    # bullets keep the order in which the handlers were passed in
    action_section = "".join(
        f"\n\t- {h.action_name}: {h.prompt_description}" for h in handlers
    )
    format_section = "".join(f"\n\t- {h.prompt_format_example}" for h in handlers)

    return f"{BASE_SYSTEM_PROMPT}\n\nActions:{action_section}\n\nFormat{format_section}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
|
76
|
+
|
77
|
+
|
97
78
|
def get_prompt_example(
|
98
|
-
|
79
|
+
handlers: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
|
99
80
|
use_text_inventory=True,
|
100
81
|
use_multimodal_content_format=False,
|
101
82
|
use_images=False,
|
@@ -103,10 +84,9 @@ def get_prompt_example(
|
|
103
84
|
"""
|
104
85
|
Generates a few-shot prompt for the crafting task
|
105
86
|
"""
|
106
|
-
|
107
|
-
assert
|
108
|
-
assert "
|
109
|
-
assert "smelt" in actions, "smelt should be one of the actions"
|
87
|
+
handler_names = [handler.action_name for handler in handlers]
|
88
|
+
assert "move" in handler_names, "move should be one of the actions"
|
89
|
+
assert "smelt" in handler_names, "smelt should be one of the actions"
|
110
90
|
|
111
91
|
if use_images:
|
112
92
|
assert (
|
@@ -120,12 +100,12 @@ def get_prompt_example(
|
|
120
100
|
text = text.split("\ninventory:\n")[0]
|
121
101
|
|
122
102
|
example_dialogue.append({"role": "user", "content": text})
|
123
|
-
if "search" in
|
103
|
+
if "search" in handler_names and SEARCH_STEPS[i]:
|
124
104
|
example_dialogue.append({"role": "assistant", "content": SEARCH_STEPS[i]})
|
125
105
|
search_target = text.split("seach: ")[-1].strip()
|
126
106
|
search_response = gold_search_recipe(search_target)
|
127
107
|
example_dialogue.append({"role": "user", "content": search_response})
|
128
|
-
if "think" in
|
108
|
+
if "think" in handler_names:
|
129
109
|
example_dialogue.append({"role": "assistant", "content": THINK_STEPS[i]})
|
130
110
|
example_dialogue.append({"role": "user", "content": "Ok"})
|
131
111
|
example_dialogue.append({"role": "assistant", "content": BASE_ACTION_STEPS[i]})
|
plancraft/environment/search.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
-
|
1
|
+
import re
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from plancraft.environment.actions import convert_from_slot_index, ActionHandlerBase
|
2
5
|
from plancraft.environment.recipes import RECIPES
|
3
6
|
|
4
7
|
|
@@ -24,3 +27,27 @@ def gold_search_recipe(recipe_name: str) -> str:
|
|
24
27
|
recipe_instructions = f"smelt {r.ingredient}\n"
|
25
28
|
out_string += f"recipe {i+1}:\n{recipe_instructions}"
|
26
29
|
return out_string
|
30
|
+
|
31
|
+
|
32
|
+
class GoldSearchActionHandler(ActionHandlerBase):
    """
    Handler that recognises ``search:`` in the model output and answers it
    with the output of gold_search_recipe for the requested item.
    """

    @property
    def prompt_description(self) -> str:
        return "Search for recipes to craft a specific item"

    @property
    def prompt_format_example(self) -> str:
        return "`search: <recipe name>`"

    @property
    def action_name(self) -> str:
        return "search"

    def match(self, generated_text) -> Optional[str]:
        """
        Parse the raw model response to a SearchAction

        Returns None when the text has no ``search:`` marker. Otherwise
        returns the string produced by gold_search_recipe for the word that
        follows ``search: ``, or a "Format Error: ..." message when no such
        word is present.
        """
        action_match = re.search(f"({self.action_name}):", generated_text)
        if not action_match:
            return None
        target_match = re.search(r"search: (\w+)", generated_text)
        if target_match is None:
            # Marker present but nothing usable after it (e.g. "search:" or
            # "search:planks"). This used to raise an uncaught AttributeError;
            # reply with a message, like the move/smelt handlers do.
            return f"Format Error: expected {self.prompt_format_example}"
        return gold_search_recipe(target_match.group(1))
|
plancraft/evaluator.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
3
|
import random
|
4
|
-
import re
|
5
4
|
import string
|
6
5
|
import time
|
7
6
|
|
@@ -12,15 +11,19 @@ from tqdm import tqdm
|
|
12
11
|
|
13
12
|
import wandb
|
14
13
|
from plancraft.config import EvalConfig, PlancraftExample
|
15
|
-
from plancraft.environment.actions import
|
14
|
+
from plancraft.environment.actions import (
|
15
|
+
StopAction,
|
16
|
+
ActionHandlerBase,
|
17
|
+
MoveActionHandler,
|
18
|
+
SmeltActionHandler,
|
19
|
+
)
|
16
20
|
from plancraft.environment.env import (
|
17
21
|
PlancraftEnvironment,
|
18
22
|
get_objective_str,
|
19
23
|
target_and_inventory_to_text_obs,
|
20
24
|
)
|
21
|
-
from plancraft.environment.search import gold_search_recipe
|
22
|
-
from plancraft.models import get_model
|
23
25
|
from plancraft.utils import History
|
26
|
+
from plancraft.models.base import PlancraftBaseModel
|
24
27
|
|
25
28
|
|
26
29
|
class Evaluator:
|
@@ -35,12 +38,18 @@ class Evaluator:
|
|
35
38
|
Finally, it also saves the results of the evaluation and the images generated during the evaluation.
|
36
39
|
"""
|
37
40
|
|
38
|
-
def __init__(
|
41
|
+
def __init__(
|
42
|
+
self,
|
43
|
+
cfg: EvalConfig,
|
44
|
+
run_name: str,
|
45
|
+
model: PlancraftBaseModel,
|
46
|
+
actions: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
|
47
|
+
):
|
39
48
|
self.cfg = cfg
|
40
|
-
self.
|
41
|
-
|
42
|
-
)
|
49
|
+
self.run_name = run_name
|
50
|
+
self.output_dir = f"{cfg.plancraft.output_dir}/{run_name}/{cfg.plancraft.split}"
|
43
51
|
self.generation_number = 0
|
52
|
+
self.actions = actions
|
44
53
|
|
45
54
|
# load all examples
|
46
55
|
self.examples: list[PlancraftExample] = self.load_dataset(cfg.plancraft.split)
|
@@ -53,7 +62,7 @@ class Evaluator:
|
|
53
62
|
|
54
63
|
# initialise history/dialogue tracking
|
55
64
|
self.history = History(
|
56
|
-
|
65
|
+
actions=actions,
|
57
66
|
use_multimodal_content_format=cfg.plancraft.use_multimodal_content_format,
|
58
67
|
use_images=cfg.plancraft.use_images,
|
59
68
|
use_text_inventory=cfg.plancraft.use_text_inventory,
|
@@ -61,45 +70,7 @@ class Evaluator:
|
|
61
70
|
)
|
62
71
|
|
63
72
|
# load model
|
64
|
-
self.model =
|
65
|
-
|
66
|
-
def evaluator_name(self) -> str:
|
67
|
-
if self.cfg.plancraft.use_text_inventory and self.cfg.plancraft.use_images:
|
68
|
-
name_str = "both"
|
69
|
-
elif self.cfg.plancraft.use_images:
|
70
|
-
name_str = "images"
|
71
|
-
elif self.cfg.plancraft.use_text_inventory:
|
72
|
-
name_str = "text"
|
73
|
-
else:
|
74
|
-
raise ValueError(
|
75
|
-
"At least one of use_text_inventory or use_images should be True"
|
76
|
-
)
|
77
|
-
|
78
|
-
if self.cfg.plancraft.use_fasterrcnn:
|
79
|
-
name_str += "_fasterrcnn"
|
80
|
-
|
81
|
-
model_name = self.cfg.plancraft.model.split("/")[-1]
|
82
|
-
if self.cfg.plancraft.adapter != "":
|
83
|
-
model_name = self.cfg.plancraft.adapter.split("/")[-1]
|
84
|
-
|
85
|
-
mode = self.cfg.plancraft.mode
|
86
|
-
if mode in ["dummy", "oracle"]:
|
87
|
-
return f"{mode}_{name_str}"
|
88
|
-
|
89
|
-
valid_actions_to_str = {
|
90
|
-
"move": "m",
|
91
|
-
"smelt": "s",
|
92
|
-
"think": "t",
|
93
|
-
"search": "se",
|
94
|
-
"impossible": "i",
|
95
|
-
}
|
96
|
-
actions = "|".join(
|
97
|
-
[
|
98
|
-
valid_actions_to_str[action]
|
99
|
-
for action in self.cfg.plancraft.valid_actions
|
100
|
-
]
|
101
|
-
)
|
102
|
-
return f"{self.cfg.plancraft.mode}_{name_str}_{model_name}_{actions}"
|
73
|
+
self.model = model
|
103
74
|
|
104
75
|
def save_results_dict(self, example: PlancraftExample, results_dict: dict):
|
105
76
|
output_dir = f"{self.output_dir}/{self.generation_number}"
|
@@ -152,48 +123,17 @@ class Evaluator:
|
|
152
123
|
return True
|
153
124
|
return False
|
154
125
|
|
155
|
-
def parse_raw_model_response(
|
156
|
-
self, content: str
|
157
|
-
) -> str | MoveAction | SmeltAction | StopAction:
|
126
|
+
def parse_raw_model_response(self, generated_text: str):
|
158
127
|
"""
|
159
|
-
Given a message and set of
|
128
|
+
Given a message and set of action handlers, parse the content to return the action
|
160
129
|
or a message if the action is not valid/requires message response
|
161
130
|
"""
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
if action == "think":
|
169
|
-
return "Ok"
|
170
|
-
elif action == "impossible":
|
171
|
-
reason = re.search(r"impossible: (.*)", content).group(1)
|
172
|
-
return StopAction(reason=reason)
|
173
|
-
elif action == "search":
|
174
|
-
search_target = re.search(r"search: (\w+)", content).group(1)
|
175
|
-
return gold_search_recipe(search_target)
|
176
|
-
else:
|
177
|
-
try:
|
178
|
-
slot_from = re.search(r" from (\[[ABCI]?\d+\])", content).group(1)
|
179
|
-
slot_to = re.search(r" to (\[[ABCI]?\d+\])", content).group(1)
|
180
|
-
quantity = re.search(r"with quantity (\d+)", content).group(1)
|
181
|
-
if action == "move":
|
182
|
-
action = MoveAction(
|
183
|
-
slot_from=slot_from,
|
184
|
-
slot_to=slot_to,
|
185
|
-
quantity=quantity,
|
186
|
-
)
|
187
|
-
else:
|
188
|
-
action = SmeltAction(
|
189
|
-
slot_from=slot_from,
|
190
|
-
slot_to=slot_to,
|
191
|
-
quantity=quantity,
|
192
|
-
)
|
193
|
-
return action
|
194
|
-
except AttributeError as e:
|
195
|
-
return f"Format Error: {e}"
|
196
|
-
return f"Only select actions from the following: {', '.join(self.cfg.plancraft.valid_actions)}"
|
131
|
+
for handler in self.actions:
|
132
|
+
match_output = handler.match(generated_text)
|
133
|
+
if match_output:
|
134
|
+
return match_output
|
135
|
+
action_names = [handler.action_name for handler in self.actions]
|
136
|
+
return f"Only select actions from the following: {', '.join(action_names)}"
|
197
137
|
|
198
138
|
def convert_observation_to_message(
|
199
139
|
self,
|
@@ -230,11 +170,8 @@ class Evaluator:
|
|
230
170
|
return {"content": content_list}
|
231
171
|
|
232
172
|
def eval_example(self, example: PlancraftExample) -> dict:
|
233
|
-
"""
|
234
|
-
|
235
|
-
run the episode until success or termination.
|
236
|
-
Termination can happen from: early stopping (stuck) / max_steps / stop_action
|
237
|
-
"""
|
173
|
+
"""Given the loaded model and an example from Plancraft
|
174
|
+
run the episode until success or termination."""
|
238
175
|
success = False
|
239
176
|
num_non_env_actions = 0
|
240
177
|
self.reset(example)
|
@@ -346,7 +283,7 @@ class Evaluator:
|
|
346
283
|
f"Running evaluation over {len(self.examples)} examples {self.cfg.plancraft.num_generations} times."
|
347
284
|
)
|
348
285
|
run_name = (
|
349
|
-
f"{self.
|
286
|
+
f"{self.run_name} {self.cfg.plancraft.split}".replace(" ", "_")
|
350
287
|
.replace(".", "_")
|
351
288
|
.strip()
|
352
289
|
)
|
plancraft/models/oracle.py
CHANGED
@@ -38,8 +38,6 @@ def find_free_inventory_slot(inventory: dict, from_slot: int) -> int:
|
|
38
38
|
if slot == from_slot:
|
39
39
|
continue
|
40
40
|
item_type = item["type"]
|
41
|
-
# if item["quantity"] == 0:
|
42
|
-
# item_type = "air"
|
43
41
|
if item_type not in type_to_slot:
|
44
42
|
type_to_slot[item_type] = [slot]
|
45
43
|
else:
|
@@ -57,12 +55,6 @@ def find_free_inventory_slot(inventory: dict, from_slot: int) -> int:
|
|
57
55
|
<= MAX_STACK_SIZE[from_item_type]
|
58
56
|
):
|
59
57
|
return slot
|
60
|
-
# if there is a free slot with air
|
61
|
-
# if "air" in type_to_slot:
|
62
|
-
# for slot in type_to_slot["air"]:
|
63
|
-
# if slot > 10:
|
64
|
-
# return slot
|
65
|
-
|
66
58
|
if len(empty_slots) > 0:
|
67
59
|
return empty_slots.pop()
|
68
60
|
|
@@ -80,8 +72,6 @@ def get_inventory_counter(inventory: dict) -> Counter:
|
|
80
72
|
for slot, item in inventory.items():
|
81
73
|
if slot == 0:
|
82
74
|
continue
|
83
|
-
# if item["type"] == "air":
|
84
|
-
# continue
|
85
75
|
counter[item["type"]] += item["quantity"]
|
86
76
|
return counter
|
87
77
|
|
@@ -170,7 +160,6 @@ class OracleModel(PlancraftBaseModel):
|
|
170
160
|
|
171
161
|
if isinstance(plan_recipe, ShapelessRecipe):
|
172
162
|
crafting_slot = 1
|
173
|
-
|
174
163
|
# add each item to crafting slots
|
175
164
|
for item, quantity in items_to_use_counter.items():
|
176
165
|
n = 0
|
plancraft/utils.py
CHANGED
@@ -7,6 +7,7 @@ import torch
|
|
7
7
|
from loguru import logger
|
8
8
|
|
9
9
|
from plancraft.environment.actions import (
|
10
|
+
ActionHandlerBase,
|
10
11
|
MoveAction,
|
11
12
|
SmeltAction,
|
12
13
|
)
|
@@ -21,21 +22,21 @@ class History:
|
|
21
22
|
"""
|
22
23
|
History class to keep track of dialogue, actions, inventory and images
|
23
24
|
Args:
|
24
|
-
valid_actions: list of valid actions
|
25
|
+
valid_actions: list of valid actions names
|
25
26
|
initial_dialogue: list of dialogue messages
|
26
27
|
use_multimodal_content_format: whether to use multimodal content format (list of content with types)
|
27
28
|
"""
|
28
29
|
|
29
30
|
def __init__(
|
30
31
|
self,
|
31
|
-
|
32
|
+
actions: list[ActionHandlerBase] = [],
|
32
33
|
use_multimodal_content_format=False,
|
33
34
|
few_shot=False,
|
34
35
|
use_images=False,
|
35
36
|
use_text_inventory=False,
|
36
37
|
resolution="high",
|
37
38
|
):
|
38
|
-
self.
|
39
|
+
self.action_handlers = actions
|
39
40
|
self.use_multimodal_content_format = use_multimodal_content_format
|
40
41
|
self.few_shot = few_shot
|
41
42
|
self.use_images = use_images
|
@@ -58,7 +59,7 @@ class History:
|
|
58
59
|
|
59
60
|
def system_prompt(self):
|
60
61
|
# kept separate from dialogue history because certain models deal with system prompt differently
|
61
|
-
system_prompt_text = get_system_prompt(self.
|
62
|
+
system_prompt_text = get_system_prompt(handlers=self.action_handlers)
|
62
63
|
if self.use_multimodal_content_format:
|
63
64
|
return {
|
64
65
|
"role": "system",
|
@@ -75,7 +76,7 @@ class History:
|
|
75
76
|
|
76
77
|
if self.few_shot:
|
77
78
|
self.prompt_examples = get_prompt_example(
|
78
|
-
self.
|
79
|
+
self.action_handlers,
|
79
80
|
use_text_inventory=self.use_text_inventory,
|
80
81
|
use_multimodal_content_format=self.use_multimodal_content_format,
|
81
82
|
use_images=self.use_images,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: plancraft
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.1
|
4
4
|
Summary: Plancraft: an evaluation dataset for planning with LLM agents
|
5
5
|
License: MIT License
|
6
6
|
|
@@ -25,7 +25,6 @@ License: MIT License
|
|
25
25
|
SOFTWARE.
|
26
26
|
License-File: LICENSE
|
27
27
|
Requires-Python: >=3.9
|
28
|
-
Requires-Dist: accelerate>=1.1.1
|
29
28
|
Requires-Dist: hydra-core>=1.3.2
|
30
29
|
Requires-Dist: imageio>=2.36.0
|
31
30
|
Requires-Dist: loguru
|
@@ -65,7 +64,7 @@ Description-Content-Type: text/markdown
|
|
65
64
|
|
66
65
|
[Paper](https://arxiv.org/abs/2412.21033) | [Website](https://gautierdag.github.io/plancraft/)
|
67
66
|
|
68
|
-
Plancraft is a minecraft environment and agent that innovates on planning LLM agents with
|
67
|
+
Plancraft is a Minecraft environment and agent that innovates on planning LLM agents with an oracle RAG retriever.
|
69
68
|
|
70
69
|
You can install the package by running the following command:
|
71
70
|
|
@@ -123,8 +122,10 @@ from plancraft.config import EvalConfig
|
|
123
122
|
def main():
|
124
123
|
# Create the config
|
125
124
|
config = EvalConfig(...)
|
125
|
+
# create model -- Note you can create your own model by subclassing PlancraftBaseModel
|
126
|
+
model = get_model(config)
|
126
127
|
# Create the evaluator
|
127
|
-
evaluator = Evaluator(config)
|
128
|
+
evaluator = Evaluator(config, run_name="my_run", model=model)
|
128
129
|
# Evaluate the agent
|
129
130
|
evaluator.eval_all_seeds()
|
130
131
|
```
|
@@ -207,7 +208,7 @@ The observation returned by the `Evaluator` class is a dictionary with the follo
|
|
207
208
|
|
208
209
|
To implement a model, you need to subclass the `PlancraftBaseModel` class and implement the `step` and `reset` method. See the `plancraft.models.dummy` module for an example of how to implement a basic model.
|
209
210
|
|
210
|
-
You
|
211
|
+
You should then be able to use the `Evaluator` class to evaluate it.
|
211
212
|
|
212
213
|
## Reproducing the Results tables in the paper
|
213
214
|
|
@@ -225,10 +226,13 @@ The image is available on [Docker Hub](https://hub.docker.com/r/gautierdag/planc
|
|
225
226
|
|
226
227
|
## To Do
|
227
228
|
|
229
|
+
Non-exhaustive list of things to do from highest to lowest priority:
|
230
|
+
|
231
|
+
- [ ] Add minecraft wiki scrape and non-oracle search for pages
|
232
|
+
- [ ] Improve planner to bring closer to optimal (the oracle planner does not consider future crafting steps when moving items -- see paper for more details)
|
228
233
|
- [ ] Rerun image models with better bounding box model
|
229
234
|
- [ ] Track bounding box accuracy
|
230
|
-
- [ ]
|
231
|
-
- [ ] Add minecraft wiki scrape and non-oracle search for pages
|
235
|
+
- [ ] Implement a version of the image environment entirely on cuda/pytorch rather than cpu
|
232
236
|
|
233
237
|
## PRs Welcomed
|
234
238
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
plancraft/config.py,sha256=
|
3
|
-
plancraft/evaluator.py,sha256=
|
2
|
+
plancraft/config.py,sha256=HNHFDewz_0IF1EiPoS8B_ND5JfQvWjE4-0MbX-xvsRQ,4215
|
3
|
+
plancraft/evaluator.py,sha256=0J1Mk-n5Y_7L-WhuH6UpoMWhMnGtdFAGW-aqZDhuhLk,13844
|
4
4
|
plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
|
5
|
-
plancraft/utils.py,sha256=
|
5
|
+
plancraft/utils.py,sha256=7VWKVlDhoMacRypRRSKM1K3hwwJ0nHR3zyx9jZH1C1g,7042
|
6
6
|
plancraft/data/test.json,sha256=7ozxAb-PzoaOMQbMMh52RvN0pQBor6aAUwMrtc2C-y0,1670677
|
7
7
|
plancraft/data/test.small.easy.json,sha256=IsrnRUACUWUdq2_BKGw_H2GptstqmFw66y0Grwmrwj8,238854
|
8
8
|
plancraft/data/test.small.json,sha256=RnPJJf_wLhdUQydrQo0H4KJvcD5PkSEVy5Bbi--Il2U,342843
|
@@ -10,15 +10,15 @@ plancraft/data/train.json,sha256=pdArGse10i6Dg5Oa56EJPH_fOmotVzv2q5LPJpmS_bQ,342
|
|
10
10
|
plancraft/data/val.json,sha256=bfVFVQ_dmDSTCLojRkv1XIlct5zkwSg4AzsMp0gUUGI,1654481
|
11
11
|
plancraft/data/val.small.easy.json,sha256=vgBotEu-mH8441jUyCN_6DZIRX1O5SpZatdmK-I7yNA,240202
|
12
12
|
plancraft/data/val.small.json,sha256=WO7xerSWVOPcnLH1_MBiWwdHmqWP0DDGMhuF2RycBRo,300799
|
13
|
-
plancraft/environment/__init__.py,sha256=
|
14
|
-
plancraft/environment/actions.py,sha256=
|
15
|
-
plancraft/environment/env.py,sha256=
|
13
|
+
plancraft/environment/__init__.py,sha256=XFsFny4lH195AwAmL-WeCaF9ZCMgc7IgXIwhQ8FTdgE,505
|
14
|
+
plancraft/environment/actions.py,sha256=D9QqBW7yWsbWCjxNyWp61Xtb0c6EtyXk3PZ1I8SRoBQ,9381
|
15
|
+
plancraft/environment/env.py,sha256=_VQewLUv8YpKLaNp9uye25lq4HFHd9ddTQr1Lqv4eOs,16290
|
16
16
|
plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
|
17
17
|
plancraft/environment/planner.py,sha256=eJExz3OxSzurIEdH9LOtMwFH9ApqMQ3CokVhmbV6Px0,3953
|
18
|
-
plancraft/environment/prompts.py,sha256=
|
18
|
+
plancraft/environment/prompts.py,sha256=OKxiv02NIhRk5FZJUEDRLkVWVMc-aXKJi7i7X61uUmk,6633
|
19
19
|
plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
|
20
20
|
plancraft/environment/sampler.py,sha256=IZT-XjmWSZrs0zDyRTMjYytXxewdwYf5YGGdKsR5ll4,7643
|
21
|
-
plancraft/environment/search.py,sha256=
|
21
|
+
plancraft/environment/search.py,sha256=uFHpLvW40rMKOxDabcyWrpOrhKLDZqAJOF_jew4_WXk,1837
|
22
22
|
plancraft/environment/assets/constants.json,sha256=kyOIOh82CTTMMGEIS60k5k6M-6fkEmYDoGAnvi3Zx5k,1379016
|
23
23
|
plancraft/environment/assets/minecraft_font.ttf,sha256=AzoK9cgggXwjFPHtIO7uz-YaDrminl3nvB-VsaTvTAk,60992
|
24
24
|
plancraft/environment/assets/table.png,sha256=IKIViZKAPyR4FWnS0JP9AZ19vIEO3qoS5-YRGAO1ow8,5430
|
@@ -1917,10 +1917,10 @@ plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
|
|
1917
1917
|
plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
|
1918
1918
|
plancraft/models/dummy.py,sha256=HVuX5Y9CPNDP8Ne4BNTe2qyWdxyhIgvPIIV3OhXxzD8,1062
|
1919
1919
|
plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
|
1920
|
-
plancraft/models/oracle.py,sha256=
|
1920
|
+
plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
|
1921
1921
|
plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
|
1922
1922
|
plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
|
1923
|
-
plancraft-0.3.
|
1924
|
-
plancraft-0.3.
|
1925
|
-
plancraft-0.3.
|
1926
|
-
plancraft-0.3.
|
1923
|
+
plancraft-0.3.1.dist-info/METADATA,sha256=KKsWXHGTbWBXplk1E5F0b_AJvAAu7K91k5sR3eLtKM4,11306
|
1924
|
+
plancraft-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
1925
|
+
plancraft-0.3.1.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
|
1926
|
+
plancraft-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|