plancraft 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- plancraft/config.py +1 -10
- plancraft/environment/__init__.py +21 -0
- plancraft/environment/actions.py +152 -0
- plancraft/environment/env.py +3 -1
- plancraft/environment/prompts.py +29 -49
- plancraft/environment/search.py +28 -1
- plancraft/evaluator.py +30 -93
- plancraft/models/oracle.py +0 -11
- plancraft/utils.py +6 -5
- {plancraft-0.3.0.dist-info → plancraft-0.3.1.dist-info}/METADATA +11 -7
- {plancraft-0.3.0.dist-info → plancraft-0.3.1.dist-info}/RECORD +13 -13
- {plancraft-0.3.0.dist-info → plancraft-0.3.1.dist-info}/WHEEL +0 -0
- {plancraft-0.3.0.dist-info → plancraft-0.3.1.dist-info}/licenses/LICENSE +0 -0
plancraft/config.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from typing import Literal, Optional, Union
|
2
2
|
|
3
|
-
from pydantic import BaseModel
|
3
|
+
from pydantic import BaseModel
|
4
4
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
5
5
|
|
6
6
|
from plancraft.environment.recipes import RECIPES
|
@@ -40,15 +40,6 @@ class PlancraftConfig(BaseModel):
|
|
40
40
|
False # whether to use multimodal content format
|
41
41
|
)
|
42
42
|
|
43
|
-
@model_validator(mode="after")
|
44
|
-
def validate(self):
|
45
|
-
assert set(
|
46
|
-
self.valid_actions
|
47
|
-
).issubset(
|
48
|
-
{"move", "smelt", "think", "search", "impossible"}
|
49
|
-
), "valid_actions should be subset of {'move', 'smelt', 'think', 'search', 'impossible'}"
|
50
|
-
return self
|
51
|
-
|
52
43
|
|
53
44
|
class WandbConfig(BaseModel):
|
54
45
|
project: str
|
@@ -0,0 +1,21 @@
|
|
1
|
+
from .actions import (
|
2
|
+
ImpossibleActionHandler,
|
3
|
+
MoveActionHandler,
|
4
|
+
SmeltActionHandler,
|
5
|
+
ThinkActionHandler,
|
6
|
+
convert_from_slot_index,
|
7
|
+
convert_to_slot_index,
|
8
|
+
)
|
9
|
+
from .env import PlancraftEnvironment
|
10
|
+
from .search import GoldSearchActionHandler
|
11
|
+
|
12
|
+
__all__ = [
|
13
|
+
"ImpossibleActionHandler",
|
14
|
+
"MoveActionHandler",
|
15
|
+
"SmeltActionHandler",
|
16
|
+
"ThinkActionHandler",
|
17
|
+
"PlancraftEnvironment",
|
18
|
+
"GoldSearchActionHandler",
|
19
|
+
"convert_from_slot_index",
|
20
|
+
"convert_to_slot_index",
|
21
|
+
]
|
plancraft/environment/actions.py
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
import abc
|
2
|
+
import re
|
3
|
+
from typing import Optional
|
4
|
+
|
1
5
|
from pydantic import BaseModel, field_validator, model_validator
|
2
6
|
|
3
7
|
|
@@ -40,6 +44,39 @@ def convert_from_slot_index(slot_index: int) -> str:
|
|
40
44
|
return f"[I{slot_index-9}]"
|
41
45
|
|
42
46
|
|
47
|
+
class ActionHandlerBase(abc.ABC):
|
48
|
+
@property
|
49
|
+
@abc.abstractmethod
|
50
|
+
def prompt_description(self) -> str:
|
51
|
+
"""
|
52
|
+
Return the prompt description for the model
|
53
|
+
"""
|
54
|
+
raise NotImplementedError()
|
55
|
+
|
56
|
+
@property
|
57
|
+
@abc.abstractmethod
|
58
|
+
def prompt_format_example(self) -> str:
|
59
|
+
"""
|
60
|
+
Return the prompt format example for the model
|
61
|
+
"""
|
62
|
+
raise NotImplementedError()
|
63
|
+
|
64
|
+
@property
|
65
|
+
@abc.abstractmethod
|
66
|
+
def action_name(self) -> str:
|
67
|
+
"""
|
68
|
+
Return the action name for the model
|
69
|
+
"""
|
70
|
+
raise NotImplementedError()
|
71
|
+
|
72
|
+
@abc.abstractmethod
|
73
|
+
def match(self, generated_text: str):
|
74
|
+
"""
|
75
|
+
Match the generated text to the action/tool
|
76
|
+
"""
|
77
|
+
raise NotImplementedError()
|
78
|
+
|
79
|
+
|
43
80
|
class MoveAction(BaseModel):
|
44
81
|
""" "Moves an item from one slot to another"""
|
45
82
|
|
@@ -152,3 +189,118 @@ class StopAction(BaseModel):
|
|
152
189
|
|
153
190
|
# when symbolic action is true, can either move objects around or smelt
|
154
191
|
SymbolicAction = MoveAction | SmeltAction
|
192
|
+
|
193
|
+
|
194
|
+
class MoveActionHandler(ActionHandlerBase):
|
195
|
+
@property
|
196
|
+
def prompt_description(self) -> str:
|
197
|
+
return "Transfer a specific quantity of an item from one slot to another"
|
198
|
+
|
199
|
+
@property
|
200
|
+
def prompt_format_example(self) -> str:
|
201
|
+
return "`move: from [Source] to [Target] with quantity N`"
|
202
|
+
|
203
|
+
@property
|
204
|
+
def action_name(self) -> str:
|
205
|
+
return "move"
|
206
|
+
|
207
|
+
def match(self, generated_text: str) -> Optional[MoveAction | str]:
|
208
|
+
"""
|
209
|
+
Parse the raw model response to a MoveAction
|
210
|
+
"""
|
211
|
+
action_match = re.search(f"({self.action_name}):", generated_text)
|
212
|
+
if not action_match:
|
213
|
+
return
|
214
|
+
try:
|
215
|
+
slot_from = re.search(r" from (\[[ABCI]?\d+\])", generated_text).group(1)
|
216
|
+
slot_to = re.search(r" to (\[[ABCI]?\d+\])", generated_text).group(1)
|
217
|
+
quantity = re.search(r"with quantity (\d+)", generated_text).group(1)
|
218
|
+
action = MoveAction(
|
219
|
+
slot_from=slot_from,
|
220
|
+
slot_to=slot_to,
|
221
|
+
quantity=quantity,
|
222
|
+
)
|
223
|
+
return action
|
224
|
+
except AttributeError as e:
|
225
|
+
return f"Format Error: {e}"
|
226
|
+
|
227
|
+
|
228
|
+
class SmeltActionHandler(ActionHandlerBase):
|
229
|
+
@property
|
230
|
+
def prompt_description(self) -> str:
|
231
|
+
return "Smelt an item in a furnace and moves the output to a specific slot"
|
232
|
+
|
233
|
+
@property
|
234
|
+
def prompt_format_example(self) -> str:
|
235
|
+
return "`smelt: from [Source] to [Target] with quantity N`"
|
236
|
+
|
237
|
+
@property
|
238
|
+
def action_name(self) -> str:
|
239
|
+
return "smelt"
|
240
|
+
|
241
|
+
def match(self, generated_text: str) -> Optional[SmeltAction | str]:
|
242
|
+
"""
|
243
|
+
Parse the raw model response to a SmeltAction
|
244
|
+
"""
|
245
|
+
action_match = re.search(f"({self.action_name}):", generated_text)
|
246
|
+
if not action_match:
|
247
|
+
return
|
248
|
+
try:
|
249
|
+
slot_from = re.search(r" from (\[[ABCI]?\d+\])", generated_text).group(1)
|
250
|
+
slot_to = re.search(r" to (\[[ABCI]?\d+\])", generated_text).group(1)
|
251
|
+
quantity = re.search(r"with quantity (\d+)", generated_text).group(1)
|
252
|
+
action = SmeltAction(
|
253
|
+
slot_from=slot_from,
|
254
|
+
slot_to=slot_to,
|
255
|
+
quantity=quantity,
|
256
|
+
)
|
257
|
+
return action
|
258
|
+
except AttributeError as e:
|
259
|
+
return f"Format Error: {e}"
|
260
|
+
|
261
|
+
|
262
|
+
class ImpossibleActionHandler(ActionHandlerBase):
|
263
|
+
@property
|
264
|
+
def prompt_description(self) -> str:
|
265
|
+
return "Stop task if it is certain that it is impossible with given inventory"
|
266
|
+
|
267
|
+
@property
|
268
|
+
def prompt_format_example(self) -> str:
|
269
|
+
return "`impossible: <reason>`"
|
270
|
+
|
271
|
+
@property
|
272
|
+
def action_name(self) -> str:
|
273
|
+
return "impossible"
|
274
|
+
|
275
|
+
def match(self, generated_text) -> Optional[StopAction]:
|
276
|
+
"""
|
277
|
+
Parse the raw model response to a StopAction
|
278
|
+
"""
|
279
|
+
action_match = re.search(f"({self.action_name}):", generated_text)
|
280
|
+
if not action_match:
|
281
|
+
return
|
282
|
+
reason = re.search(r"impossible: (.*)", generated_text).group(1)
|
283
|
+
return StopAction(reason=reason)
|
284
|
+
|
285
|
+
|
286
|
+
class ThinkActionHandler(ActionHandlerBase):
|
287
|
+
@property
|
288
|
+
def prompt_description(self) -> str:
|
289
|
+
return "Generate thoughts to help you decide on the next action"
|
290
|
+
|
291
|
+
@property
|
292
|
+
def prompt_format_example(self) -> str:
|
293
|
+
return "`think: <thought message>`"
|
294
|
+
|
295
|
+
@property
|
296
|
+
def action_name(self) -> str:
|
297
|
+
return "think"
|
298
|
+
|
299
|
+
def match(self, generated_text) -> Optional[str]:
|
300
|
+
"""
|
301
|
+
Parse the raw model response to a ThinkAction
|
302
|
+
"""
|
303
|
+
action_match = re.search(f"({self.action_name}):", generated_text)
|
304
|
+
if not action_match:
|
305
|
+
return
|
306
|
+
return "Ok"
|
plancraft/environment/env.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
import glob
|
2
2
|
import os
|
3
|
-
from collections import defaultdict
|
4
3
|
from typing import Literal, Optional
|
5
4
|
|
6
5
|
import numpy as np
|
@@ -323,6 +322,9 @@ class PlancraftEnvironment:
|
|
323
322
|
# not enough
|
324
323
|
if self.slot_empty(slot_from) or self.state[slot_from]["quantity"] < quantity:
|
325
324
|
return
|
325
|
+
# if craft slot - must take all
|
326
|
+
if slot_from == 0 and self.state[slot_from]["quantity"] != quantity:
|
327
|
+
return
|
326
328
|
|
327
329
|
item = self.state[slot_from]
|
328
330
|
|
plancraft/environment/prompts.py
CHANGED
@@ -2,31 +2,11 @@ import numpy as np
|
|
2
2
|
|
3
3
|
from plancraft.environment.env import PlancraftEnvironment
|
4
4
|
from plancraft.environment.search import gold_search_recipe
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
"description": "Transfer a specific quantity of an item from one slot to another",
|
11
|
-
"format": "`move: from [Source] to [Target] with quantity N`",
|
12
|
-
},
|
13
|
-
"smelt": {
|
14
|
-
"description": "Smelt an item in a furnace and moves the output to a specific slot",
|
15
|
-
"format": "`smelt: from [Source] to [Target] with quantity N`",
|
16
|
-
},
|
17
|
-
"think": {
|
18
|
-
"description": "Generate thoughts to help you decide on the next action",
|
19
|
-
"format": "`think: <thought message>`",
|
20
|
-
},
|
21
|
-
"search": {
|
22
|
-
"description": "Search for a recipe to craft a specific item",
|
23
|
-
"format": "`search: <recipe name>`",
|
24
|
-
},
|
25
|
-
"impossible": {
|
26
|
-
"description": "Stop task if it is certain that it is impossible with given inventory",
|
27
|
-
"format": "`impossible: <reason>`",
|
28
|
-
},
|
29
|
-
}
|
5
|
+
from plancraft.environment.actions import (
|
6
|
+
ActionHandlerBase,
|
7
|
+
MoveActionHandler,
|
8
|
+
SmeltActionHandler,
|
9
|
+
)
|
30
10
|
|
31
11
|
BASE_SYSTEM_PROMPT = """You are crafting in Minecraft. You need to decide on the next action.
|
32
12
|
|
@@ -48,23 +28,6 @@ Constraints:
|
|
48
28
|
- If an item is not in slot [0] then the recipe is incorrect
|
49
29
|
- You need to move items from [0] to a free inventory slot to complete the crafting process"""
|
50
30
|
|
51
|
-
|
52
|
-
def get_system_prompt(actions: list[str]):
|
53
|
-
assert set(actions).issubset(VALID_ACTIONS), f"Invalid actions: {actions}"
|
54
|
-
assert "move" in actions, "move should be one of the actions"
|
55
|
-
assert "smelt" in actions, "smelt should be one of the actions"
|
56
|
-
|
57
|
-
descriptions = ""
|
58
|
-
for action in actions:
|
59
|
-
descriptions += f"\n\t- {action}: {ACTIONS_DESCRIPTIONS[action]['description']}"
|
60
|
-
|
61
|
-
output_format = ""
|
62
|
-
for action in actions:
|
63
|
-
output_format += f"\n\t- {ACTIONS_DESCRIPTIONS[action]['format']}"
|
64
|
-
|
65
|
-
return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
|
66
|
-
|
67
|
-
|
68
31
|
CRAFTING_STEPS = [
|
69
32
|
"Craft an item of type: andesite\ninventory:\n - diorite [I18] quantity 1\n - cobblestone [I30] quantity 1",
|
70
33
|
"Craft an item of type: andesite\ninventory:\n - diorite [B1] quantity 1\n - cobblestone [I30] quantity 1",
|
@@ -94,8 +57,26 @@ SEARCH_STEPS = [
|
|
94
57
|
]
|
95
58
|
|
96
59
|
|
60
|
+
def get_system_prompt(
|
61
|
+
handlers: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
|
62
|
+
):
|
63
|
+
action_names = [handler.action_name for handler in handlers]
|
64
|
+
assert "move" in action_names, "MoveActionHandler should be one of the handlers"
|
65
|
+
assert "smelt" in action_names, "SmeltActionHandler should be one of the handlers"
|
66
|
+
|
67
|
+
descriptions = ""
|
68
|
+
for handler in handlers:
|
69
|
+
descriptions += f"\n\t- {handler.action_name}: {handler.prompt_description}"
|
70
|
+
|
71
|
+
output_format = ""
|
72
|
+
for handler in handlers:
|
73
|
+
output_format += f"\n\t- {handler.prompt_format_example}"
|
74
|
+
|
75
|
+
return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
|
76
|
+
|
77
|
+
|
97
78
|
def get_prompt_example(
|
98
|
-
|
79
|
+
handlers: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
|
99
80
|
use_text_inventory=True,
|
100
81
|
use_multimodal_content_format=False,
|
101
82
|
use_images=False,
|
@@ -103,10 +84,9 @@ def get_prompt_example(
|
|
103
84
|
"""
|
104
85
|
Generates a few-shot prompt for the crafting task
|
105
86
|
"""
|
106
|
-
|
107
|
-
assert
|
108
|
-
assert "
|
109
|
-
assert "smelt" in actions, "smelt should be one of the actions"
|
87
|
+
handler_names = [handler.action_name for handler in handlers]
|
88
|
+
assert "move" in handler_names, "move should be one of the actions"
|
89
|
+
assert "smelt" in handler_names, "smelt should be one of the actions"
|
110
90
|
|
111
91
|
if use_images:
|
112
92
|
assert (
|
@@ -120,12 +100,12 @@ def get_prompt_example(
|
|
120
100
|
text = text.split("\ninventory:\n")[0]
|
121
101
|
|
122
102
|
example_dialogue.append({"role": "user", "content": text})
|
123
|
-
if "search" in
|
103
|
+
if "search" in handler_names and SEARCH_STEPS[i]:
|
124
104
|
example_dialogue.append({"role": "assistant", "content": SEARCH_STEPS[i]})
|
125
105
|
search_target = text.split("seach: ")[-1].strip()
|
126
106
|
search_response = gold_search_recipe(search_target)
|
127
107
|
example_dialogue.append({"role": "user", "content": search_response})
|
128
|
-
if "think" in
|
108
|
+
if "think" in handler_names:
|
129
109
|
example_dialogue.append({"role": "assistant", "content": THINK_STEPS[i]})
|
130
110
|
example_dialogue.append({"role": "user", "content": "Ok"})
|
131
111
|
example_dialogue.append({"role": "assistant", "content": BASE_ACTION_STEPS[i]})
|
plancraft/environment/search.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
-
|
1
|
+
import re
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from plancraft.environment.actions import convert_from_slot_index, ActionHandlerBase
|
2
5
|
from plancraft.environment.recipes import RECIPES
|
3
6
|
|
4
7
|
|
@@ -24,3 +27,27 @@ def gold_search_recipe(recipe_name: str) -> str:
|
|
24
27
|
recipe_instructions = f"smelt {r.ingredient}\n"
|
25
28
|
out_string += f"recipe {i+1}:\n{recipe_instructions}"
|
26
29
|
return out_string
|
30
|
+
|
31
|
+
|
32
|
+
class GoldSearchActionHandler(ActionHandlerBase):
|
33
|
+
@property
|
34
|
+
def prompt_description(self) -> str:
|
35
|
+
return "Search for recipes to craft a specific item"
|
36
|
+
|
37
|
+
@property
|
38
|
+
def prompt_format_example(self) -> str:
|
39
|
+
return "`search: <recipe name>`"
|
40
|
+
|
41
|
+
@property
|
42
|
+
def action_name(self) -> str:
|
43
|
+
return "search"
|
44
|
+
|
45
|
+
def match(self, generated_text) -> Optional[str]:
|
46
|
+
"""
|
47
|
+
Parse the raw model response to a SearchAction
|
48
|
+
"""
|
49
|
+
action_match = re.search(f"({self.action_name}):", generated_text)
|
50
|
+
if not action_match:
|
51
|
+
return
|
52
|
+
search_target = re.search(r"search: (\w+)", generated_text).group(1)
|
53
|
+
return gold_search_recipe(search_target)
|
plancraft/evaluator.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
3
|
import random
|
4
|
-
import re
|
5
4
|
import string
|
6
5
|
import time
|
7
6
|
|
@@ -12,15 +11,19 @@ from tqdm import tqdm
|
|
12
11
|
|
13
12
|
import wandb
|
14
13
|
from plancraft.config import EvalConfig, PlancraftExample
|
15
|
-
from plancraft.environment.actions import
|
14
|
+
from plancraft.environment.actions import (
|
15
|
+
StopAction,
|
16
|
+
ActionHandlerBase,
|
17
|
+
MoveActionHandler,
|
18
|
+
SmeltActionHandler,
|
19
|
+
)
|
16
20
|
from plancraft.environment.env import (
|
17
21
|
PlancraftEnvironment,
|
18
22
|
get_objective_str,
|
19
23
|
target_and_inventory_to_text_obs,
|
20
24
|
)
|
21
|
-
from plancraft.environment.search import gold_search_recipe
|
22
|
-
from plancraft.models import get_model
|
23
25
|
from plancraft.utils import History
|
26
|
+
from plancraft.models.base import PlancraftBaseModel
|
24
27
|
|
25
28
|
|
26
29
|
class Evaluator:
|
@@ -35,12 +38,18 @@ class Evaluator:
|
|
35
38
|
Finally, it also saves the results of the evaluation and the images generated during the evaluation.
|
36
39
|
"""
|
37
40
|
|
38
|
-
def __init__(
|
41
|
+
def __init__(
|
42
|
+
self,
|
43
|
+
cfg: EvalConfig,
|
44
|
+
run_name: str,
|
45
|
+
model: PlancraftBaseModel,
|
46
|
+
actions: list[ActionHandlerBase] = [MoveActionHandler(), SmeltActionHandler()],
|
47
|
+
):
|
39
48
|
self.cfg = cfg
|
40
|
-
self.
|
41
|
-
|
42
|
-
)
|
49
|
+
self.run_name = run_name
|
50
|
+
self.output_dir = f"{cfg.plancraft.output_dir}/{run_name}/{cfg.plancraft.split}"
|
43
51
|
self.generation_number = 0
|
52
|
+
self.actions = actions
|
44
53
|
|
45
54
|
# load all examples
|
46
55
|
self.examples: list[PlancraftExample] = self.load_dataset(cfg.plancraft.split)
|
@@ -53,7 +62,7 @@ class Evaluator:
|
|
53
62
|
|
54
63
|
# initialise history/dialogue tracking
|
55
64
|
self.history = History(
|
56
|
-
|
65
|
+
actions=actions,
|
57
66
|
use_multimodal_content_format=cfg.plancraft.use_multimodal_content_format,
|
58
67
|
use_images=cfg.plancraft.use_images,
|
59
68
|
use_text_inventory=cfg.plancraft.use_text_inventory,
|
@@ -61,45 +70,7 @@ class Evaluator:
|
|
61
70
|
)
|
62
71
|
|
63
72
|
# load model
|
64
|
-
self.model =
|
65
|
-
|
66
|
-
def evaluator_name(self) -> str:
|
67
|
-
if self.cfg.plancraft.use_text_inventory and self.cfg.plancraft.use_images:
|
68
|
-
name_str = "both"
|
69
|
-
elif self.cfg.plancraft.use_images:
|
70
|
-
name_str = "images"
|
71
|
-
elif self.cfg.plancraft.use_text_inventory:
|
72
|
-
name_str = "text"
|
73
|
-
else:
|
74
|
-
raise ValueError(
|
75
|
-
"At least one of use_text_inventory or use_images should be True"
|
76
|
-
)
|
77
|
-
|
78
|
-
if self.cfg.plancraft.use_fasterrcnn:
|
79
|
-
name_str += "_fasterrcnn"
|
80
|
-
|
81
|
-
model_name = self.cfg.plancraft.model.split("/")[-1]
|
82
|
-
if self.cfg.plancraft.adapter != "":
|
83
|
-
model_name = self.cfg.plancraft.adapter.split("/")[-1]
|
84
|
-
|
85
|
-
mode = self.cfg.plancraft.mode
|
86
|
-
if mode in ["dummy", "oracle"]:
|
87
|
-
return f"{mode}_{name_str}"
|
88
|
-
|
89
|
-
valid_actions_to_str = {
|
90
|
-
"move": "m",
|
91
|
-
"smelt": "s",
|
92
|
-
"think": "t",
|
93
|
-
"search": "se",
|
94
|
-
"impossible": "i",
|
95
|
-
}
|
96
|
-
actions = "|".join(
|
97
|
-
[
|
98
|
-
valid_actions_to_str[action]
|
99
|
-
for action in self.cfg.plancraft.valid_actions
|
100
|
-
]
|
101
|
-
)
|
102
|
-
return f"{self.cfg.plancraft.mode}_{name_str}_{model_name}_{actions}"
|
73
|
+
self.model = model
|
103
74
|
|
104
75
|
def save_results_dict(self, example: PlancraftExample, results_dict: dict):
|
105
76
|
output_dir = f"{self.output_dir}/{self.generation_number}"
|
@@ -152,48 +123,17 @@ class Evaluator:
|
|
152
123
|
return True
|
153
124
|
return False
|
154
125
|
|
155
|
-
def parse_raw_model_response(
|
156
|
-
self, content: str
|
157
|
-
) -> str | MoveAction | SmeltAction | StopAction:
|
126
|
+
def parse_raw_model_response(self, generated_text: str):
|
158
127
|
"""
|
159
|
-
Given a message and set of
|
128
|
+
Given a message and set of action handlers, parse the content to return the action
|
160
129
|
or a message if the action is not valid/requires message response
|
161
130
|
"""
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
if action == "think":
|
169
|
-
return "Ok"
|
170
|
-
elif action == "impossible":
|
171
|
-
reason = re.search(r"impossible: (.*)", content).group(1)
|
172
|
-
return StopAction(reason=reason)
|
173
|
-
elif action == "search":
|
174
|
-
search_target = re.search(r"search: (\w+)", content).group(1)
|
175
|
-
return gold_search_recipe(search_target)
|
176
|
-
else:
|
177
|
-
try:
|
178
|
-
slot_from = re.search(r" from (\[[ABCI]?\d+\])", content).group(1)
|
179
|
-
slot_to = re.search(r" to (\[[ABCI]?\d+\])", content).group(1)
|
180
|
-
quantity = re.search(r"with quantity (\d+)", content).group(1)
|
181
|
-
if action == "move":
|
182
|
-
action = MoveAction(
|
183
|
-
slot_from=slot_from,
|
184
|
-
slot_to=slot_to,
|
185
|
-
quantity=quantity,
|
186
|
-
)
|
187
|
-
else:
|
188
|
-
action = SmeltAction(
|
189
|
-
slot_from=slot_from,
|
190
|
-
slot_to=slot_to,
|
191
|
-
quantity=quantity,
|
192
|
-
)
|
193
|
-
return action
|
194
|
-
except AttributeError as e:
|
195
|
-
return f"Format Error: {e}"
|
196
|
-
return f"Only select actions from the following: {', '.join(self.cfg.plancraft.valid_actions)}"
|
131
|
+
for handler in self.actions:
|
132
|
+
match_output = handler.match(generated_text)
|
133
|
+
if match_output:
|
134
|
+
return match_output
|
135
|
+
action_names = [handler.action_name for handler in self.actions]
|
136
|
+
return f"Only select actions from the following: {', '.join(action_names)}"
|
197
137
|
|
198
138
|
def convert_observation_to_message(
|
199
139
|
self,
|
@@ -230,11 +170,8 @@ class Evaluator:
|
|
230
170
|
return {"content": content_list}
|
231
171
|
|
232
172
|
def eval_example(self, example: PlancraftExample) -> dict:
|
233
|
-
"""
|
234
|
-
|
235
|
-
run the episode until success or termination.
|
236
|
-
Termination can happen from: early stopping (stuck) / max_steps / stop_action
|
237
|
-
"""
|
173
|
+
"""Given the loaded model and an example from Plancraft
|
174
|
+
run the episode until success or termination."""
|
238
175
|
success = False
|
239
176
|
num_non_env_actions = 0
|
240
177
|
self.reset(example)
|
@@ -346,7 +283,7 @@ class Evaluator:
|
|
346
283
|
f"Running evaluation over {len(self.examples)} examples {self.cfg.plancraft.num_generations} times."
|
347
284
|
)
|
348
285
|
run_name = (
|
349
|
-
f"{self.
|
286
|
+
f"{self.run_name} {self.cfg.plancraft.split}".replace(" ", "_")
|
350
287
|
.replace(".", "_")
|
351
288
|
.strip()
|
352
289
|
)
|
plancraft/models/oracle.py
CHANGED
@@ -38,8 +38,6 @@ def find_free_inventory_slot(inventory: dict, from_slot: int) -> int:
|
|
38
38
|
if slot == from_slot:
|
39
39
|
continue
|
40
40
|
item_type = item["type"]
|
41
|
-
# if item["quantity"] == 0:
|
42
|
-
# item_type = "air"
|
43
41
|
if item_type not in type_to_slot:
|
44
42
|
type_to_slot[item_type] = [slot]
|
45
43
|
else:
|
@@ -57,12 +55,6 @@ def find_free_inventory_slot(inventory: dict, from_slot: int) -> int:
|
|
57
55
|
<= MAX_STACK_SIZE[from_item_type]
|
58
56
|
):
|
59
57
|
return slot
|
60
|
-
# if there is a free slot with air
|
61
|
-
# if "air" in type_to_slot:
|
62
|
-
# for slot in type_to_slot["air"]:
|
63
|
-
# if slot > 10:
|
64
|
-
# return slot
|
65
|
-
|
66
58
|
if len(empty_slots) > 0:
|
67
59
|
return empty_slots.pop()
|
68
60
|
|
@@ -80,8 +72,6 @@ def get_inventory_counter(inventory: dict) -> Counter:
|
|
80
72
|
for slot, item in inventory.items():
|
81
73
|
if slot == 0:
|
82
74
|
continue
|
83
|
-
# if item["type"] == "air":
|
84
|
-
# continue
|
85
75
|
counter[item["type"]] += item["quantity"]
|
86
76
|
return counter
|
87
77
|
|
@@ -170,7 +160,6 @@ class OracleModel(PlancraftBaseModel):
|
|
170
160
|
|
171
161
|
if isinstance(plan_recipe, ShapelessRecipe):
|
172
162
|
crafting_slot = 1
|
173
|
-
|
174
163
|
# add each item to crafting slots
|
175
164
|
for item, quantity in items_to_use_counter.items():
|
176
165
|
n = 0
|
plancraft/utils.py
CHANGED
@@ -7,6 +7,7 @@ import torch
|
|
7
7
|
from loguru import logger
|
8
8
|
|
9
9
|
from plancraft.environment.actions import (
|
10
|
+
ActionHandlerBase,
|
10
11
|
MoveAction,
|
11
12
|
SmeltAction,
|
12
13
|
)
|
@@ -21,21 +22,21 @@ class History:
|
|
21
22
|
"""
|
22
23
|
History class to keep track of dialogue, actions, inventory and images
|
23
24
|
Args:
|
24
|
-
valid_actions: list of valid actions
|
25
|
+
valid_actions: list of valid actions names
|
25
26
|
initial_dialogue: list of dialogue messages
|
26
27
|
use_multimodal_content_format: whether to use multimodal content format (list of content with types)
|
27
28
|
"""
|
28
29
|
|
29
30
|
def __init__(
|
30
31
|
self,
|
31
|
-
|
32
|
+
actions: list[ActionHandlerBase] = [],
|
32
33
|
use_multimodal_content_format=False,
|
33
34
|
few_shot=False,
|
34
35
|
use_images=False,
|
35
36
|
use_text_inventory=False,
|
36
37
|
resolution="high",
|
37
38
|
):
|
38
|
-
self.
|
39
|
+
self.action_handlers = actions
|
39
40
|
self.use_multimodal_content_format = use_multimodal_content_format
|
40
41
|
self.few_shot = few_shot
|
41
42
|
self.use_images = use_images
|
@@ -58,7 +59,7 @@ class History:
|
|
58
59
|
|
59
60
|
def system_prompt(self):
|
60
61
|
# kept separate from dialogue history because certain models deal with system prompt differently
|
61
|
-
system_prompt_text = get_system_prompt(self.
|
62
|
+
system_prompt_text = get_system_prompt(handlers=self.action_handlers)
|
62
63
|
if self.use_multimodal_content_format:
|
63
64
|
return {
|
64
65
|
"role": "system",
|
@@ -75,7 +76,7 @@ class History:
|
|
75
76
|
|
76
77
|
if self.few_shot:
|
77
78
|
self.prompt_examples = get_prompt_example(
|
78
|
-
self.
|
79
|
+
self.action_handlers,
|
79
80
|
use_text_inventory=self.use_text_inventory,
|
80
81
|
use_multimodal_content_format=self.use_multimodal_content_format,
|
81
82
|
use_images=self.use_images,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: plancraft
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.1
|
4
4
|
Summary: Plancraft: an evaluation dataset for planning with LLM agents
|
5
5
|
License: MIT License
|
6
6
|
|
@@ -25,7 +25,6 @@ License: MIT License
|
|
25
25
|
SOFTWARE.
|
26
26
|
License-File: LICENSE
|
27
27
|
Requires-Python: >=3.9
|
28
|
-
Requires-Dist: accelerate>=1.1.1
|
29
28
|
Requires-Dist: hydra-core>=1.3.2
|
30
29
|
Requires-Dist: imageio>=2.36.0
|
31
30
|
Requires-Dist: loguru
|
@@ -65,7 +64,7 @@ Description-Content-Type: text/markdown
|
|
65
64
|
|
66
65
|
[Paper](https://arxiv.org/abs/2412.21033) | [Website](https://gautierdag.github.io/plancraft/)
|
67
66
|
|
68
|
-
Plancraft is a minecraft environment and agent that innovates on planning LLM agents with
|
67
|
+
Plancraft is a minecraft environment and agent that innovates on planning LLM agents with an oracle RAG retriever.
|
69
68
|
|
70
69
|
You can install the package by running the following command:
|
71
70
|
|
@@ -123,8 +122,10 @@ from plancraft.config import EvalConfig
|
|
123
122
|
def main():
|
124
123
|
# Create the config
|
125
124
|
config = EvalConfig(...)
|
125
|
+
# create model -- Note you can create your own model by subclassing PlancraftBaseModel
|
126
|
+
model = get_model(config)
|
126
127
|
# Create the evaluator
|
127
|
-
evaluator = Evaluator(config)
|
128
|
+
evaluator = Evaluator(config, model=model)
|
128
129
|
# Evaluate the agent
|
129
130
|
evaluator.eval_all_seeds()
|
130
131
|
```
|
@@ -207,7 +208,7 @@ The observation returned by the `Evaluator` class is a dictionary with the follo
|
|
207
208
|
|
208
209
|
To implement a model, you need to subclass the `PlancraftBaseModel` class and implement the `step` and `reset` method. See the `plancraft.models.dummy` module for an example of how to implement a basic model.
|
209
210
|
|
210
|
-
You
|
211
|
+
You should then be able to use the `Evaluator` class to evaluate it.
|
211
212
|
|
212
213
|
## Reproducing the Results tables in the paper
|
213
214
|
|
@@ -225,10 +226,13 @@ The image is available on [Docker Hub](https://hub.docker.com/r/gautierdag/planc
|
|
225
226
|
|
226
227
|
## To Do
|
227
228
|
|
229
|
+
Non-exhaustive list of things to do from highest to lowest priority:
|
230
|
+
|
231
|
+
- [ ] Add minecraft wiki scrape and non-oracle search for pages
|
232
|
+
- [ ] Improve planner to bring closer to optimal (the oracle planner does not consider future crafting steps when moving items -- see paper for more details)
|
228
233
|
- [ ] Rerun image models with better bounding box model
|
229
234
|
- [ ] Track bounding box accuracy
|
230
|
-
- [ ]
|
231
|
-
- [ ] Add minecraft wiki scrape and non-oracle search for pages
|
235
|
+
- [ ] Implement a version of the image environment entirely on cuda/pytorch rather than cpu
|
232
236
|
|
233
237
|
## PRs Welcomed
|
234
238
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
plancraft/config.py,sha256=
|
3
|
-
plancraft/evaluator.py,sha256=
|
2
|
+
plancraft/config.py,sha256=HNHFDewz_0IF1EiPoS8B_ND5JfQvWjE4-0MbX-xvsRQ,4215
|
3
|
+
plancraft/evaluator.py,sha256=0J1Mk-n5Y_7L-WhuH6UpoMWhMnGtdFAGW-aqZDhuhLk,13844
|
4
4
|
plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
|
5
|
-
plancraft/utils.py,sha256=
|
5
|
+
plancraft/utils.py,sha256=7VWKVlDhoMacRypRRSKM1K3hwwJ0nHR3zyx9jZH1C1g,7042
|
6
6
|
plancraft/data/test.json,sha256=7ozxAb-PzoaOMQbMMh52RvN0pQBor6aAUwMrtc2C-y0,1670677
|
7
7
|
plancraft/data/test.small.easy.json,sha256=IsrnRUACUWUdq2_BKGw_H2GptstqmFw66y0Grwmrwj8,238854
|
8
8
|
plancraft/data/test.small.json,sha256=RnPJJf_wLhdUQydrQo0H4KJvcD5PkSEVy5Bbi--Il2U,342843
|
@@ -10,15 +10,15 @@ plancraft/data/train.json,sha256=pdArGse10i6Dg5Oa56EJPH_fOmotVzv2q5LPJpmS_bQ,342
|
|
10
10
|
plancraft/data/val.json,sha256=bfVFVQ_dmDSTCLojRkv1XIlct5zkwSg4AzsMp0gUUGI,1654481
|
11
11
|
plancraft/data/val.small.easy.json,sha256=vgBotEu-mH8441jUyCN_6DZIRX1O5SpZatdmK-I7yNA,240202
|
12
12
|
plancraft/data/val.small.json,sha256=WO7xerSWVOPcnLH1_MBiWwdHmqWP0DDGMhuF2RycBRo,300799
|
13
|
-
plancraft/environment/__init__.py,sha256=
|
14
|
-
plancraft/environment/actions.py,sha256=
|
15
|
-
plancraft/environment/env.py,sha256=
|
13
|
+
plancraft/environment/__init__.py,sha256=XFsFny4lH195AwAmL-WeCaF9ZCMgc7IgXIwhQ8FTdgE,505
|
14
|
+
plancraft/environment/actions.py,sha256=D9QqBW7yWsbWCjxNyWp61Xtb0c6EtyXk3PZ1I8SRoBQ,9381
|
15
|
+
plancraft/environment/env.py,sha256=_VQewLUv8YpKLaNp9uye25lq4HFHd9ddTQr1Lqv4eOs,16290
|
16
16
|
plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
|
17
17
|
plancraft/environment/planner.py,sha256=eJExz3OxSzurIEdH9LOtMwFH9ApqMQ3CokVhmbV6Px0,3953
|
18
|
-
plancraft/environment/prompts.py,sha256=
|
18
|
+
plancraft/environment/prompts.py,sha256=OKxiv02NIhRk5FZJUEDRLkVWVMc-aXKJi7i7X61uUmk,6633
|
19
19
|
plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
|
20
20
|
plancraft/environment/sampler.py,sha256=IZT-XjmWSZrs0zDyRTMjYytXxewdwYf5YGGdKsR5ll4,7643
|
21
|
-
plancraft/environment/search.py,sha256=
|
21
|
+
plancraft/environment/search.py,sha256=uFHpLvW40rMKOxDabcyWrpOrhKLDZqAJOF_jew4_WXk,1837
|
22
22
|
plancraft/environment/assets/constants.json,sha256=kyOIOh82CTTMMGEIS60k5k6M-6fkEmYDoGAnvi3Zx5k,1379016
|
23
23
|
plancraft/environment/assets/minecraft_font.ttf,sha256=AzoK9cgggXwjFPHtIO7uz-YaDrminl3nvB-VsaTvTAk,60992
|
24
24
|
plancraft/environment/assets/table.png,sha256=IKIViZKAPyR4FWnS0JP9AZ19vIEO3qoS5-YRGAO1ow8,5430
|
@@ -1917,10 +1917,10 @@ plancraft/models/base.py,sha256=uhG1tRmsBerJzW8qHoLyLEYpveDv0co7AAhi4mSfyO4,661
|
|
1917
1917
|
plancraft/models/bbox_model.py,sha256=3b1IEspoHiVUR6GOWjEbp4YoxRhGkzKt-eOiwaN8NXo,17091
|
1918
1918
|
plancraft/models/dummy.py,sha256=HVuX5Y9CPNDP8Ne4BNTe2qyWdxyhIgvPIIV3OhXxzD8,1062
|
1919
1919
|
plancraft/models/generators.py,sha256=F76_iPiqxUjDIrQwF58tzM0bLM91OkZJ0sBqBuki5wY,13939
|
1920
|
-
plancraft/models/oracle.py,sha256=
|
1920
|
+
plancraft/models/oracle.py,sha256=jDCE6zVFvbwFpDzQZTkHIlRwMud1yMJ4LVIdfpt5ddU,8449
|
1921
1921
|
plancraft/models/utils.py,sha256=E-sZohvolWgGbpHQKgAgkgIfUJoVnT5pMt6JP8xLHKg,4034
|
1922
1922
|
plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
|
1923
|
-
plancraft-0.3.
|
1924
|
-
plancraft-0.3.
|
1925
|
-
plancraft-0.3.
|
1926
|
-
plancraft-0.3.
|
1923
|
+
plancraft-0.3.1.dist-info/METADATA,sha256=KKsWXHGTbWBXplk1E5F0b_AJvAAu7K91k5sR3eLtKM4,11306
|
1924
|
+
plancraft-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
1925
|
+
plancraft-0.3.1.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
|
1926
|
+
plancraft-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|