plancraft 0.3.33__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plancraft/config.py +0 -14
- plancraft/environment/prompts.py +14 -17
- plancraft/evaluator.py +20 -5
- plancraft/models/act.py +16 -10
- plancraft/models/bbox_model.py +6 -4
- plancraft/models/generators.py +151 -125
- plancraft/models/utils.py +3 -3
- plancraft/simple.py +186 -0
- plancraft/utils.py +6 -36
- {plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/METADATA +69 -30
- {plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/RECORD +13 -12
- {plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/WHEEL +0 -0
- {plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/licenses/LICENSE +0 -0
plancraft/config.py
CHANGED
@@ -47,18 +47,6 @@ class WandbConfig(BaseModel):
     mode: str


-class LaunchConfig(BaseModel):
-    command: str
-    job_name: str
-    gpu_limit: int
-    gpu_product: str
-    cpu_request: int
-    ram_request: str
-    interactive: bool = False
-    namespace: str = "informatics"
-    env_vars: dict[str, dict[str, str]]
-
-
 class LocalEnvSettings(BaseSettings):
     hf_token: str = ""
     openai_api_key: str = ""
@@ -72,7 +60,6 @@ class LocalEnvSettings(BaseSettings):
 class EvalConfig(BaseModel):
     plancraft: PlancraftConfig
     wandb: WandbConfig
-    launch: LaunchConfig
     env_variables: LocalEnvSettings = LocalEnvSettings()


@@ -107,7 +94,6 @@ class TrainingArgs(BaseModel):
 class TrainConfig(BaseModel):
     training: TrainingArgs
     wandb: WandbConfig
-    launch: LaunchConfig
     env_variables: LocalEnvSettings

plancraft/environment/prompts.py
CHANGED
@@ -3,6 +3,7 @@ import numpy as np
 from plancraft.environment.env import PlancraftEnvironment
 from plancraft.environment.search import gold_search_recipe
 from plancraft.environment.actions import (
+    MoveAction,
     ActionHandlerBase,
     MoveActionHandler,
     SmeltActionHandler,
@@ -113,7 +114,7 @@ def get_prompt_example(
         example_dialogue.append({"role": "user", "content": text})
         if "search" in handler_names and SEARCH_STEPS[i]:
             example_dialogue.append({"role": "assistant", "content": SEARCH_STEPS[i]})
-            search_target =
+            search_target = SEARCH_STEPS[i].split("search: ")[-1].strip()
             search_response = gold_search_recipe(search_target)
             example_dialogue.append({"role": "user", "content": search_response})
         if "think" in handler_names:
@@ -160,29 +161,25 @@ def load_prompt_images(resolution: str) -> list[np.ndarray]:
     """
     Generates the images for the few-shot prompt in prompt.py
     """
-    starting_inv =
-        {"type": "diorite", "
-        {"type": "cobblestone", "
-
-
+    starting_inv = {
+        27: {"type": "diorite", "quantity": 1},
+        39: {"type": "cobblestone", "quantity": 1},
+    }
     env = PlancraftEnvironment(inventory=starting_inv, resolution=resolution)
     actions = [
-
-
-
+        None,
+        MoveAction(slot_from=27, slot_to=4, quantity=1),
+        MoveAction(slot_from=39, slot_to=5, quantity=1),
     ]
     images = []
     for action in actions:
         obs = env.step(action)
         images.append(obs["image"])
-
-
-        {"type": "
-
-    ]
-    new_actions = [
-        {"move": [0, 0, 0]},
-    ]
+    second_inv = {
+        45: {"type": "iron_ore", "quantity": 1},
+        39: {"type": "cobblestone", "quantity": 1},
+    }
+    new_actions = [None]
     env.reset(new_inventory=second_inv)
     for action in new_actions:
         obs = env.step(action)
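A minimal sketch, not part of the diff itself, of exercising the updated few-shot image generation above: the hunk switches the prompt inventories to slot-keyed dicts and replaces raw action dicts with `MoveAction` objects. The `"high"` resolution value is an assumption based on the defaults used elsewhere in this release, and the printed shape depends on the environment's render size.

```python
from plancraft.environment.prompts import load_prompt_images

# Regenerate the few-shot prompt images with the new slot-keyed inventories
images = load_prompt_images(resolution="high")
print(len(images), images[0].shape)  # one image array per scripted prompt step
```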
plancraft/evaluator.py
CHANGED
@@ -34,6 +34,13 @@ class Evaluator:

     It is also responsible for early stopping and verifying the target object has been craft.
     Finally, it also saves the results of the evaluation and the images generated during the evaluation.
+
+    This evaluator is designed to work with a PlancraftBaseModel and a set of ActionHandlerBase instances.
+    It supports multimodal content format and image-based inventory.
+
+    Importantly, it tracks the history of the dialogue and the environment state to provide a trace of the model's actions.
+
+    If you would want a simpler interface that just wraps the environment and actions to evaluate a single Plancraft example, you should use the PlancraftGymWrapper class.
     """

     def __init__(
@@ -59,6 +66,7 @@ class Evaluator:
         self.resume = resume
         self.use_fasterrcnn = use_fasterrcnn
         self.generation_number = 0
+
         self.use_multimodal_content_format = use_multimodal_content_format
         self.use_images = use_images
         self.use_text_inventory = use_text_inventory
@@ -73,7 +81,14 @@ class Evaluator:

     def create_history(self) -> HistoryBase:
         """Create a new History instance with current configuration"""
-        return self.history_class(
+        return self.history_class(
+            actions=self.actions,
+            config=self.history_config,
+            resolution=self.resolution,
+            use_multimodal_content_format=self.use_multimodal_content_format,
+            use_images=self.use_images,
+            use_text_inventory=self.use_text_inventory,
+        )

     def save_results_dict(self, example: PlancraftExample, results_dict: dict):
         output_dir = f"{self.output_dir}/{self.generation_number}"
@@ -170,7 +185,7 @@ class Evaluator:
             content_list.append({"type": "image"})
         return {"content": content_list}

-    def _init_environment(self, example: PlancraftExample) -> tuple:
+    def _init_environment(self, example: PlancraftExample, model=None) -> tuple:
         """Initialize environment and history for an example"""
         environment = PlancraftEnvironment(
             inventory=deepcopy(example.slotted_inventory),
@@ -179,7 +194,7 @@ class Evaluator:
         history = self.create_history()
         obs = environment.step()
         obs["target"] = example.target
-        obs["message"] = self.convert_observation_to_message(obs)
+        obs["message"] = self.convert_observation_to_message(obs, model=model)
         return environment, history, obs

     def _process_model_output(
@@ -241,7 +256,7 @@ class Evaluator:
             "number_of_steps": history.num_steps,
             "model_trace": history.trace(),
             "example_id": example.id,
-            "images": history.
+            "images": history.trace_images(),
         }

     def eval_example(
@@ -249,7 +264,7 @@ class Evaluator:
         example: PlancraftExample,
         model: PlancraftBaseModel,
     ) -> dict:
-        environment, history, observation = self._init_environment(example)
+        environment, history, observation = self._init_environment(example, model=model)
         success = False

         while history.num_steps < self.max_steps:
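A minimal sketch, not taken from the package docs, of driving `eval_example` with the `model=` threading shown above. The `NoOpModel` stub and the action string it returns are placeholders; only the `run_name` argument, the `step`/`reset` interface, and the result keys (`example_id`, `number_of_steps`, `images`) are taken from this diff and the README example further down.

```python
from plancraft.evaluator import Evaluator
from plancraft.models.base import PlancraftBaseModel
from plancraft.simple import get_plancraft_examples


class NoOpModel(PlancraftBaseModel):
    def step(self, observation, dialogue_history):
        # placeholder response; not guaranteed to be a valid action string
        return "impossible: placeholder action"

    def reset(self):
        pass


examples = get_plancraft_examples(split="train")
evaluator = Evaluator(run_name="debug-single-example")
result = evaluator.eval_example(examples[0], model=NoOpModel())
print(result["example_id"], result["number_of_steps"], len(result["images"]))
```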
plancraft/models/act.py
CHANGED
@@ -6,8 +6,8 @@ from plancraft.models.bbox_model import IntegratedBoundingBoxModel
 from plancraft.models.generators import (
     OpenAIGenerator,
     TransformersGenerator,
+    VLLMGenerator,
 )
-
 from plancraft.utils import History


@@ -42,15 +42,21 @@ class ActModel(PlancraftBaseModel):
                 api_key=cfg.env_variables.openai_api_key,
             )
         else:
-            #
-            self.
-
-
-
-
-
-
-
+            # if adapter name is provided then use TransformersGenerator
+            if self.use_images or cfg.plancraft.adapter != "":
+                # model is transformers based
+                self.llm = TransformersGenerator(
+                    model_name=cfg.plancraft.model,
+                    tokenizer_name=cfg.plancraft.tokenizer,
+                    quantize=cfg.plancraft.quantize,
+                    use_hot_cache=cfg.plancraft.hot_cache,
+                    adapter_name=cfg.plancraft.adapter,
+                    hf_token=cfg.env_variables.hf_token,
+                    use_images=self.use_images,
+                )
+            else:
+                # use standard VLLM for text-only models
+                self.llm = VLLMGenerator(model_name=cfg.plancraft.model)
         self.max_messages_window = cfg.plancraft.max_message_window
         self.kv_cache = None

plancraft/models/bbox_model.py
CHANGED
@@ -455,7 +455,7 @@ class IntegratedBoundingBoxModel(nn.Module, PyTorchModelHubMixin):
         preds = self.model(x)
         return preds

-    def get_inventory(self, pil_image, resolution="high") -> dict:
+    def get_inventory(self, pil_image, resolution="high", threshold=0.25) -> dict:
         """
         Predict boxes and quantities
         """
@@ -464,10 +464,12 @@ class IntegratedBoundingBoxModel(nn.Module, PyTorchModelHubMixin):
             img_tensor = img_tensor.cuda()
         with torch.no_grad():
             predictions = self.model(img_tensor.unsqueeze(0))
-        return self.prediction_to_inventory(
+        return self.prediction_to_inventory(
+            predictions[0], resolution=resolution, threshold=threshold
+        )

     @staticmethod
-    def prediction_to_inventory(prediction, threshold=0.
+    def prediction_to_inventory(prediction, threshold=0.25, resolution="high") -> dict:
         inventory = {}
         seen_slots = set()
         for bbox, score, label, quantity in zip(
@@ -483,7 +485,7 @@ class IntegratedBoundingBoxModel(nn.Module, PyTorchModelHubMixin):
                 continue
             label = ALL_ITEMS[label.item()]
             quantity = quantity.item()
-            inventory[
+            inventory[slot] = {"type": label, "quantity": quantity}
             seen_slots.add(slot)
         return inventory

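A brief sketch of the `threshold` parameter now exposed on `get_inventory`: it is forwarded to `prediction_to_inventory`, so raising it drops low-confidence detections and lowering it keeps more of them. The checkpoint id and screenshot path are hypothetical, and a CUDA device is assumed since the method moves the image tensor to the GPU.

```python
from PIL import Image
from plancraft.models.bbox_model import IntegratedBoundingBoxModel

# hypothetical checkpoint id -- PyTorchModelHubMixin provides from_pretrained
model = IntegratedBoundingBoxModel.from_pretrained("gautierdag/plancraft-bbox")
model.eval().cuda()

image = Image.open("observation.png")  # placeholder screenshot of the crafting UI
inventory = model.get_inventory(image, resolution="high", threshold=0.25)
# e.g. {27: {"type": "diorite", "quantity": 1}, ...}
print(inventory)
```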
plancraft/models/generators.py
CHANGED
@@ -1,4 +1,5 @@
 import time
+import copy

 import torch
 from loguru import logger
@@ -6,12 +7,15 @@ from openai import OpenAI
 from PIL import Image
 from transformers import (
     AutoModelForCausalLM,
-    AutoModelForVision2Seq,
-    AutoProcessor,
     AutoTokenizer,
     BitsAndBytesConfig,
 )
-
+
+try:
+    from vllm import LLM, SamplingParams
+    from vllm.lora.request import LoRARequest
+except ImportError:
+    logger.warning("vLLM not installed. Please install vLLM to use vLLM")

 from plancraft.models.utils import (
     get_downloaded_models,
@@ -28,13 +32,12 @@ class TransformersGenerator:
         tokenizer_name: str = "same",
         quantize=False,
         use_images=False,
-        use_hot_cache=True,
         adapter_name="",
         hf_token=None,
         **kwargs,
     ):
         self.model_name = model_name
-        self.use_hot_cache = use_hot_cache
+        # self.use_hot_cache = use_hot_cache
         self.hf_token = hf_token

         if tokenizer_name == "same":
@@ -45,57 +48,36 @@ class TransformersGenerator:
             model_name, quantize=quantize
         )
         self.processor = None
-
-
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-                self.pad_token_id = self.tokenizer.tokenizer.pad_token_id
-            else:
-                self.pad_token_id = self.tokenizer.tokenizer.eos_token_id
-        else:
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_name,
+            token=self.hf_token,  # trust_remote_code=True
+            padding_side="left",  # ensure that the padding is on the left
+        )
+        logger.info("Loading model")
+        time_now = time.time()
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            device_map="auto",
+            **model_kwargs,
+        )
+        logger.info(f"Model loaded in {time.time() - time_now:.2f} seconds")
+
+        # load OA adapter
+        if adapter_name != "":
+            logger.info(f"Loading adapter and tokenizer from {adapter_name}")
             self.tokenizer = AutoTokenizer.from_pretrained(
-
-
-                padding_side="left",  # ensure that the padding is on the left
-            )
-            logger.info("Loading model")
-            time_now = time.time()
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                device_map="auto",
-                **model_kwargs,
+                adapter_name,
+                padding_side="left",
             )
-
-
-
-
-
-
-
-
-
-            self.model.resize_token_embeddings(len(self.tokenizer))
-            self.model.load_adapter(adapter_name)
-
-            # set pad_token_id
-            if self.tokenizer.pad_token_id:
-                self.pad_token_id = self.tokenizer.pad_token_id
-            else:
-                self.tokenizer.pad_token = self.tokenizer.eos_token
-                self.pad_token_id = self.tokenizer.eos_token_id
+            self.model.resize_token_embeddings(len(self.tokenizer))
+            self.model.load_adapter(adapter_name)
+
+        # set pad_token_id
+        if self.tokenizer.pad_token_id:
+            self.pad_token_id = self.tokenizer.pad_token_id
+        else:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+            self.pad_token_id = self.tokenizer.eos_token_id

         # compile
         time_now = time.time()
@@ -111,54 +93,6 @@ class TransformersGenerator:
         self.past_key_values_kwargs = {}
         self.past_token_ids = None

-    def truncate_kv_cache(self, new_token_ids: torch.Tensor):
-        """
-        Truncate the key-value cache to the size which overlap the past_ids with the new_ids.
-        Uses:
-            past_ids: torch.Tensor [B, T]
-            new_ids: torch.Tensor [B, T]
-            kv_cache: tuple[tuple[torch.Tensor]]: tuple of key-value cache tensors
-
-        NOTE: this essentially implements System Prompt in the worst case when using batch_size==1
-        """
-        if (
-            self.past_token_ids is None
-            or "past_key_values" not in self.past_key_values_kwargs
-        ):
-            return
-
-        # caching doesn't seem to work with multimodal models
-        if self.use_images:
-            self.past_key_values_kwargs = {}
-            return
-
-        past_batch_size, past_seq_len = self.past_token_ids.shape
-        new_batch_size, new_seq_len = new_token_ids.shape
-
-        # If the batch size has changed, reset the cache
-        if past_batch_size != new_batch_size:
-            self.past_key_values_kwargs = {}
-            return
-
-        min_shape = min(past_seq_len, new_seq_len)
-        compare_past = (
-            self.past_token_ids[:, :min_shape] != new_token_ids[:, :min_shape]
-        )
-
-        # All tokens are the same - no need to truncate
-        if not compare_past.any():
-            return
-
-        # Find the first token that is different between the past and new tokens
-        seq_min = torch.argmax(compare_past.double(), dim=1).min()
-
-        # Truncate the key-value cache to the size which overlap the past_ids with the new_ids.
-        # assumes shape is [num_layers, num_heads, seq_len, hidden_size]
-        self.past_key_values_kwargs["past_key_values"] = [
-            [kv[:, :, :seq_min, :] for kv in kvs]
-            for kvs in self.past_key_values_kwargs["past_key_values"]
-        ]
-
     def build_model_kwargs(self, model_name: str, **kwargs) -> tuple[str, dict]:
         model_kwargs = {
             "token": self.hf_token,
@@ -255,20 +189,6 @@ class TransformersGenerator:
             k: v.to(self.model.device) for k, v in tokenized_messages.items()
         }

-        # Truncate the key-value cache
-        self.truncate_kv_cache(tokenized_messages["input_ids"])
-
-        if (
-            "past_key_values" in self.past_key_values_kwargs
-            and self.past_key_values_kwargs["past_key_values"][0][0].shape[-2]
-            > tokenized_messages["input_ids"].shape[-1]
-        ):
-            raise ValueError("Past key values are larger than the input_ids")
-
-        past_key_values = self.past_key_values_kwargs.get("past_key_values", None)
-        if past_key_values is not None:
-            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
-
         generated_sequences = self.model.generate(
             **tokenized_messages,
             do_sample=True,
@@ -276,16 +196,7 @@ class TransformersGenerator:
             max_new_tokens=max_tokens,
             pad_token_id=self.pad_token_id,
             return_dict_in_generate=True,
-            use_cache=True,
-            past_key_values=past_key_values,
-            return_legacy_cache=True,
         )
-        # Cache the past key values
-        if self.use_hot_cache:
-            self.past_key_values_kwargs["past_key_values"] = (
-                generated_sequences.past_key_values
-            )
-        self.past_token_ids = generated_sequences.sequences

         # Decode the output
         text_responses = self.tokenizer.batch_decode(
@@ -301,6 +212,119 @@ class TransformersGenerator:
         return text_responses, total_tokens_used


+class VLLMGenerator:
+    def __init__(
+        self,
+        model_name: str,
+        adapter_name="",
+        **kwargs,
+    ):
+        self.model_name = model_name
+        # Initialize vLLM model
+        logger.info(f"Loading model {model_name} with vLLM")
+        time_now = time.time()
+
+        # Get downloaded models
+        downloaded_models = get_downloaded_models()
+        if model_name in downloaded_models:
+            model_name = downloaded_models[model_name]
+            logger.info(f"Using local model {model_name}")
+
+        self.llm = LLM(
+            model=model_name,
+            trust_remote_code=True,
+            tensor_parallel_size=torch.cuda.device_count(),
+            gpu_memory_utilization=0.95,
+            max_model_len=16384,
+            dtype=torch.bfloat16,
+            enable_lora=True if adapter_name != "" else False,
+        )
+
+        # Load adapter
+        self.lora_request = None
+        if adapter_name != "":
+            from huggingface_hub import snapshot_download
+
+            logger.info(f"Loading adapter from {adapter_name}")
+            lora_path = snapshot_download(repo_id=adapter_name)
+            self.lora_request = LoRARequest(
+                adapter_name,
+                lora_int_id=0,
+                lora_path=lora_path,
+            )
+
+        logger.info(f"Model loaded in {time.time() - time_now:.2f} seconds")
+
+    def reset(self):
+        # vLLM handles state automatically, no need to reset
+        pass
+
+    def prepare_messages(
+        self,
+        history: History,
+        max_messages_window: int,
+    ) -> tuple[list[dict], list]:
+        """
+        Prepare the messages using a history
+        """
+        message_window = history.dialogue_history[-max_messages_window:]
+        # remove the first assistant message if it is present
+        if len(message_window) > 0 and message_window[0]["role"] == "assistant":
+            message_window = message_window[1:]
+        # add the system prompt if the first message is not a system message
+        if len(message_window) > 0 and message_window[0]["role"] != "system":
+            message_window = [history.system_prompt_dialogue] + message_window
+
+        # vLLM doesn't use images
+        return message_window, []
+
+    @torch.inference_mode()
+    def generate_unconstrained(
+        self,
+        batch_messages: list[list[dict]],
+        max_tokens: int = 256,
+        temperature=0.6,
+        top_p=1.0,
+        frequency_penalty=0.0,
+        presence_penalty=0.0,
+        stop=["\n", "\n\n"],
+        **kwargs,
+    ) -> tuple[list[str], int]:
+        """
+        Generate unconstrained text based on the batch of messages using vLLM.
+        """
+        # Create sampling parameters for vLLM
+        sampling_params = SamplingParams(
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            presence_penalty=presence_penalty,
+            stop=stop if isinstance(stop, list) else [stop] if stop else None,
+        )
+
+        # Generate completions with vLLM
+        outputs = self.llm.chat(
+            batch_messages,
+            sampling_params=sampling_params,
+            use_tqdm=False,
+            lora_request=self.lora_request,
+        )
+
+        # Extract responses
+        text_responses = []
+        total_tokens_used = 0
+
+        for output in outputs:
+            text_responses.append(output.outputs[0].text)
+            # Sum prompt tokens and output tokens for the total
+            total_tokens_used += len(output.prompt_token_ids) + len(
+                output.outputs[0].token_ids
+            )
+
+        return text_responses, total_tokens_used
+
+
 class OpenAIGenerator:
     def __init__(self, use_images=False, model_name="gpt-4o-mini", api_key=None):
         self.client = OpenAI(api_key=api_key)
@@ -327,6 +351,8 @@ class OpenAIGenerator:
             message_window = [history.system_prompt_dialogue] + message_window

         if self.use_images:
+            message_window = copy.deepcopy(message_window)
+            # copy the images to the history
             img_idx = -1
             seen_images = 0
             # iterate through the messages in reverse order to assign images
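A minimal sketch, not from the package documentation, of calling the new `VLLMGenerator` directly. It assumes a CUDA host with the `vllm` extra installed; the model id and prompt are placeholders, and the message format mirrors what `prepare_messages` produces (OpenAI-style role/content dicts).

```python
from plancraft.models.generators import VLLMGenerator

# placeholder model id -- any chat model supported by vLLM should work
generator = VLLMGenerator(model_name="meta-llama/Llama-3.1-8B-Instruct")

batch_messages = [
    [
        {"role": "system", "content": "You are a Plancraft agent."},
        {"role": "user", "content": "Craft the target item. What is your next action?"},
    ]
]
# returns one text response per conversation plus the total prompt + completion tokens
responses, tokens_used = generator.generate_unconstrained(batch_messages, max_tokens=128)
print(responses[0], tokens_used)
```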
plancraft/models/utils.py
CHANGED
@@ -39,10 +39,10 @@ def get_downloaded_models() -> dict:
     """
     downloaded_models = {}
     # known models on NFS partition
-    if pathlib.Path("/nfs").exists():
-        local_models = glob.glob("/nfs/public/hf/models/*/*")
+    if pathlib.Path("/public").exists():
+        local_models = glob.glob("/public/hf/models/*/*")
         downloaded_models = {
-            model.replace("/nfs/public/hf/models/", ""): model for model in local_models
+            model.replace("/public/hf/models/", ""): model for model in local_models
         }
     return downloaded_models

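A tiny sketch of the behaviour after the path change above: unless the host actually has a `/public/hf/models` mirror, the lookup simply returns an empty dict and models are fetched from the Hub as usual.

```python
from plancraft.models.utils import get_downloaded_models

# {} on most machines; {"org/model": "/public/hf/models/org/model", ...} when the mirror exists
print(get_downloaded_models())
```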
plancraft/simple.py
ADDED
@@ -0,0 +1,186 @@
+import json
+import os
+from typing import Any
+
+from plancraft.config import PlancraftExample
+from plancraft.environment.actions import (
+    ActionHandlerBase,
+    MoveActionHandler,
+    SmeltActionHandler,
+    ImpossibleActionHandler,
+    StopAction,
+)
+from plancraft.environment.env import (
+    PlancraftEnvironment,
+    get_objective_str,
+    target_and_inventory_to_text_obs,
+)
+
+
+def get_plancraft_examples(split: str = "train") -> list[PlancraftExample]:
+    """
+    Load examples from the data directory
+    """
+    data_dir = os.path.join(os.path.dirname(__file__), "data")
+    with open(os.path.join(data_dir, f"{split}.json"), "r") as f:
+        examples = json.load(f)
+    return [PlancraftExample(**example) for example in examples]
+
+
+class PlancraftGymWrapper:
+    """
+    This wrapper class just wraps the environment and actions to evaluate a single example
+
+    This is useful if you want to bring your own agent/model to interact with the environment and not rely on the History class
+    and model class in the plancraft package.
+    """
+
+    def __init__(
+        self,
+        example: PlancraftExample,
+        actions: list[ActionHandlerBase] = [
+            MoveActionHandler(),
+            SmeltActionHandler(),
+            ImpossibleActionHandler(),
+        ],
+        max_steps: int = 30,
+        resolution: str = "high",
+        use_text_inventory: bool = True,
+    ):
+        self.actions = actions
+        self.max_steps = max_steps
+        # whether to convert the inventory to text observation
+        # if False, only the objective string is returned
+        self.use_text_inventory = use_text_inventory
+        self.current_step = 0
+        self.stopped = False
+        self.success = False
+        self.example = example
+        self.resolution = resolution
+        self.environment = PlancraftEnvironment(
+            example.slotted_inventory, resolution=self.resolution
+        )
+        if example.impossible:
+            assert "impossible" in [action.action_name for action in actions]
+
+    def check_done(self, inventory: dict, target: str):
+        """
+        Check that target object is obtained
+        """
+        for slot, item in inventory.items():
+            # ensure the target is in the inventory (not in slot 0)
+            if target == item["type"] and slot != 0:
+                return True
+        return False
+
+    def parse_raw_model_response(self, generated_text: str) -> str:
+        """
+        Given a message and set of action handlers, parse the content to return the action
+        or a message if the action is not valid/requires message response
+        """
+        for handler in self.actions:
+            match_output = handler.match(generated_text)
+            if match_output:
+                return match_output
+        action_names = [handler.action_name for handler in self.actions]
+        return f"Only select actions from the following: {', '.join(action_names)}"
+
+    def step(
+        self, action: str
+    ) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
+        """
+        Execute action and return next observation, reward, termination status, truncation status, and info
+
+        Returns:
+            observation: The environment observation after the action, observation is a dictionary with keys:
+                - text: The text observation (always present)
+                - inventory: The inventory after the action (if action was successful)
+                - target: The target object (if action was successful)
+                - image: The image observation (if action was successful)
+            reward: Reward for the current action (1.0 for success, 0.0 otherwise)
+            terminated: Whether the episode is done due to task completion or task failure
+            truncated: Whether the episode is done due to external limits (e.g. max steps reached)
+            info: Additional diagnostic information (helpful for debugging)
+        """
+        action = self.parse_raw_model_response(action)
+        self.current_step += 1
+
+        # Initialize return values
+        reward = 0.0
+        terminated = False
+        truncated = False
+        info = {"steps": self.current_step}
+
+        # Handle already stopped case
+        if self.stopped:
+            return (
+                {"text": "Plancraft environment is terminated"},
+                reward,
+                True,
+                True,
+                info,
+            )
+
+        # Handle max steps reached (truncate with no reward)
+        if self.current_step > self.max_steps:
+            self.success = False
+            truncated = True
+            info["reason"] = "max_steps_reached"
+            return (
+                {"text": f"Max steps ({self.max_steps}) reached"},
+                reward,
+                terminated,
+                truncated,
+                info,
+            )
+
+        # Handle stop action
+        if isinstance(action, StopAction):
+            self.stopped = True
+            terminated = True
+            # success is True if example was truly impossible
+            self.success = self.example.impossible
+            if self.success:
+                reward = 1.0
+                info["reason"] = "correctly_identified_impossible"
+            else:
+                info["reason"] = "incorrect_stop"
+            observation = {
+                "text": "Plancraft environment is terminate due to stop action"
+            }
+
+        # Handle invalid action or non-env action
+        elif isinstance(action, str):
+            observation = self.environment.step()
+            observation["target"] = self.example.target
+            observation["text"] = action
+
+        # Handle regular action execution
+        # NOTE: if the action is valid but does not do anything
+        # the environment will return the same observation
+        else:
+            observation = self.environment.step(action)
+            observation["target"] = self.example.target
+
+            # Generate text observation
+            if self.use_text_inventory:
+                text = target_and_inventory_to_text_obs(
+                    target=self.example.target, inventory=observation["inventory"]
+                )
+            else:
+                text = get_objective_str(self.example.target)
+
+            observation["text"] = text
+
+            self.success = self.check_done(
+                observation["inventory"], self.example.target
+            )
+
+            # Set reward and termination for successful completion
+            if self.success:
+                reward = 1.0
+                terminated = True
+                self.stopped = True
+                info["reason"] = "success"
+
+        return observation, reward, terminated, truncated, info
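A short sketch, separate from the file itself, showing what `step` returns when the action string cannot be parsed: `parse_raw_model_response` falls through to the guidance message, which is echoed back in `observation["text"]` while the episode keeps running. The example index and split are arbitrary.

```python
from plancraft.simple import PlancraftGymWrapper, get_plancraft_examples

example = get_plancraft_examples(split="train")[0]
wrapper = PlancraftGymWrapper(example, max_steps=5)

# An unparseable action: no handler matches, so the wrapper answers with guidance text
obs, reward, terminated, truncated, info = wrapper.step("open the crafting table")
print(obs["text"])   # "Only select actions from the following: ..."
print(reward, terminated, truncated, info)  # 0.0 False False {'steps': 1}
```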
plancraft/utils.py
CHANGED
@@ -1,12 +1,7 @@
-import glob
-import pathlib
-from copy import copy
-from typing import Optional
 import abc
+from copy import copy
 from dataclasses import dataclass, field
-
-import torch
-from loguru import logger
+from typing import Optional

 from plancraft.environment.actions import ActionHandlerBase
 from plancraft.environment.prompts import (
@@ -170,7 +165,6 @@ class History(HistoryBase):
         self.initial_dialogue_length = len(self.dialogue_history)

         self.inventory_history = []
-
         self.tokens_used = 0

     def trace(self):
@@ -182,6 +176,10 @@ class History(HistoryBase):
             "tokens_used": copy(self.tokens_used),
         }

+    def trace_images(self):
+        # return only the images added after the initial dialogue
+        return self._images[len(self.prompt_images) :]
+
     @property
     def num_steps(self):
         return (len(self.dialogue_history) - self.initial_dialogue_length) // 2
@@ -193,31 +191,3 @@ class History(HistoryBase):
     @images.setter
     def images(self, value: list) -> None:
         self._images = value
-
-
-def get_downloaded_models() -> dict:
-    """
-    Get the list of downloaded models on the NFS partition (EIDF).
-    """
-    downloaded_models = {}
-    # known models on NFS partition
-    if pathlib.Path("/nfs").exists():
-        local_models = glob.glob("/nfs/public/hf/models/*/*")
-        downloaded_models = {
-            model.replace("/nfs/public/hf/models/", ""): model for model in local_models
-        }
-    return downloaded_models
-
-
-def get_torch_device() -> torch.device:
-    device = torch.device("cpu")
-    if torch.cuda.is_available():
-        device = torch.device("cuda", 0)
-    elif torch.backends.mps.is_available():
-        if not torch.backends.mps.is_built():
-            logger.info(
-                "MPS not available because the current PyTorch install was not built with MPS enabled."
-            )
-        else:
-            device = torch.device("mps")
-    return device
{plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: plancraft
-Version: 0.3.33
+Version: 0.4.0
 Summary: Plancraft: an evaluation dataset for planning with LLM agents
 License: MIT License

@@ -24,7 +24,7 @@ License: MIT License
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 License-File: LICENSE
-Requires-Python: >=3.
+Requires-Python: >=3.10
 Requires-Dist: hydra-core>=1.3.2
 Requires-Dist: imageio>=2.36.0
 Requires-Dist: loguru
@@ -50,12 +50,13 @@ Requires-Dist: seaborn; extra == 'full'
 Requires-Dist: torch>=2.5.0; extra == 'full'
 Requires-Dist: torchvision>=0.20.0; extra == 'full'
 Requires-Dist: transformers>=4.43.3; extra == 'full'
+Requires-Dist: vllm>=0.7.3; extra == 'full'
 Description-Content-Type: text/markdown

 # plancraft

 [](https://github.com/gautierdag/plancraft/actions/workflows/test.yaml)
-
 
 [](https://pypi.org/project/plancraft/)
 [](https://hub.docker.com/r/gautierdag/plancraft)
@@ -64,7 +65,7 @@ Description-Content-Type: text/markdown

 [Paper](https://arxiv.org/abs/2412.21033) | [Website](https://gautierdag.github.io/plancraft/)

-Plancraft is a minecraft environment
+Plancraft is a minecraft environment that benchmarks planning in LLM agents with an oracle RAG retriever.

 You can install the package by running the following command:

@@ -78,7 +79,6 @@ Or:
 uv add plancraft
 ```

-
 
 
 
@@ -88,7 +88,45 @@ The package provides a multimodal environment and dataset for evaluating plannin

 ## Usage

-
+### Quick Start with PlancraftGymWrapper
+
+The package provides an `PlancraftGymWrapper` class that offers a simple interface for integrating your own agent with the Plancraft environment. This is the recommended way to get started if you want to use your own model implementation:
+
+```python
+from plancraft.simple import PlancraftGymWrapper, get_plancraft_examples
+
+# Load examples from the dataset
+examples = get_plancraft_examples(split="train")
+example = examples[0]  # Get the first example
+
+# Create the environment wrapper for this example
+env_wrapper = PlancraftGymWrapper(
+    example=example,
+    max_steps=30,
+    resolution="high",
+    use_text_inventory=True
+)
+
+# Simple agent loop
+# Initialize environment
+observation, reward, terminated, truncated, info = env_wrapper.step("")
+while not (terminated or truncated):
+    # Your agent decides the next action based on observation
+    action = your_agent_function(observation["text"])
+
+    # Execute action in environment
+    observation, reward, terminated, truncated, info = env_wrapper.step(action)
+
+    # Check if successful
+    if reward > 0:
+        print("Success!")
+```
+
+The `PlancraftGymWrapper` follows the standard Gym API format and simplifies the interaction with the environment. It doesn't rely on the `History` class or the `PlancraftBaseModel` interface, making it easier to integrate with your existing agent implementations.
+
+### PlancraftEnvironment
+
+For lower-level control, you can use the `PlancraftEnvironment` class directly:

 ```python
 from plancraft.environments.env import PlancraftEnvironment
@@ -118,23 +156,32 @@ def main():

 Note that the environment is deterministic and stateful, so the same action will always lead to the same observation and the environment will keep track of the state of the inventory.

-### Evaluator
+### Advanced Usage: Evaluator

-
+For more advanced use cases, the package provides an `Evaluator` class for systematic evaluation of models on our dataset. Note that using the Evaluator requires following specific assumptions about model structure and history tracking:

 ```python
 from plancraft.evaluator import Evaluator
-
-
-
-
-
-
-
-
+from plancraft.models.base import PlancraftBaseModel
+
+# Create a model by subclassing PlancraftBaseModel
+class MyModel(PlancraftBaseModel):
+    def step(self, observation, dialogue_history):
+        # Your model implementation
+        pass
+
+    def reset(self):
+        # Reset model state
+        pass
+
+# Create the evaluator with your model
+model = MyModel()
+evaluator = Evaluator(run_name="my_experiment")
+# Evaluate the agent
+results = evaluator.eval_all_examples(model=model)
 ```

-The
+The `Evaluator` class handles the environment loop and model interaction. It is responsible for early stopping, verifying task completion, and saving results and images generated during evaluation.

 #### The Evaluator interactive loop

@@ -203,11 +250,13 @@ The observation returned by the `PlancraftEnvironment` class is a dictionary wit

 The observation returned by the `Evaluator` class is a dictionary with the following keys: `inventory`, `image`, `message`, and `target`. The `message` key contains a string representing the environment formatted in text (we follow the annotation scheme described in our paper). The `target` key contains a string representing the target object to be crafted.

+When using `PlancraftGymWrapper`, the observation contains at minimum a `text` key with the text observation, and may include `inventory`, `target`, and `image` keys depending on the action result.
+
 ### Implementing a Model

-To implement a model
+To implement a model for use with the `Evaluator`, you need to subclass the `PlancraftBaseModel` class and implement the `step` and `reset` method. See the `plancraft.models.dummy` module for an example of how to implement a basic model.

-
+For use with `PlancraftGymWrapper`, you can implement any agent function that processes the observation and returns an action string.

 ## Reproducing the Results tables in the paper

@@ -215,7 +264,7 @@ To reproduce the results tables in the paper, you can use the `exps.sh` script i

 ## Docker

-There is a docker image built to incorporate the latest code and its dependencies.
+There is a docker image built to incorporate the latest code and its dependencies. It's built by running the following command:

 ```bash
 docker buildx build --platform linux/amd64,linux/arm64 -t gautierdag/plancraft --push .
@@ -223,16 +272,6 @@ docker buildx build --platform linux/amd64,linux/arm64 -t gautierdag/plancraft -

 The image is available on [Docker Hub](https://hub.docker.com/r/gautierdag/plancraft). Note that, unlike the package, the docker image includes everything in the repo.

-## To Do
-
-Non-exhaustive list of things to do from highest to lowest priority:
-
-- [ ] Add minecraft wiki scrape and non-oracle search for pages
-- [ ] Improve planner to bring closer to optimal (the oracle planner does not consider future crafting steps when moving items -- see paper for more details)
-- [ ] Rerun image models with better bounding box model
-- [ ] Track bounding box accuracy
-- [ ] Implement a version of the image environment entirely on cuda/pytorch rather than cpu
-
 ## PRs Welcomed

 If you would like to contribute to the project, please feel free to open a PR. I am happy to review and merge PRs that improve the project. If you have any questions, feel free to create an issue or reach out to me directly.
{plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/RECORD
CHANGED
@@ -1,8 +1,9 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-plancraft/config.py,sha256=
-plancraft/evaluator.py,sha256=
+plancraft/config.py,sha256=oyn8I_k0Slh-Nyg2javomFertZ5ZHiY_ndAVqfJYQvQ,4010
+plancraft/evaluator.py,sha256=UQujiltf88rCnbNwoglM5tJe5gW9XASew-jLaEbtJZo,15525
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
-plancraft/
+plancraft/simple.py,sha256=QlXsCd6n5lIaehSFjeBlxTm40FuGCGHPEEsuGIMEJqk,6745
+plancraft/utils.py,sha256=hCE1oQ-77Me39Vo-sCL7iZPdO-WWYZnBjP41lZWRi20,6339
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
 plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
 plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -15,7 +16,7 @@ plancraft/environment/actions.py,sha256=Pub21caxM5iZ9IaX-ny1-xxr_peJIwwV_QAx3BVS
 plancraft/environment/env.py,sha256=A4532st7JFBYBF_Nh0CEEi3ZTLJAeaB3t9PAIVSemj0,16390
 plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
 plancraft/environment/planner.py,sha256=uIOJjIoyT_4pxeWeTKb8BkLJyKZG0-AMoEOkZs6Ua9A,19340
-plancraft/environment/prompts.py,sha256=
+plancraft/environment/prompts.py,sha256=NU9YHAz3id-IgaukQvEi5uLlpEstpE5_Hccvvq1At2Y,6950
 plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
 plancraft/environment/sampler.py,sha256=79hLpTU0ajvMPoBsvSe8tE88x31c8Vlczb3tJZJcau0,7441
 plancraft/environment/search.py,sha256=z31eEwQBY7WJaYVBEEwulFS8P3h1Nwo1Th9BaCTxk5M,2085
@@ -1912,15 +1913,15 @@ plancraft/environment/tags/wooden_stairs.json,sha256=GCr2_5UGPMYZECqQ_5NYSvbwuwt
 plancraft/environment/tags/wooden_trapdoors.json,sha256=DbjfwoHJL8VuYWV61A1uDqW7LJsGlOP4eoxcGIQVYr4,303
 plancraft/environment/tags/wool.json,sha256=Z59l4mdPztVZBFaglJ4mV9H2OnyCVzhqQRi2dduak78,496
 plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,510
-plancraft/models/act.py,sha256=
+plancraft/models/act.py,sha256=e5YZ1hre_5CZ-tSpWTZ-6AQ0RLVGd0QuKetXfLaTqW0,3077
 plancraft/models/base.py,sha256=S8EdkqWpn8nE1WcrqDoA4Hx4p52qEttGxnqjIPWvl3Q,852
-plancraft/models/bbox_model.py,sha256=
+plancraft/models/bbox_model.py,sha256=D1fOhYuy7ohCqqRRgxEO6N89B7v4CILfrMACpvooHiQ,17149
 plancraft/models/dummy.py,sha256=_NUTviv5ye6KGzODRt0Zykk8shsek0QBqWCeZW3ldSQ,1495
-plancraft/models/generators.py,sha256=
+plancraft/models/generators.py,sha256=7COMLjjx_HbTWJqINNLqqExQv7gLikfLTViacAdSt5M,13963
 plancraft/models/oracle.py,sha256=f-0KWlBuHy6wcxmDsxM3MQ_QwfBstzfbA26mlk1MgLA,1657
-plancraft/models/utils.py,sha256=
+plancraft/models/utils.py,sha256=xgkP5jqCeFfkKe3Xd4ZYfTqiEJ-dA-qgFAC-J35ub3E,4029
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.
-plancraft-0.
-plancraft-0.
-plancraft-0.
+plancraft-0.4.0.dist-info/METADATA,sha256=Tt3DlKXtDxZ0M6s2zlEXydCB5dmxkeKI80wao62e-z4,12391
+plancraft-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.4.0.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.4.0.dist-info/RECORD,,

{plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/WHEEL
File without changes

{plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/licenses/LICENSE
File without changes