plancraft 0.3.33__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plancraft/config.py +0 -14
- plancraft/environment/prompts.py +14 -17
- plancraft/evaluator.py +20 -5
- plancraft/models/act.py +16 -10
- plancraft/models/bbox_model.py +6 -4
- plancraft/models/generators.py +151 -125
- plancraft/models/utils.py +3 -3
- plancraft/simple.py +186 -0
- plancraft/utils.py +6 -36
- {plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/METADATA +69 -30
- {plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/RECORD +13 -12
- {plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/WHEEL +0 -0
- {plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/licenses/LICENSE +0 -0
plancraft/config.py
CHANGED
@@ -47,18 +47,6 @@ class WandbConfig(BaseModel):
     mode: str


-class LaunchConfig(BaseModel):
-    command: str
-    job_name: str
-    gpu_limit: int
-    gpu_product: str
-    cpu_request: int
-    ram_request: str
-    interactive: bool = False
-    namespace: str = "informatics"
-    env_vars: dict[str, dict[str, str]]
-
-
 class LocalEnvSettings(BaseSettings):
     hf_token: str = ""
     openai_api_key: str = ""
@@ -72,7 +60,6 @@ class LocalEnvSettings(BaseSettings):
 class EvalConfig(BaseModel):
     plancraft: PlancraftConfig
     wandb: WandbConfig
-    launch: LaunchConfig
     env_variables: LocalEnvSettings = LocalEnvSettings()


@@ -107,7 +94,6 @@ class TrainingArgs(BaseModel):
 class TrainConfig(BaseModel):
     training: TrainingArgs
     wandb: WandbConfig
-    launch: LaunchConfig
     env_variables: LocalEnvSettings

plancraft/environment/prompts.py
CHANGED
@@ -3,6 +3,7 @@ import numpy as np
 from plancraft.environment.env import PlancraftEnvironment
 from plancraft.environment.search import gold_search_recipe
 from plancraft.environment.actions import (
+    MoveAction,
     ActionHandlerBase,
     MoveActionHandler,
     SmeltActionHandler,
@@ -113,7 +114,7 @@ def get_prompt_example(
         example_dialogue.append({"role": "user", "content": text})
         if "search" in handler_names and SEARCH_STEPS[i]:
             example_dialogue.append({"role": "assistant", "content": SEARCH_STEPS[i]})
-            search_target =
+            search_target = SEARCH_STEPS[i].split("search: ")[-1].strip()
             search_response = gold_search_recipe(search_target)
             example_dialogue.append({"role": "user", "content": search_response})
         if "think" in handler_names:
@@ -160,29 +161,25 @@ def load_prompt_images(resolution: str) -> list[np.ndarray]:
     """
     Generates the images for the few-shot prompt in prompt.py
     """
-    starting_inv =
-        {"type": "diorite", "
-        {"type": "cobblestone", "
-
-
+    starting_inv = {
+        27: {"type": "diorite", "quantity": 1},
+        39: {"type": "cobblestone", "quantity": 1},
+    }
     env = PlancraftEnvironment(inventory=starting_inv, resolution=resolution)
     actions = [
-
-
-
+        None,
+        MoveAction(slot_from=27, slot_to=4, quantity=1),
+        MoveAction(slot_from=39, slot_to=5, quantity=1),
     ]
     images = []
     for action in actions:
         obs = env.step(action)
         images.append(obs["image"])
-
-
-        {"type": "
-
-    ]
-    new_actions = [
-        {"move": [0, 0, 0]},
-    ]
+    second_inv = {
+        45: {"type": "iron_ore", "quantity": 1},
+        39: {"type": "cobblestone", "quantity": 1},
+    }
+    new_actions = [None]
     env.reset(new_inventory=second_inv)
     for action in new_actions:
         obs = env.step(action)
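A minimal sketch, not part of the diff itself, of exercising the updated few-shot image generation above: the hunk switches the prompt inventories to slot-keyed dicts and replaces raw action dicts with `MoveAction` objects. The `"high"` resolution value is an assumption based on the defaults used elsewhere in this release, and the printed shape depends on the environment's render size.

```python
from plancraft.environment.prompts import load_prompt_images

# Regenerate the few-shot prompt images with the new slot-keyed inventories
images = load_prompt_images(resolution="high")
print(len(images), images[0].shape)  # one image array per scripted prompt step
```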
plancraft/evaluator.py
CHANGED
@@ -34,6 +34,13 @@ class Evaluator:

     It is also responsible for early stopping and verifying the target object has been craft.
     Finally, it also saves the results of the evaluation and the images generated during the evaluation.
+
+    This evaluator is designed to work with a PlancraftBaseModel and a set of ActionHandlerBase instances.
+    It supports multimodal content format and image-based inventory.
+
+    Importantly, it tracks the history of the dialogue and the environment state to provide a trace of the model's actions.
+
+    If you would want a simpler interface that just wraps the environment and actions to evaluate a single Plancraft example, you should use the PlancraftGymWrapper class.
     """

     def __init__(
@@ -59,6 +66,7 @@ class Evaluator:
         self.resume = resume
         self.use_fasterrcnn = use_fasterrcnn
         self.generation_number = 0
+
         self.use_multimodal_content_format = use_multimodal_content_format
         self.use_images = use_images
         self.use_text_inventory = use_text_inventory
@@ -73,7 +81,14 @@ class Evaluator:

     def create_history(self) -> HistoryBase:
         """Create a new History instance with current configuration"""
-        return self.history_class(
+        return self.history_class(
+            actions=self.actions,
+            config=self.history_config,
+            resolution=self.resolution,
+            use_multimodal_content_format=self.use_multimodal_content_format,
+            use_images=self.use_images,
+            use_text_inventory=self.use_text_inventory,
+        )

     def save_results_dict(self, example: PlancraftExample, results_dict: dict):
         output_dir = f"{self.output_dir}/{self.generation_number}"
@@ -170,7 +185,7 @@ class Evaluator:
             content_list.append({"type": "image"})
         return {"content": content_list}

-    def _init_environment(self, example: PlancraftExample) -> tuple:
+    def _init_environment(self, example: PlancraftExample, model=None) -> tuple:
         """Initialize environment and history for an example"""
         environment = PlancraftEnvironment(
             inventory=deepcopy(example.slotted_inventory),
@@ -179,7 +194,7 @@ class Evaluator:
         history = self.create_history()
         obs = environment.step()
         obs["target"] = example.target
-        obs["message"] = self.convert_observation_to_message(obs)
+        obs["message"] = self.convert_observation_to_message(obs, model=model)
         return environment, history, obs

     def _process_model_output(
@@ -241,7 +256,7 @@ class Evaluator:
             "number_of_steps": history.num_steps,
             "model_trace": history.trace(),
             "example_id": example.id,
-            "images": history.
+            "images": history.trace_images(),
         }

     def eval_example(
@@ -249,7 +264,7 @@ class Evaluator:
         example: PlancraftExample,
         model: PlancraftBaseModel,
     ) -> dict:
-        environment, history, observation = self._init_environment(example)
+        environment, history, observation = self._init_environment(example, model=model)
         success = False

         while history.num_steps < self.max_steps:
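A minimal sketch, not taken from the package docs, of driving `eval_example` with the `model=` threading shown above. The `NoOpModel` stub and the action string it returns are placeholders; only the `run_name` argument, the `step`/`reset` interface, and the result keys (`example_id`, `number_of_steps`, `images`) are taken from this diff and the README example further down.

```python
from plancraft.evaluator import Evaluator
from plancraft.models.base import PlancraftBaseModel
from plancraft.simple import get_plancraft_examples


class NoOpModel(PlancraftBaseModel):
    def step(self, observation, dialogue_history):
        # placeholder response; not guaranteed to be a valid action string
        return "impossible: placeholder action"

    def reset(self):
        pass


examples = get_plancraft_examples(split="train")
evaluator = Evaluator(run_name="debug-single-example")
result = evaluator.eval_example(examples[0], model=NoOpModel())
print(result["example_id"], result["number_of_steps"], len(result["images"]))
```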
plancraft/models/act.py
CHANGED
@@ -6,8 +6,8 @@ from plancraft.models.bbox_model import IntegratedBoundingBoxModel
 from plancraft.models.generators import (
     OpenAIGenerator,
     TransformersGenerator,
+    VLLMGenerator,
 )
-
 from plancraft.utils import History


@@ -42,15 +42,21 @@ class ActModel(PlancraftBaseModel):
                 api_key=cfg.env_variables.openai_api_key,
             )
         else:
-            #
-            self.
-
-
-
-
-
-
-
+            # if adapter name is provided then use TransformersGenerator
+            if self.use_images or cfg.plancraft.adapter != "":
+                # model is transformers based
+                self.llm = TransformersGenerator(
+                    model_name=cfg.plancraft.model,
+                    tokenizer_name=cfg.plancraft.tokenizer,
+                    quantize=cfg.plancraft.quantize,
+                    use_hot_cache=cfg.plancraft.hot_cache,
+                    adapter_name=cfg.plancraft.adapter,
+                    hf_token=cfg.env_variables.hf_token,
+                    use_images=self.use_images,
+                )
+            else:
+                # use standard VLLM for text-only models
+                self.llm = VLLMGenerator(model_name=cfg.plancraft.model)
         self.max_messages_window = cfg.plancraft.max_message_window
         self.kv_cache = None

plancraft/models/bbox_model.py
CHANGED
@@ -455,7 +455,7 @@ class IntegratedBoundingBoxModel(nn.Module, PyTorchModelHubMixin):
         preds = self.model(x)
         return preds

-    def get_inventory(self, pil_image, resolution="high") -> dict:
+    def get_inventory(self, pil_image, resolution="high", threshold=0.25) -> dict:
         """
         Predict boxes and quantities
         """
@@ -464,10 +464,12 @@ class IntegratedBoundingBoxModel(nn.Module, PyTorchModelHubMixin):
             img_tensor = img_tensor.cuda()
         with torch.no_grad():
             predictions = self.model(img_tensor.unsqueeze(0))
-        return self.prediction_to_inventory(
+        return self.prediction_to_inventory(
+            predictions[0], resolution=resolution, threshold=threshold
+        )

     @staticmethod
-    def prediction_to_inventory(prediction, threshold=0.
+    def prediction_to_inventory(prediction, threshold=0.25, resolution="high") -> dict:
         inventory = {}
         seen_slots = set()
         for bbox, score, label, quantity in zip(
@@ -483,7 +485,7 @@ class IntegratedBoundingBoxModel(nn.Module, PyTorchModelHubMixin):
                 continue
             label = ALL_ITEMS[label.item()]
             quantity = quantity.item()
-            inventory[
+            inventory[slot] = {"type": label, "quantity": quantity}
             seen_slots.add(slot)
         return inventory

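A brief sketch of the `threshold` parameter now exposed on `get_inventory`: it is forwarded to `prediction_to_inventory`, so raising it drops low-confidence detections and lowering it keeps more of them. The checkpoint id and screenshot path are hypothetical, and a CUDA device is assumed since the method moves the image tensor to the GPU.

```python
from PIL import Image
from plancraft.models.bbox_model import IntegratedBoundingBoxModel

# hypothetical checkpoint id -- PyTorchModelHubMixin provides from_pretrained
model = IntegratedBoundingBoxModel.from_pretrained("gautierdag/plancraft-bbox")
model.eval().cuda()

image = Image.open("observation.png")  # placeholder screenshot of the crafting UI
inventory = model.get_inventory(image, resolution="high", threshold=0.25)
# e.g. {27: {"type": "diorite", "quantity": 1}, ...}
print(inventory)
```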
plancraft/models/generators.py
CHANGED
@@ -1,4 +1,5 @@
 import time
+import copy

 import torch
 from loguru import logger
@@ -6,12 +7,15 @@ from openai import OpenAI
 from PIL import Image
 from transformers import (
     AutoModelForCausalLM,
-    AutoModelForVision2Seq,
-    AutoProcessor,
     AutoTokenizer,
     BitsAndBytesConfig,
 )
-
+
+try:
+    from vllm import LLM, SamplingParams
+    from vllm.lora.request import LoRARequest
+except ImportError:
+    logger.warning("vLLM not installed. Please install vLLM to use vLLM")

 from plancraft.models.utils import (
     get_downloaded_models,
@@ -28,13 +32,12 @@ class TransformersGenerator:
         tokenizer_name: str = "same",
         quantize=False,
         use_images=False,
-        use_hot_cache=True,
         adapter_name="",
         hf_token=None,
         **kwargs,
     ):
         self.model_name = model_name
-        self.use_hot_cache = use_hot_cache
+        # self.use_hot_cache = use_hot_cache
         self.hf_token = hf_token

         if tokenizer_name == "same":
@@ -45,57 +48,36 @@ class TransformersGenerator:
             model_name, quantize=quantize
         )
         self.processor = None
-
-
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-                self.pad_token_id = self.tokenizer.tokenizer.pad_token_id
-            else:
-                self.pad_token_id = self.tokenizer.tokenizer.eos_token_id
-        else:
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_name,
+            token=self.hf_token,  # trust_remote_code=True
+            padding_side="left",  # ensure that the padding is on the left
+        )
+        logger.info("Loading model")
+        time_now = time.time()
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            device_map="auto",
+            **model_kwargs,
+        )
+        logger.info(f"Model loaded in {time.time() - time_now:.2f} seconds")
+
+        # load OA adapter
+        if adapter_name != "":
+            logger.info(f"Loading adapter and tokenizer from {adapter_name}")
             self.tokenizer = AutoTokenizer.from_pretrained(
-
-
-                padding_side="left",  # ensure that the padding is on the left
-            )
-            logger.info("Loading model")
-            time_now = time.time()
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                device_map="auto",
-                **model_kwargs,
+                adapter_name,
+                padding_side="left",
             )
-
-
-
-
-
-
-
-
-
-            self.model.resize_token_embeddings(len(self.tokenizer))
-            self.model.load_adapter(adapter_name)
-
-            # set pad_token_id
-            if self.tokenizer.pad_token_id:
-                self.pad_token_id = self.tokenizer.pad_token_id
-            else:
-                self.tokenizer.pad_token = self.tokenizer.eos_token
-                self.pad_token_id = self.tokenizer.eos_token_id
+            self.model.resize_token_embeddings(len(self.tokenizer))
+            self.model.load_adapter(adapter_name)
+
+        # set pad_token_id
+        if self.tokenizer.pad_token_id:
+            self.pad_token_id = self.tokenizer.pad_token_id
+        else:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+            self.pad_token_id = self.tokenizer.eos_token_id

         # compile
         time_now = time.time()
@@ -111,54 +93,6 @@ class TransformersGenerator:
         self.past_key_values_kwargs = {}
         self.past_token_ids = None

-    def truncate_kv_cache(self, new_token_ids: torch.Tensor):
-        """
-        Truncate the key-value cache to the size which overlap the past_ids with the new_ids.
-        Uses:
-            past_ids: torch.Tensor [B, T]
-            new_ids: torch.Tensor [B, T]
-            kv_cache: tuple[tuple[torch.Tensor]]: tuple of key-value cache tensors
-
-        NOTE: this essentially implements System Prompt in the worst case when using batch_size==1
-        """
-        if (
-            self.past_token_ids is None
-            or "past_key_values" not in self.past_key_values_kwargs
-        ):
-            return
-
-        # caching doesn't seem to work with multimodal models
-        if self.use_images:
-            self.past_key_values_kwargs = {}
-            return
-
-        past_batch_size, past_seq_len = self.past_token_ids.shape
-        new_batch_size, new_seq_len = new_token_ids.shape
-
-        # If the batch size has changed, reset the cache
-        if past_batch_size != new_batch_size:
-            self.past_key_values_kwargs = {}
-            return
-
-        min_shape = min(past_seq_len, new_seq_len)
-        compare_past = (
-            self.past_token_ids[:, :min_shape] != new_token_ids[:, :min_shape]
-        )
-
-        # All tokens are the same - no need to truncate
-        if not compare_past.any():
-            return
-
-        # Find the first token that is different between the past and new tokens
-        seq_min = torch.argmax(compare_past.double(), dim=1).min()
-
-        # Truncate the key-value cache to the size which overlap the past_ids with the new_ids.
-        # assumes shape is [num_layers, num_heads, seq_len, hidden_size]
-        self.past_key_values_kwargs["past_key_values"] = [
-            [kv[:, :, :seq_min, :] for kv in kvs]
-            for kvs in self.past_key_values_kwargs["past_key_values"]
-        ]
-
     def build_model_kwargs(self, model_name: str, **kwargs) -> tuple[str, dict]:
         model_kwargs = {
             "token": self.hf_token,
@@ -255,20 +189,6 @@ class TransformersGenerator:
             k: v.to(self.model.device) for k, v in tokenized_messages.items()
         }

-        # Truncate the key-value cache
-        self.truncate_kv_cache(tokenized_messages["input_ids"])
-
-        if (
-            "past_key_values" in self.past_key_values_kwargs
-            and self.past_key_values_kwargs["past_key_values"][0][0].shape[-2]
-            > tokenized_messages["input_ids"].shape[-1]
-        ):
-            raise ValueError("Past key values are larger than the input_ids")
-
-        past_key_values = self.past_key_values_kwargs.get("past_key_values", None)
-        if past_key_values is not None:
-            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
-
         generated_sequences = self.model.generate(
             **tokenized_messages,
             do_sample=True,
@@ -276,16 +196,7 @@ class TransformersGenerator:
             max_new_tokens=max_tokens,
             pad_token_id=self.pad_token_id,
             return_dict_in_generate=True,
-            use_cache=True,
-            past_key_values=past_key_values,
-            return_legacy_cache=True,
         )
-        # Cache the past key values
-        if self.use_hot_cache:
-            self.past_key_values_kwargs["past_key_values"] = (
-                generated_sequences.past_key_values
-            )
-        self.past_token_ids = generated_sequences.sequences

         # Decode the output
         text_responses = self.tokenizer.batch_decode(
@@ -301,6 +212,119 @@ class TransformersGenerator:
         return text_responses, total_tokens_used


+class VLLMGenerator:
+    def __init__(
+        self,
+        model_name: str,
+        adapter_name="",
+        **kwargs,
+    ):
+        self.model_name = model_name
+        # Initialize vLLM model
+        logger.info(f"Loading model {model_name} with vLLM")
+        time_now = time.time()
+
+        # Get downloaded models
+        downloaded_models = get_downloaded_models()
+        if model_name in downloaded_models:
+            model_name = downloaded_models[model_name]
+            logger.info(f"Using local model {model_name}")
+
+        self.llm = LLM(
+            model=model_name,
+            trust_remote_code=True,
+            tensor_parallel_size=torch.cuda.device_count(),
+            gpu_memory_utilization=0.95,
+            max_model_len=16384,
+            dtype=torch.bfloat16,
+            enable_lora=True if adapter_name != "" else False,
+        )
+
+        # Load adapter
+        self.lora_request = None
+        if adapter_name != "":
+            from huggingface_hub import snapshot_download
+
+            logger.info(f"Loading adapter from {adapter_name}")
+            lora_path = snapshot_download(repo_id=adapter_name)
+            self.lora_request = LoRARequest(
+                adapter_name,
+                lora_int_id=0,
+                lora_path=lora_path,
+            )
+
+        logger.info(f"Model loaded in {time.time() - time_now:.2f} seconds")
+
+    def reset(self):
+        # vLLM handles state automatically, no need to reset
+        pass
+
+    def prepare_messages(
+        self,
+        history: History,
+        max_messages_window: int,
+    ) -> tuple[list[dict], list]:
+        """
+        Prepare the messages using a history
+        """
+        message_window = history.dialogue_history[-max_messages_window:]
+        # remove the first assistant message if it is present
+        if len(message_window) > 0 and message_window[0]["role"] == "assistant":
+            message_window = message_window[1:]
+        # add the system prompt if the first message is not a system message
+        if len(message_window) > 0 and message_window[0]["role"] != "system":
+            message_window = [history.system_prompt_dialogue] + message_window
+
+        # vLLM doesn't use images
+        return message_window, []
+
+    @torch.inference_mode()
+    def generate_unconstrained(
+        self,
+        batch_messages: list[list[dict]],
+        max_tokens: int = 256,
+        temperature=0.6,
+        top_p=1.0,
+        frequency_penalty=0.0,
+        presence_penalty=0.0,
+        stop=["\n", "\n\n"],
+        **kwargs,
+    ) -> tuple[list[str], int]:
+        """
+        Generate unconstrained text based on the batch of messages using vLLM.
+        """
+        # Create sampling parameters for vLLM
+        sampling_params = SamplingParams(
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            presence_penalty=presence_penalty,
+            stop=stop if isinstance(stop, list) else [stop] if stop else None,
+        )
+
+        # Generate completions with vLLM
+        outputs = self.llm.chat(
+            batch_messages,
+            sampling_params=sampling_params,
+            use_tqdm=False,
+            lora_request=self.lora_request,
+        )
+
+        # Extract responses
+        text_responses = []
+        total_tokens_used = 0
+
+        for output in outputs:
+            text_responses.append(output.outputs[0].text)
+            # Sum prompt tokens and output tokens for the total
+            total_tokens_used += len(output.prompt_token_ids) + len(
+                output.outputs[0].token_ids
+            )
+
+        return text_responses, total_tokens_used
+
+
 class OpenAIGenerator:
     def __init__(self, use_images=False, model_name="gpt-4o-mini", api_key=None):
         self.client = OpenAI(api_key=api_key)
@@ -327,6 +351,8 @@ class OpenAIGenerator:
             message_window = [history.system_prompt_dialogue] + message_window

         if self.use_images:
+            message_window = copy.deepcopy(message_window)
+            # copy the images to the history
             img_idx = -1
             seen_images = 0
             # iterate through the messages in reverse order to assign images
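A minimal sketch, not from the package documentation, of calling the new `VLLMGenerator` directly. It assumes a CUDA host with the `vllm` extra installed; the model id and prompt are placeholders, and the message format mirrors what `prepare_messages` produces (OpenAI-style role/content dicts).

```python
from plancraft.models.generators import VLLMGenerator

# placeholder model id -- any chat model supported by vLLM should work
generator = VLLMGenerator(model_name="meta-llama/Llama-3.1-8B-Instruct")

batch_messages = [
    [
        {"role": "system", "content": "You are a Plancraft agent."},
        {"role": "user", "content": "Craft the target item. What is your next action?"},
    ]
]
# returns one text response per conversation plus the total prompt + completion tokens
responses, tokens_used = generator.generate_unconstrained(batch_messages, max_tokens=128)
print(responses[0], tokens_used)
```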
plancraft/models/utils.py
CHANGED
@@ -39,10 +39,10 @@ def get_downloaded_models() -> dict:
     """
     downloaded_models = {}
     # known models on NFS partition
-    if pathlib.Path("/nfs").exists():
-        local_models = glob.glob("/nfs/public/hf/models/*/*")
+    if pathlib.Path("/public").exists():
+        local_models = glob.glob("/public/hf/models/*/*")
         downloaded_models = {
-            model.replace("/nfs/public/hf/models/", ""): model for model in local_models
+            model.replace("/public/hf/models/", ""): model for model in local_models
         }
     return downloaded_models

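A tiny sketch of the behaviour after the path change above: unless the host actually has a `/public/hf/models` mirror, the lookup simply returns an empty dict and models are fetched from the Hub as usual.

```python
from plancraft.models.utils import get_downloaded_models

# {} on most machines; {"org/model": "/public/hf/models/org/model", ...} when the mirror exists
print(get_downloaded_models())
```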
plancraft/simple.py
ADDED
@@ -0,0 +1,186 @@
+import json
+import os
+from typing import Any
+
+from plancraft.config import PlancraftExample
+from plancraft.environment.actions import (
+    ActionHandlerBase,
+    MoveActionHandler,
+    SmeltActionHandler,
+    ImpossibleActionHandler,
+    StopAction,
+)
+from plancraft.environment.env import (
+    PlancraftEnvironment,
+    get_objective_str,
+    target_and_inventory_to_text_obs,
+)
+
+
+def get_plancraft_examples(split: str = "train") -> list[PlancraftExample]:
+    """
+    Load examples from the data directory
+    """
+    data_dir = os.path.join(os.path.dirname(__file__), "data")
+    with open(os.path.join(data_dir, f"{split}.json"), "r") as f:
+        examples = json.load(f)
+    return [PlancraftExample(**example) for example in examples]
+
+
+class PlancraftGymWrapper:
+    """
+    This wrapper class just wraps the environment and actions to evaluate a single example
+
+    This is useful if you want to bring your own agent/model to interact with the environment and not rely on the History class
+    and model class in the plancraft package.
+    """
+
+    def __init__(
+        self,
+        example: PlancraftExample,
+        actions: list[ActionHandlerBase] = [
+            MoveActionHandler(),
+            SmeltActionHandler(),
+            ImpossibleActionHandler(),
+        ],
+        max_steps: int = 30,
+        resolution: str = "high",
+        use_text_inventory: bool = True,
+    ):
+        self.actions = actions
+        self.max_steps = max_steps
+        # whether to convert the inventory to text observation
+        # if False, only the objective string is returned
+        self.use_text_inventory = use_text_inventory
+        self.current_step = 0
+        self.stopped = False
+        self.success = False
+        self.example = example
+        self.resolution = resolution
+        self.environment = PlancraftEnvironment(
+            example.slotted_inventory, resolution=self.resolution
+        )
+        if example.impossible:
+            assert "impossible" in [action.action_name for action in actions]
+
+    def check_done(self, inventory: dict, target: str):
+        """
+        Check that target object is obtained
+        """
+        for slot, item in inventory.items():
+            # ensure the target is in the inventory (not in slot 0)
+            if target == item["type"] and slot != 0:
+                return True
+        return False
+
+    def parse_raw_model_response(self, generated_text: str) -> str:
+        """
+        Given a message and set of action handlers, parse the content to return the action
+        or a message if the action is not valid/requires message response
+        """
+        for handler in self.actions:
+            match_output = handler.match(generated_text)
+            if match_output:
+                return match_output
+        action_names = [handler.action_name for handler in self.actions]
+        return f"Only select actions from the following: {', '.join(action_names)}"
+
+    def step(
+        self, action: str
+    ) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
+        """
+        Execute action and return next observation, reward, termination status, truncation status, and info
+
+        Returns:
+            observation: The environment observation after the action, observation is a dictionary with keys:
+                - text: The text observation (always present)
+                - inventory: The inventory after the action (if action was successful)
+                - target: The target object (if action was successful)
+                - image: The image observation (if action was successful)
+            reward: Reward for the current action (1.0 for success, 0.0 otherwise)
+            terminated: Whether the episode is done due to task completion or task failure
+            truncated: Whether the episode is done due to external limits (e.g. max steps reached)
+            info: Additional diagnostic information (helpful for debugging)
+        """
+        action = self.parse_raw_model_response(action)
+        self.current_step += 1
+
+        # Initialize return values
+        reward = 0.0
+        terminated = False
+        truncated = False
+        info = {"steps": self.current_step}
+
+        # Handle already stopped case
+        if self.stopped:
+            return (
+                {"text": "Plancraft environment is terminated"},
+                reward,
+                True,
+                True,
+                info,
+            )
+
+        # Handle max steps reached (truncate with no reward)
+        if self.current_step > self.max_steps:
+            self.success = False
+            truncated = True
+            info["reason"] = "max_steps_reached"
+            return (
+                {"text": f"Max steps ({self.max_steps}) reached"},
+                reward,
+                terminated,
+                truncated,
+                info,
+            )
+
+        # Handle stop action
+        if isinstance(action, StopAction):
+            self.stopped = True
+            terminated = True
+            # success is True if example was truly impossible
+            self.success = self.example.impossible
+            if self.success:
+                reward = 1.0
+                info["reason"] = "correctly_identified_impossible"
+            else:
+                info["reason"] = "incorrect_stop"
+            observation = {
+                "text": "Plancraft environment is terminate due to stop action"
+            }
+
+        # Handle invalid action or non-env action
+        elif isinstance(action, str):
+            observation = self.environment.step()
+            observation["target"] = self.example.target
+            observation["text"] = action
+
+        # Handle regular action execution
+        # NOTE: if the action is valid but does not do anything
+        # the environment will return the same observation
+        else:
+            observation = self.environment.step(action)
+            observation["target"] = self.example.target
+
+            # Generate text observation
+            if self.use_text_inventory:
+                text = target_and_inventory_to_text_obs(
+                    target=self.example.target, inventory=observation["inventory"]
+                )
+            else:
+                text = get_objective_str(self.example.target)
+
+            observation["text"] = text
+
+            self.success = self.check_done(
+                observation["inventory"], self.example.target
+            )
+
+            # Set reward and termination for successful completion
+            if self.success:
+                reward = 1.0
+                terminated = True
+                self.stopped = True
+                info["reason"] = "success"
+
+        return observation, reward, terminated, truncated, info
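A short sketch, separate from the file itself, showing what `step` returns when the action string cannot be parsed: `parse_raw_model_response` falls through to the guidance message, which is echoed back in `observation["text"]` while the episode keeps running. The example index and split are arbitrary.

```python
from plancraft.simple import PlancraftGymWrapper, get_plancraft_examples

example = get_plancraft_examples(split="train")[0]
wrapper = PlancraftGymWrapper(example, max_steps=5)

# An unparseable action: no handler matches, so the wrapper answers with guidance text
obs, reward, terminated, truncated, info = wrapper.step("open the crafting table")
print(obs["text"])   # "Only select actions from the following: ..."
print(reward, terminated, truncated, info)  # 0.0 False False {'steps': 1}
```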
plancraft/utils.py
CHANGED
@@ -1,12 +1,7 @@
-import glob
-import pathlib
-from copy import copy
-from typing import Optional
 import abc
+from copy import copy
 from dataclasses import dataclass, field
-
-import torch
-from loguru import logger
+from typing import Optional

 from plancraft.environment.actions import ActionHandlerBase
 from plancraft.environment.prompts import (
@@ -170,7 +165,6 @@ class History(HistoryBase):
         self.initial_dialogue_length = len(self.dialogue_history)

         self.inventory_history = []
-
         self.tokens_used = 0

     def trace(self):
@@ -182,6 +176,10 @@ class History(HistoryBase):
             "tokens_used": copy(self.tokens_used),
         }

+    def trace_images(self):
+        # return only the images added after the initial dialogue
+        return self._images[len(self.prompt_images) :]
+
     @property
     def num_steps(self):
         return (len(self.dialogue_history) - self.initial_dialogue_length) // 2
@@ -193,31 +191,3 @@ class History(HistoryBase):
     @images.setter
     def images(self, value: list) -> None:
         self._images = value
-
-
-def get_downloaded_models() -> dict:
-    """
-    Get the list of downloaded models on the NFS partition (EIDF).
-    """
-    downloaded_models = {}
-    # known models on NFS partition
-    if pathlib.Path("/nfs").exists():
-        local_models = glob.glob("/nfs/public/hf/models/*/*")
-        downloaded_models = {
-            model.replace("/nfs/public/hf/models/", ""): model for model in local_models
-        }
-    return downloaded_models
-
-
-def get_torch_device() -> torch.device:
-    device = torch.device("cpu")
-    if torch.cuda.is_available():
-        device = torch.device("cuda", 0)
-    elif torch.backends.mps.is_available():
-        if not torch.backends.mps.is_built():
-            logger.info(
-                "MPS not available because the current PyTorch install was not built with MPS enabled."
-            )
-        else:
-            device = torch.device("mps")
-    return device
{plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: plancraft
-Version: 0.3.33
+Version: 0.4.0
 Summary: Plancraft: an evaluation dataset for planning with LLM agents
 License: MIT License

@@ -24,7 +24,7 @@ License: MIT License
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 License-File: LICENSE
-Requires-Python: >=3.
+Requires-Python: >=3.10
 Requires-Dist: hydra-core>=1.3.2
 Requires-Dist: imageio>=2.36.0
 Requires-Dist: loguru
@@ -50,12 +50,13 @@ Requires-Dist: seaborn; extra == 'full'
 Requires-Dist: torch>=2.5.0; extra == 'full'
 Requires-Dist: torchvision>=0.20.0; extra == 'full'
 Requires-Dist: transformers>=4.43.3; extra == 'full'
+Requires-Dist: vllm>=0.7.3; extra == 'full'
 Description-Content-Type: text/markdown

 # plancraft

 [](https://github.com/gautierdag/plancraft/actions/workflows/test.yaml)
-
 
 [](https://pypi.org/project/plancraft/)
 [](https://hub.docker.com/r/gautierdag/plancraft)
@@ -64,7 +65,7 @@ Description-Content-Type: text/markdown

 [Paper](https://arxiv.org/abs/2412.21033) | [Website](https://gautierdag.github.io/plancraft/)

-Plancraft is a minecraft environment
+Plancraft is a minecraft environment that benchmarks planning in LLM agents with an oracle RAG retriever.

 You can install the package by running the following command:

@@ -78,7 +79,6 @@ Or:
 uv add plancraft
 ```

-
 
 
 
@@ -88,7 +88,45 @@ The package provides a multimodal environment and dataset for evaluating plannin

 ## Usage

-
+### Quick Start with PlancraftGymWrapper
+
+The package provides an `PlancraftGymWrapper` class that offers a simple interface for integrating your own agent with the Plancraft environment. This is the recommended way to get started if you want to use your own model implementation:
+
+```python
+from plancraft.simple import PlancraftGymWrapper, get_plancraft_examples
+
+# Load examples from the dataset
+examples = get_plancraft_examples(split="train")
+example = examples[0]  # Get the first example
+
+# Create the environment wrapper for this example
+env_wrapper = PlancraftGymWrapper(
+    example=example,
+    max_steps=30,
+    resolution="high",
+    use_text_inventory=True
+)
+
+# Simple agent loop
+# Initialize environment
+observation, reward, terminated, truncated, info = env_wrapper.step("")
+while not (terminated or truncated):
+    # Your agent decides the next action based on observation
+    action = your_agent_function(observation["text"])
+
+    # Execute action in environment
+    observation, reward, terminated, truncated, info = env_wrapper.step(action)
+
+    # Check if successful
+    if reward > 0:
+        print("Success!")
+```
+
+The `PlancraftGymWrapper` follows the standard Gym API format and simplifies the interaction with the environment. It doesn't rely on the `History` class or the `PlancraftBaseModel` interface, making it easier to integrate with your existing agent implementations.
+
+### PlancraftEnvironment
+
+For lower-level control, you can use the `PlancraftEnvironment` class directly:

 ```python
 from plancraft.environments.env import PlancraftEnvironment
@@ -118,23 +156,32 @@ def main():

 Note that the environment is deterministic and stateful, so the same action will always lead to the same observation and the environment will keep track of the state of the inventory.

-### Evaluator
+### Advanced Usage: Evaluator

-
+For more advanced use cases, the package provides an `Evaluator` class for systematic evaluation of models on our dataset. Note that using the Evaluator requires following specific assumptions about model structure and history tracking:

 ```python
 from plancraft.evaluator import Evaluator
-
-
-
-
-
-
-
-
+from plancraft.models.base import PlancraftBaseModel
+
+# Create a model by subclassing PlancraftBaseModel
+class MyModel(PlancraftBaseModel):
+    def step(self, observation, dialogue_history):
+        # Your model implementation
+        pass
+
+    def reset(self):
+        # Reset model state
+        pass
+
+# Create the evaluator with your model
+model = MyModel()
+evaluator = Evaluator(run_name="my_experiment")
+# Evaluate the agent
+results = evaluator.eval_all_examples(model=model)
 ```

-The
+The `Evaluator` class handles the environment loop and model interaction. It is responsible for early stopping, verifying task completion, and saving results and images generated during evaluation.

 #### The Evaluator interactive loop

@@ -203,11 +250,13 @@ The observation returned by the `PlancraftEnvironment` class is a dictionary wit

 The observation returned by the `Evaluator` class is a dictionary with the following keys: `inventory`, `image`, `message`, and `target`. The `message` key contains a string representing the environment formatted in text (we follow the annotation scheme described in our paper). The `target` key contains a string representing the target object to be crafted.

+When using `PlancraftGymWrapper`, the observation contains at minimum a `text` key with the text observation, and may include `inventory`, `target`, and `image` keys depending on the action result.
+
 ### Implementing a Model

-To implement a model
+To implement a model for use with the `Evaluator`, you need to subclass the `PlancraftBaseModel` class and implement the `step` and `reset` method. See the `plancraft.models.dummy` module for an example of how to implement a basic model.

-
+For use with `PlancraftGymWrapper`, you can implement any agent function that processes the observation and returns an action string.

 ## Reproducing the Results tables in the paper

@@ -215,7 +264,7 @@ To reproduce the results tables in the paper, you can use the `exps.sh` script i

 ## Docker

-There is a docker image built to incorporate the latest code and its dependencies.
+There is a docker image built to incorporate the latest code and its dependencies. It's built by running the following command:

 ```bash
 docker buildx build --platform linux/amd64,linux/arm64 -t gautierdag/plancraft --push .
@@ -223,16 +272,6 @@ docker buildx build --platform linux/amd64,linux/arm64 -t gautierdag/plancraft -

 The image is available on [Docker Hub](https://hub.docker.com/r/gautierdag/plancraft). Note that, unlike the package, the docker image includes everything in the repo.

-## To Do
-
-Non-exhaustive list of things to do from highest to lowest priority:
-
-- [ ] Add minecraft wiki scrape and non-oracle search for pages
-- [ ] Improve planner to bring closer to optimal (the oracle planner does not consider future crafting steps when moving items -- see paper for more details)
-- [ ] Rerun image models with better bounding box model
-- [ ] Track bounding box accuracy
-- [ ] Implement a version of the image environment entirely on cuda/pytorch rather than cpu
-
 ## PRs Welcomed

 If you would like to contribute to the project, please feel free to open a PR. I am happy to review and merge PRs that improve the project. If you have any questions, feel free to create an issue or reach out to me directly.
{plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/RECORD
CHANGED
@@ -1,8 +1,9 @@
 plancraft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-plancraft/config.py,sha256=
-plancraft/evaluator.py,sha256=
+plancraft/config.py,sha256=oyn8I_k0Slh-Nyg2javomFertZ5ZHiY_ndAVqfJYQvQ,4010
+plancraft/evaluator.py,sha256=UQujiltf88rCnbNwoglM5tJe5gW9XASew-jLaEbtJZo,15525
 plancraft/generate_dataset.py,sha256=DlrU-PmvWqSNJD1g1-8Lpb8n3N-Ogw3rje1nrRzjGKs,2382
-plancraft/
+plancraft/simple.py,sha256=QlXsCd6n5lIaehSFjeBlxTm40FuGCGHPEEsuGIMEJqk,6745
+plancraft/utils.py,sha256=hCE1oQ-77Me39Vo-sCL7iZPdO-WWYZnBjP41lZWRi20,6339
 plancraft/data/test.json,sha256=4jWfYMAVuZCFmGB4iZJAjlh9_8jXECdaGp8xn7_tAM4,1317131
 plancraft/data/test.small.easy.json,sha256=5NZEJ2PqIgmHQecJOIVQyM1D6GFKyJq7GVmgRudaqQk,189304
 plancraft/data/test.small.json,sha256=eULAG1rdolRMXPrecV-7YoDIheKGyIT5MVpWdISV0wg,270089
@@ -15,7 +16,7 @@ plancraft/environment/actions.py,sha256=Pub21caxM5iZ9IaX-ny1-xxr_peJIwwV_QAx3BVS
 plancraft/environment/env.py,sha256=A4532st7JFBYBF_Nh0CEEi3ZTLJAeaB3t9PAIVSemj0,16390
 plancraft/environment/items.py,sha256=Z9rhSyVDEoHF1pxRvhyiT94tyQJaWHi3wUHVcamz82o,221
 plancraft/environment/planner.py,sha256=uIOJjIoyT_4pxeWeTKb8BkLJyKZG0-AMoEOkZs6Ua9A,19340
-plancraft/environment/prompts.py,sha256=
+plancraft/environment/prompts.py,sha256=NU9YHAz3id-IgaukQvEi5uLlpEstpE5_Hccvvq1At2Y,6950
 plancraft/environment/recipes.py,sha256=0vwzOU86eZmGN2EpZVSIvzxpx0AOBWNPxTtAOFBN2A0,19570
 plancraft/environment/sampler.py,sha256=79hLpTU0ajvMPoBsvSe8tE88x31c8Vlczb3tJZJcau0,7441
 plancraft/environment/search.py,sha256=z31eEwQBY7WJaYVBEEwulFS8P3h1Nwo1Th9BaCTxk5M,2085
@@ -1912,15 +1913,15 @@ plancraft/environment/tags/wooden_stairs.json,sha256=GCr2_5UGPMYZECqQ_5NYSvbwuwt
 plancraft/environment/tags/wooden_trapdoors.json,sha256=DbjfwoHJL8VuYWV61A1uDqW7LJsGlOP4eoxcGIQVYr4,303
 plancraft/environment/tags/wool.json,sha256=Z59l4mdPztVZBFaglJ4mV9H2OnyCVzhqQRi2dduak78,496
 plancraft/models/__init__.py,sha256=TBrarn93qt4IFJRNqtzOfaA8jGMPCgD7DFs-M84ipmk,510
-plancraft/models/act.py,sha256=
+plancraft/models/act.py,sha256=e5YZ1hre_5CZ-tSpWTZ-6AQ0RLVGd0QuKetXfLaTqW0,3077
 plancraft/models/base.py,sha256=S8EdkqWpn8nE1WcrqDoA4Hx4p52qEttGxnqjIPWvl3Q,852
-plancraft/models/bbox_model.py,sha256=
+plancraft/models/bbox_model.py,sha256=D1fOhYuy7ohCqqRRgxEO6N89B7v4CILfrMACpvooHiQ,17149
 plancraft/models/dummy.py,sha256=_NUTviv5ye6KGzODRt0Zykk8shsek0QBqWCeZW3ldSQ,1495
-plancraft/models/generators.py,sha256=
+plancraft/models/generators.py,sha256=7COMLjjx_HbTWJqINNLqqExQv7gLikfLTViacAdSt5M,13963
 plancraft/models/oracle.py,sha256=f-0KWlBuHy6wcxmDsxM3MQ_QwfBstzfbA26mlk1MgLA,1657
-plancraft/models/utils.py,sha256=
+plancraft/models/utils.py,sha256=xgkP5jqCeFfkKe3Xd4ZYfTqiEJ-dA-qgFAC-J35ub3E,4029
 plancraft/train/dataset.py,sha256=oFqEd4LG9oEQ-71teh0Wf7-jJbtybT2ZibfM2bBdBkM,5474
-plancraft-0.
-plancraft-0.
-plancraft-0.
-plancraft-0.
+plancraft-0.4.0.dist-info/METADATA,sha256=Tt3DlKXtDxZ0M6s2zlEXydCB5dmxkeKI80wao62e-z4,12391
+plancraft-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+plancraft-0.4.0.dist-info/licenses/LICENSE,sha256=YGR8ehDB4t-T-lOQKMfKNR-2zsOU7E3E5NA8t25HKE0,1070
+plancraft-0.4.0.dist-info/RECORD,,

{plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/WHEEL
File without changes

{plancraft-0.3.33.dist-info → plancraft-0.4.0.dist-info}/licenses/LICENSE
File without changes