plancraft 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

@@ -0,0 +1,158 @@
+ from plancraft.models.utils import gold_search_recipe
+
+ VALID_ACTIONS = ["move", "smelt", "think", "search", "impossible"]
+
+ ACTIONS_DESCRIPTIONS = {
+     "move": {
+         "description": "Transfer a specific quantity of an item from one slot to another",
+         "format": "`move: from [Source] to [Target] with quantity N`",
+     },
+     "smelt": {
+         "description": "Smelt an item in a furnace and move the output to a specific slot",
+         "format": "`smelt: from [Source] to [Target] with quantity N`",
+     },
+     "think": {
+         "description": "Generate thoughts to help you decide on the next action",
+         "format": "`think: <thought message>`",
+     },
+     "search": {
+         "description": "Search for a recipe to craft a specific item",
+         "format": "`search: <recipe name>`",
+     },
+     "impossible": {
+         "description": "Stop the task if it is certain that it is impossible with the given inventory",
+         "format": "`impossible: <reason>`",
+     },
+ }
+
+ BASE_SYSTEM_PROMPT = """You are crafting in Minecraft. You need to decide on the next action.
+
+ Crafting Grid: The crafting table is organized into a 3x3 grid. Each slot in the grid has a unique identifier:
+ - Top row: [A1] [A2] [A3]
+ - Middle row: [B1] [B2] [B3]
+ - Bottom row: [C1] [C2] [C3]
+
+ The output of the crafting process is placed in a designated output slot labeled [0]. You cannot move or smelt items directly into slot [0].
+
+ Inventory Slots: The remaining inventory slots (outside of the crafting grid) are used for storing items. These slots are labeled as [I1] to [I36]."""
+
+ BASE_SYSTEM_PROMPT_EXAMPLE = """Example:
+ - `move: from [I2] to [A1] with quantity 3`
+ - `smelt: from [I5] to [I6] with quantity 1`
+
+ Constraints:
+ - You cannot move or smelt items into [0]
+ - If an item is not in slot [0] then the recipe is incorrect
+ - You need to move items from [0] to a free inventory slot to complete the crafting process"""
+
+
+ def get_system_prompt(actions: list[str]):
+     assert set(actions).issubset(VALID_ACTIONS), f"Invalid actions: {actions}"
+     assert "move" in actions, "move should be one of the actions"
+     assert "smelt" in actions, "smelt should be one of the actions"
+
+     descriptions = ""
+     for action in actions:
+         descriptions += f"\n\t- {action}: {ACTIONS_DESCRIPTIONS[action]['description']}"
+
+     output_format = ""
+     for action in actions:
+         output_format += f"\n\t- {ACTIONS_DESCRIPTIONS[action]['format']}"
+
+     return f"{BASE_SYSTEM_PROMPT}\n\nActions:{descriptions}\n\nFormat:{output_format}\n\n{BASE_SYSTEM_PROMPT_EXAMPLE}"
+
+
+ CRAFTING_STEPS = [
+     "Craft an item of type: andesite\ninventory:\n - diorite [I18] quantity 1\n - cobblestone [I30] quantity 1",
+     "Craft an item of type: andesite\ninventory:\n - diorite [B1] quantity 1\n - cobblestone [I30] quantity 1",
+     "Craft an item of type: andesite\ninventory:\n - andesite [0] quantity 1\n - diorite [B1] quantity 1\n - cobblestone [B2] quantity 1",
+     "Craft an item of type: iron_ingot\ninventory:\n - iron_ore [I36] quantity 1\n - cobblestone [I30] quantity 1",
+ ]
+
+ BASE_ACTION_STEPS = [
+     "move: from [I18] to [B1] with quantity 1",
+     "move: from [I30] to [B2] with quantity 1",
+     "move: from [0] to [I6] with quantity 1",
+     "smelt: from [I36] to [I35] with quantity 1",
+ ]
+
+ THINK_STEPS = [
+     "think: To solve this task I need to craft andesite using 1 diorite and 1 cobblestone side by side.",
+     "think: Now I need to move the cobblestone into position [B2] to be to the right of the diorite.",
+     "think: Now I can craft the andesite by moving it from the craft slot [0] to a free inventory slot.",
+     "think: To craft an iron_ingot, I need to smelt iron_ore into an empty slot.",
+ ]
+
+ SEARCH_STEPS = [
+     "search: andesite",
+     None,
+     None,
+     "search: iron_ingot",
+ ]
+
+
+ def get_prompt_example(
+     actions: list[str],
+     use_text_inventory=True,
+     use_multimodal_content_format=False,
+     use_images=False,
+ ) -> list[dict]:
+     assert set(actions).issubset(VALID_ACTIONS), f"Invalid actions: {actions}"
+     assert "move" in actions, "move should be one of the actions"
+     assert "smelt" in actions, "smelt should be one of the actions"
+
+     if use_images:
+         assert (
+             use_multimodal_content_format
+         ), "use_images requires use_multimodal_content_format"
+
+     example_dialogue = []
+     for i, step in enumerate(CRAFTING_STEPS):
+         text = step
+         if not use_text_inventory:
+             text = text.split("\ninventory:\n")[0]
+
+         example_dialogue.append({"role": "user", "content": text})
+         if "search" in actions and SEARCH_STEPS[i]:
+             example_dialogue.append({"role": "assistant", "content": SEARCH_STEPS[i]})
+             search_target = SEARCH_STEPS[i].split("search: ")[-1].strip()
+             search_response = gold_search_recipe(search_target)
+             example_dialogue.append({"role": "user", "content": search_response})
+         if "think" in actions:
+             example_dialogue.append({"role": "assistant", "content": THINK_STEPS[i]})
+             example_dialogue.append({"role": "user", "content": "Ok"})
+         example_dialogue.append({"role": "assistant", "content": BASE_ACTION_STEPS[i]})
+
+     if not use_multimodal_content_format:
+         return example_dialogue
+
+     # convert to multimodal dialogue
+     multimodal_dialogue = []
+     for message in example_dialogue:
+         if "Craft an item" in message["content"]:
+             content_list = [
+                 {
+                     "type": "text",
+                     "text": message["content"],
+                 }
+             ]
+             if use_images:
+                 content_list.append(
+                     {
+                         "type": "image",
+                     }
+                 )
+
+             multimodal_dialogue.append(
+                 {"role": message["role"], "content": content_list}
+             )
+         else:
+             multimodal_dialogue.append(
+                 {
+                     "role": message["role"],
+                     "content": [
+                         {"type": "text", "text": message["content"]},
+                     ],
+                 }
+             )
+     return multimodal_dialogue
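
Example usage (an illustrative sketch, not part of the package: any subset of VALID_ACTIONS that includes "move" and "smelt" passes the asserts):

    actions = ["move", "smelt", "think", "search"]
    dialogue = [{"role": "system", "content": get_system_prompt(actions)}]
    dialogue += get_prompt_example(actions, use_text_inventory=True)
    # dialogue now alternates user observations with assistant search/think/action turns
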
@@ -0,0 +1,93 @@
+ from dotenv import load_dotenv
+
+ from plancraft.config import EvalConfig
+ from plancraft.environments.actions import (
+     NoOp,
+     SymbolicAction,
+ )
+ from plancraft.models.act import ActModel
+ from plancraft.models.utils import (
+     convert_observation_to_message,
+     parse_content_response,
+ )
+
+ load_dotenv()
+
+
+ class ReactModel(ActModel):
+     """
+     Model that acts with an interleaved thinking step
+     """
+
+     def __init__(self, cfg: EvalConfig):
+         super().__init__(cfg)
+         self.max_invalid_actions = 3
+
+     def step(self, observation: dict) -> SymbolicAction:
+         # override the step method in ActModel to force a thinking step
+
+         self.history.add_observation_to_history(observation)
+         observation_message = convert_observation_to_message(
+             observation,
+             objective=self.history.objective,
+             bbox_model=self.bbox_model,
+             oam_model="oam" in self.llm.model_name,
+             use_text_inventory=self.use_text_inventory,
+             use_multimodal_content_format=self.use_multimodal_content_format,
+             use_images=self.use_images,
+         )
+         # add observation to history
+         self.history.add_message_to_history(content=observation_message, role="user")
+
+         i = 0
+         while i < self.max_invalid_actions:
+             message_window, image_window = self.llm.prepare_messages(
+                 history=self.history,
+                 max_messages_window=self.max_messages_window,
+                 system_prompt=self.system_prompt,
+                 prompt_images=self.prompt_images,
+             )
+             think_messages, think_token_used = self.llm.generate_unconstrained(
+                 batch_messages=[message_window],
+                 images=[image_window],
+                 start_messages_generation="think:",
+             )
+             self.history.tokens_used += think_token_used
+             think_message = "think: " + think_messages[0].split("\n")[0].strip()
+             self.history.add_message_to_history(content=think_message, role="assistant")
+
+             # retrieve a new message window (now including the thinking step)
+             message_window, image_window = self.llm.prepare_messages(
+                 history=self.history,
+                 max_messages_window=self.max_messages_window,
+                 system_prompt=self.system_prompt,
+                 prompt_images=self.prompt_images,
+             )
+             action_messages, action_token_used = self.llm.generate_unconstrained(
+                 batch_messages=[message_window],
+                 images=[image_window],
+                 start_messages_generation="",
+             )
+             self.history.tokens_used += action_token_used
+
+             action_message = action_messages[0].split("\n")[0].strip()
+
+             self.history.add_message_to_history(
+                 content=action_message, role="assistant"
+             )
+
+             response = parse_content_response(
+                 action_message, valid_actions=self.valid_actions
+             )
+             if not isinstance(response, str):
+                 # valid action
+                 self.history.add_action_to_history(response)
+                 return response
+             # string response: feed the parser feedback back to the model
+             self.history.add_message_to_history(
+                 content=response, role="user"
+             )
+             i += 1
+
+         # too many invalid actions: fall back to a no-op
+         return NoOp()
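
Example usage (an illustrative sketch, not part of the package: `cfg` and `observation` are assumed to be provided by the surrounding evaluation harness):

    model = ReactModel(cfg)  # cfg: EvalConfig
    action = model.step(observation)
    # step() first generates a "think: ..." turn, then an action turn;
    # it returns a parsed SymbolicAction, or NoOp() after
    # max_invalid_actions (3) unparseable responses.
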
@@ -0,0 +1,289 @@
+ import base64
+ import glob
+ import io
+ import pathlib
+ import re
+
+ import numpy as np
+ import torch
+ from PIL import Image
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ from plancraft.environments.actions import (
+     StopAction,
+     SymbolicAction,
+     SymbolicMoveAction,
+     SymbolicSmeltAction,
+     convert_from_slot_index,
+ )
+ from plancraft.environments.recipes import RECIPES
+
+
+ def numpy_to_base64(img_array: np.ndarray, image_format: str = "PNG") -> str:
+     """
+     Convert a NumPy array to a base64 encoded string.
+
+     Parameters:
+     - img_array: np.ndarray - Input image array.
+     - image_format: str - The format to save the image in (e.g., "PNG", "JPEG").
+
+     Returns:
+     - str - Base64 encoded string of the image.
+     """
+     # Convert NumPy array to image
+     image = Image.fromarray(img_array)
+
+     # Save the image to a bytes buffer
+     buffered = io.BytesIO()
+     image.save(buffered, format=image_format)
+
+     # Encode the bytes to a base64 string
+     img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+     return img_str
+
+
+ def get_downloaded_models() -> dict:
+     """
+     Get the mapping of model names to local paths for models already downloaded to the NFS partition (EIDF).
+     """
+     downloaded_models = {}
+     # known models on NFS partition
+     if pathlib.Path("/nfs").exists():
+         local_models = glob.glob("/nfs/public/hf/models/*/*")
+         downloaded_models = {
+             model.replace("/nfs/public/hf/models/", ""): model for model in local_models
+         }
+     return downloaded_models
+
+
+ class TrieNode:
+     def __init__(self):
+         self.children = {}
+         self.is_end_of_sequence = False
+
+
+ class Trie:
+     def __init__(self):
+         self.root = TrieNode()
+         self.longest_sequence_length = 0
+
+     def insert(self, sequence: list):
+         node = self.root
+         for num in sequence:
+             if num not in node.children:
+                 node.children[num] = TrieNode()
+             node = node.children[num]
+         node.is_end_of_sequence = True
+
+         if len(sequence) > self.longest_sequence_length:
+             self.longest_sequence_length = len(sequence)
+
+     def starts_with(self, prefix: list) -> bool:
+         node = self.root
+         for num in prefix:
+             if num not in node.children:
+                 return False
+             node = node.children[num]
+         return True
+
+     def get_next(self, prefix: list) -> list:
+         node = self.root
+         for num in prefix:
+             if num not in node.children:
+                 return []
+             node = node.children[num]
+         return list(node.children.keys())
+
+
+ def tokenize(
+     model: AutoModelForCausalLM,
+     tokenizer: AutoTokenizer,
+     batch_messages: list[list[dict]],
+     start_messages_generation: list[str],
+     max_tokens=256,
+     images=None,
+ ) -> dict[str, torch.Tensor]:
+     """
+     Tokenize a batch of messages and seed the start of each response message.
+     """
+     assert len(start_messages_generation) == len(
+         batch_messages
+     ), "Length of start_messages_generation should be equal to batch_messages"
+
+     message_texts = tokenizer.apply_chat_template(
+         batch_messages,
+         add_generation_prompt=True,
+         tokenize=False,
+     )
+     # add the start of the response message for each message
+     message_texts = [
+         messages_text + new_message_start
+         for (messages_text, new_message_start) in zip(
+             message_texts, start_messages_generation
+         )
+     ]
+
+     max_prompt_length = None
+     # truncate the prompt to leave room for generation if max_length is set
+     if model.generation_config.max_length > max_tokens:
+         max_prompt_length = model.generation_config.max_length - max_tokens
+
+     if images:
+         assert len(images) == len(
+             batch_messages
+         ), "Length of images should be equal to batch_messages"
+         tokenized_messages = tokenizer(
+             message_texts,
+             return_tensors="pt",
+             truncation=True,
+             max_length=max_prompt_length,
+             padding=True,
+             images=images,
+         )
+     else:
+         tokenized_messages = tokenizer(
+             message_texts,
+             return_tensors="pt",
+             truncation=True,
+             max_length=max_prompt_length,
+             padding=True,
+         )
+     return tokenized_messages
+
+
+ def objective_and_inventory_to_str(objective: str, inventory: list[dict]) -> str:
+     inventory_str = ""
+     for item in inventory:
+         if item["quantity"] > 0:
+             if "index" in item:
+                 slot = item["index"]
+             else:
+                 slot = item["slot"]
+
+             if isinstance(slot, int):
+                 slot = convert_from_slot_index(slot)
+
+             inventory_str += f"\n - {item['type']} {slot} quantity {item['quantity']}"
+
+     return f"{objective}\ninventory:{inventory_str}"
+
+
+ def convert_observation_to_message(
+     observation: dict,
+     objective: str,
+     bbox_model=None,
+     oam_model=False,
+     use_text_inventory=True,
+     use_multimodal_content_format=False,
+     use_images=False,
+ ) -> str | dict:
+     """
+     Convert an observation to a message format.
+
+     Parameters:
+     - observation: dict - The observation to convert.
+     - objective: str - The objective of the observation.
+     - bbox_model: Optional - The bounding box model used to extract the inventory from the image.
+     - oam_model: bool - Whether to use the OAM model.
+     - use_text_inventory: bool - Whether to include the inventory as text.
+     - use_multimodal_content_format: bool - Whether to use the multimodal content format.
+     - use_images: bool - Whether to append an image to the message content; must be used with use_multimodal_content_format.
+     """
+     if bbox_model is not None:
+         # predict the inventory from the image observation
+         inventory = bbox_model.get_inventory(observation["pov"].copy())
+         text_content = objective_and_inventory_to_str(
+             objective, sorted(inventory, key=lambda x: x["slot"])
+         )
+     elif oam_model:
+         text_content = f"{objective}\ninventory:\n"
+     elif not use_text_inventory:
+         text_content = objective
+     else:
+         # default: build the text inventory from the symbolic observation
+         inventory = []
+         for o in observation["inventory"]:
+             if o["quantity"] > 0:
+                 inventory.append(
+                     {
+                         "type": o["type"],
+                         "slot": convert_from_slot_index(o["index"]),
+                         "quantity": o["quantity"],
+                     }
+                 )
+         text_content = objective_and_inventory_to_str(objective, inventory)
+
+     if not use_multimodal_content_format:
+         return text_content
+
+     content_list = [{"type": "text", "text": text_content}]
+     if use_images:
+         content_list.append({"type": "image"})
+     return {"content": content_list}
+
+
+ def gold_search_recipe(recipe_name: str) -> str:
+     """
+     Return the gold recipes for the given recipe name.
+     """
+     if recipe_name not in RECIPES:
+         return "Could not find a recipe by that name."
+
+     out_string = f"Recipes to craft {recipe_name}:\n"
+     for i, r in enumerate(RECIPES[recipe_name]):
+         if r.recipe_type != "smelting":
+             # sample a valid input grid (note that this is not guaranteed to be the only valid grid)
+             input_crafting_grid = r.sample_input_crafting_grid()
+             recipe_instructions = ""
+             for item in input_crafting_grid:
+                 recipe_instructions += (
+                     f"{item['type']} at {convert_from_slot_index(item['slot'])}\n"
+                 )
+         else:
+             # smelting recipe
+             recipe_instructions = f"smelt {r.ingredient}\n"
+         out_string += f"recipe {i+1}:\n{recipe_instructions}"
+     return out_string
+
+
+ def parse_content_response(
+     content: str, valid_actions: list[str] = ["smelt", "move"]
+ ) -> str | SymbolicAction | StopAction:
+     """
+     Given a message and a set of valid actions, parse the content and return the action,
+     or a string message if the action is invalid or requires a message response.
+     """
+
+     action_match = re.search(f"({'|'.join(valid_actions)}):", content)
+     if action_match:
+         action = action_match.group(1)
+         if action == "think":
+             return "Ok"
+         elif action == "impossible":
+             reason = re.search(r"impossible: (.*)", content).group(1)
+             return StopAction(reason=reason)
+         elif action == "search":
+             search_target = re.search(r"search: (\w+)", content).group(1)
+             return gold_search_recipe(search_target)
+         else:
+             try:
+                 slot_from = re.search(r" from (\[[ABCI]?\d+\])", content).group(1)
+                 slot_to = re.search(r" to (\[[ABCI]?\d+\])", content).group(1)
+                 quantity = re.search(r"with quantity (\d+)", content).group(1)
+                 if action == "move":
+                     action = SymbolicMoveAction(
+                         slot_from=slot_from,
+                         slot_to=slot_to,
+                         quantity=quantity,
+                     )
+                 else:
+                     action = SymbolicSmeltAction(
+                         slot_from=slot_from,
+                         slot_to=slot_to,
+                         quantity=quantity,
+                     )
+                 return action
+             except AttributeError as e:
+                 return f"Format Error: {e}"
+     return f"Only select actions from the following: {', '.join(valid_actions)}"