plancraft-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- environments/__init__.py +0 -0
- environments/actions.py +218 -0
- environments/env_real.py +315 -0
- environments/env_symbolic.py +215 -0
- environments/items.py +10 -0
- environments/planner.py +109 -0
- environments/recipes.py +542 -0
- environments/sampler.py +224 -0
- models/__init__.py +21 -0
- models/act.py +184 -0
- models/base.py +152 -0
- models/bbox_model.py +492 -0
- models/dummy.py +54 -0
- models/few_shot_images/__init__.py +16 -0
- models/generators.py +483 -0
- models/oam.py +284 -0
- models/oracle.py +268 -0
- models/prompts.py +158 -0
- models/react.py +98 -0
- models/utils.py +289 -0
- plancraft-0.1.0.dist-info/LICENSE +21 -0
- plancraft-0.1.0.dist-info/METADATA +53 -0
- plancraft-0.1.0.dist-info/RECORD +26 -0
- plancraft-0.1.0.dist-info/WHEEL +5 -0
- plancraft-0.1.0.dist-info/top_level.txt +3 -0
- train/dataset.py +187 -0
models/bbox_model.py
ADDED
@@ -0,0 +1,492 @@
import einops
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.v2 as v2
from huggingface_hub import PyTorchModelHubMixin
from plancraft.environments.items import ALL_ITEMS
from torchvision.models.detection.faster_rcnn import (
    fasterrcnn_resnet50_fpn_v2,
    ResNet50_Weights,
)
from torchvision.models.detection.roi_heads import (
    fastrcnn_loss,
    keypointrcnn_inference,
    keypointrcnn_loss,
    maskrcnn_inference,
    maskrcnn_loss,
)
from torchvision.ops import boxes as box_ops


def slot_to_bbox(slot: int):
    # crafting slot
    if slot == 0:
        # slot size: 25x25
        # top left corner: (x = 118, y = 30)
        box_size = 25
        left_x = 117
        top_y = 29
    # crafting grid
    elif slot < 10:
        # slot size: 18x18
        # top left corner: (x = 28 + 18 * col, y = 16 + 18 * row)
        box_size = 18
        row = (slot - 1) // 3
        col = (slot - 1) % 3
        left_x = 27 + (box_size * col)
        top_y = 15 + (box_size * row)
    # inventory
    elif slot < 37:
        # slot size: 18x18
        # top left corner: (x = 6 + 18 * col, y = 83 + 18 * row)
        box_size = 18
        row = (slot - 10) // 9
        col = (slot - 10) % 9
        left_x = 5 + (box_size * col)
        top_y = 82 + (box_size * row)
    # hotbar
    else:
        # slot size: 18x18
        # top left corner: (x = 6 + 18 * col, y = 141)
        box_size = 18
        col = (slot - 37) % 9
        left_x = 5 + (box_size * col)
        top_y = 140
    return [left_x, top_y, left_x + box_size, top_y + box_size]


def precompute_slot_bboxes():
    """Precompute the bounding boxes for all slots."""
    slot_bboxes = {}
    for slot in range(46):  # Assuming slot indices range from 0 to 45
        slot_bboxes[slot] = slot_to_bbox(slot)
    return slot_bboxes


# Precompute all slot bounding boxes
IDX_TO_BBOX = precompute_slot_bboxes()


def postprocess_detections_custom(
    self,
    class_logits,
    quantity_logits,
    box_features,
    box_regression,
    proposals,
    image_shapes,
):
    device = class_logits.device
    num_classes = class_logits.shape[-1]

    boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
    pred_boxes = self.box_coder.decode(box_regression, proposals)

    pred_scores = F.softmax(class_logits, -1)

    pred_quantity = F.softmax(quantity_logits, -1).argmax(dim=-1)
    # repeat the quantities, once for each class
    pred_quantity = einops.repeat(
        pred_quantity, "n -> n c", c=num_classes, n=pred_quantity.shape[0]
    )

    pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
    pred_scores_list = pred_scores.split(boxes_per_image, 0)
    pred_quantity_list = pred_quantity.split(boxes_per_image, 0)
    pred_features_list = box_features.split(boxes_per_image, 0)

    all_boxes = []
    all_scores = []
    all_labels = []
    all_quantity_labels = []
    all_features = []

    for boxes, scores, quantities, features, image_shape in zip(
        pred_boxes_list,
        pred_scores_list,
        pred_quantity_list,
        pred_features_list,
        image_shapes,
    ):
        boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

        # create labels for each prediction
        labels = torch.arange(num_classes, device=device)
        labels = labels.view(1, -1).expand_as(scores)

        box_idxs = (
            torch.arange(boxes.size(0), device=device).view(-1, 1).expand_as(labels)
        )

        # remove predictions with the background label
        boxes = boxes[:, 1:]
        scores = scores[:, 1:]
        labels = labels[:, 1:]
        quantities = quantities[:, 1:]
        box_idxs = box_idxs[:, 1:]

        # batch everything, by making every class prediction be a separate instance
        boxes = boxes.reshape(-1, 4)
        scores = scores.reshape(-1)
        labels = labels.reshape(-1)
        quantities = quantities.reshape(-1)
        box_idxs = box_idxs.reshape(-1)

        # remove low scoring boxes
        inds = torch.where(scores > self.score_thresh)[0]
        boxes, scores, labels, quantities, box_idxs = (
            boxes[inds],
            scores[inds],
            labels[inds],
            quantities[inds],
            box_idxs[inds],
        )

        # remove empty boxes
        keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
        boxes, scores, labels, quantities, box_idxs = (
            boxes[keep],
            scores[keep],
            labels[keep],
            quantities[keep],
            box_idxs[keep],
        )

        # non-maximum suppression, independently done per class
        keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
        # keep only topk scoring predictions
        keep = keep[: self.detections_per_img]
        boxes, scores, labels, quantities, box_idxs = (
            boxes[keep],
            scores[keep],
            labels[keep],
            quantities[keep],
            box_idxs[keep],
        )

        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_quantity_labels.append(quantities)
        all_features.append(features[box_idxs])

    return all_boxes, all_scores, all_labels, all_quantity_labels, all_features


def forward_custom(
    self,
    features,
    proposals,
    image_shapes,
    targets=None,
):
    training = False
    if targets is not None:
        training = True
        for t in targets:
            floating_point_types = (torch.float, torch.double, torch.half)
            if t["boxes"].dtype not in floating_point_types:
                raise TypeError(
                    f"target boxes must be of float type, instead got {t['boxes'].dtype}"
                )
            if not t["labels"].dtype == torch.int64:
                raise TypeError(
                    f"target labels must be of int64 type, instead got {t['labels'].dtype}"
                )
            if self.has_keypoint():
                if not t["keypoints"].dtype == torch.float32:
                    raise TypeError(
                        f"target keypoints must be of float type, instead got {t['keypoints'].dtype}"
                    )

    if training:
        proposals, matched_idxs, labels, regression_targets = (
            self.select_training_samples(proposals, targets)
        )
    else:
        labels = None
        regression_targets = None
        matched_idxs = None

    box_features = self.box_roi_pool(features, proposals, image_shapes)
    box_features = self.box_head(box_features)
    class_logits, box_regression = self.box_predictor(box_features)

    result = []
    losses = {}
    if training:
        if labels is None:
            raise ValueError("labels cannot be None")
        if regression_targets is None:
            raise ValueError("regression_targets cannot be None")
        loss_classifier, loss_box_reg = fastrcnn_loss(
            class_logits, box_regression, labels, regression_targets
        )

        # custom addition to calculate quantity loss
        dtype = proposals[0].dtype
        gt_boxes = [t["boxes"].to(dtype) for t in targets]
        gt_labels = [t["quantity_labels"] for t in targets]
        _, quantity_labels = self.assign_targets_to_proposals(
            proposals, gt_boxes, gt_labels
        )
        quantity_labels = torch.cat(quantity_labels, dim=0)
        # needs quantity_prediction layer to be added to class
        quantity_preds = self.quantity_prediction(box_features)
        loss_classifier_quantity = F.cross_entropy(
            quantity_preds,
            quantity_labels,
        )
        losses = {
            "loss_classifier": loss_classifier,
            "loss_box_reg": loss_box_reg,
            "loss_classifier_quantity": loss_classifier_quantity,
        }
    else:
        quantity_logits = self.quantity_prediction(box_features)

        boxes, scores, labels, quantities, features = postprocess_detections_custom(
            self,
            class_logits,
            quantity_logits,
            box_features,
            box_regression,
            proposals,
            image_shapes,
        )
        num_images = len(boxes)
        for i in range(num_images):
            result.append(
                {
                    "boxes": boxes[i],
                    "labels": labels[i],
                    "scores": scores[i],
                    "quantities": quantities[i],
                    "features": features[i],
                }
            )

    if self.has_mask():
        mask_proposals = [p["boxes"] for p in result]
        if training:
            if matched_idxs is None:
                raise ValueError("if in training, matched_idxs should not be None")

            # during training, only focus on positive boxes
            num_images = len(proposals)
            mask_proposals = []
            pos_matched_idxs = []
            for img_id in range(num_images):
                pos = torch.where(labels[img_id] > 0)[0]
                mask_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])
        else:
            pos_matched_idxs = None

        if self.mask_roi_pool is not None:
            mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
            mask_features = self.mask_head(mask_features)
            mask_logits = self.mask_predictor(mask_features)
        else:
            raise Exception("Expected mask_roi_pool to be not None")

        loss_mask = {}
        if training:
            if targets is None or pos_matched_idxs is None or mask_logits is None:
                raise ValueError(
                    "targets, pos_matched_idxs, mask_logits cannot be None when training"
                )

            gt_masks = [t["masks"] for t in targets]
            gt_labels = [t["labels"] for t in targets]
            rcnn_loss_mask = maskrcnn_loss(
                mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs
            )
            loss_mask = {"loss_mask": rcnn_loss_mask}
        else:
            labels = [r["labels"] for r in result]
            masks_probs = maskrcnn_inference(mask_logits, labels)
            for mask_prob, r in zip(masks_probs, result):
                r["masks"] = mask_prob

        losses.update(loss_mask)

    # keep none checks in if conditional so torchscript will conditionally
    # compile each branch
    if (
        self.keypoint_roi_pool is not None
        and self.keypoint_head is not None
        and self.keypoint_predictor is not None
    ):
        keypoint_proposals = [p["boxes"] for p in result]
        if training:
            # during training, only focus on positive boxes
            num_images = len(proposals)
            keypoint_proposals = []
            pos_matched_idxs = []
            if matched_idxs is None:
                raise ValueError("if in training, matched_idxs should not be None")

            for img_id in range(num_images):
                pos = torch.where(labels[img_id] > 0)[0]
                keypoint_proposals.append(proposals[img_id][pos])
                pos_matched_idxs.append(matched_idxs[img_id][pos])
        else:
            pos_matched_idxs = None

        keypoint_features = self.keypoint_roi_pool(
            features, keypoint_proposals, image_shapes
        )
        keypoint_features = self.keypoint_head(keypoint_features)
        keypoint_logits = self.keypoint_predictor(keypoint_features)

        loss_keypoint = {}
        if training:
            if targets is None or pos_matched_idxs is None:
                raise ValueError(
                    "both targets and pos_matched_idxs should not be None when in training mode"
                )

            gt_keypoints = [t["keypoints"] for t in targets]
            rcnn_loss_keypoint = keypointrcnn_loss(
                keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs
            )
            loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
        else:
            if keypoint_logits is None or keypoint_proposals is None:
                raise ValueError(
                    "both keypoint_logits and keypoint_proposals should not be None when not in training mode"
                )

            keypoints_probs, kp_scores = keypointrcnn_inference(
                keypoint_logits, keypoint_proposals
            )
            for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                r["keypoints"] = keypoint_prob
                r["keypoints_scores"] = kps
        losses.update(loss_keypoint)

    return result, losses


def calculate_iou(boxA, boxB):
    """Calculate Intersection over Union (IoU) between two bounding boxes."""
    # Determine the coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Compute the area of intersection
    interArea = max(0, xB - xA) * max(0, yB - yA)

    # Compute the area of both the bounding boxes
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])

    # Compute the IoU
    iou = interArea / float(boxAArea + boxBArea - interArea)

    return iou


def bbox_to_slot_index_iou(bbox: tuple[int, int, int, int]) -> int:
    """Assign the given bounding box to the slot with the highest IoU."""
    best_slot = None
    best_iou = -1
    # Iterate through all precomputed slot bounding boxes
    for slot, slot_bbox in IDX_TO_BBOX.items():
        iou = calculate_iou(bbox, slot_bbox)
        if iou > best_iou:
            best_iou = iou
            best_slot = slot
    return best_slot


class IntegratedBoundingBoxModel(nn.Module, PyTorchModelHubMixin):
    """
    Custom Faster R-CNN model with quantity prediction

    Also returns the feature vectors of the detected boxes
    """

    def __init__(self, load_resnet_weights=False):
        super(IntegratedBoundingBoxModel, self).__init__()
        weights = None
        if load_resnet_weights:
            weights = ResNet50_Weights.DEFAULT

        self.model = fasterrcnn_resnet50_fpn_v2(
            weights_backbone=weights,
            image_mean=[0.63, 0.63, 0.63],
            image_std=[0.21, 0.21, 0.21],
            min_size=128,
            max_size=256,
            num_classes=len(ALL_ITEMS),
            box_score_thresh=0.05,
            rpn_batch_size_per_image=64,
            box_detections_per_img=64,
            box_batch_size_per_image=128,
        )
        self.model.roi_heads.quantity_prediction = nn.Linear(1024, 65)

        # replace the roi_heads forward with the custom forward that also
        # predicts quantities and returns box features
        self.model.roi_heads.forward = forward_custom.__get__(
            self.model.roi_heads, type(self.model.roi_heads)
        )

        self.transform = v2.Compose(
            [v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]
        )

    def forward(self, x, targets=None):
        if self.training:
            # normal forward pass
            loss_dict = self.model(x, targets)
            return loss_dict
        else:
            preds = self.model(x)
            return preds

    def get_inventory(self, pil_image):
        """
        Predict boxes and quantities
        """
        img_tensor = self.transform(pil_image)
        if next(self.model.parameters()).is_cuda:
            img_tensor = img_tensor.cuda()
        with torch.no_grad():
            predictions = self.model(img_tensor.unsqueeze(0))
        return self.prediction_to_inventory(predictions[0])

    @staticmethod
    def prediction_to_inventory(prediction, threshold=0.9) -> list[dict]:
        inventory = []
        seen_slots = set()
        for i in range(len(prediction["boxes"])):
            slot = bbox_to_slot_index_iou(prediction["boxes"][i])
            score = prediction["scores"][i]
            label_idx = prediction["labels"][i].item()
            label = ALL_ITEMS[label_idx]
            quantity = prediction["quantities"][i].item()
            if score > threshold:
                if slot in seen_slots:
                    continue
                inventory.append({"slot": slot, "type": label, "quantity": quantity})
                seen_slots.add(slot)
        return inventory

    def freeze(self):
        # NOTE: this might seem excessive
        # but transformers trainer is really good at enabling gradients against my will
        self.eval()
        self.model.eval()
        self.training = False
        for param in self.model.parameters():
            param.requires_grad = False
        self.model.training = False
        self.model.roi_heads.training = False
        self.model.rpn.training = False

    def save(self, path: str):
        torch.save(self.state_dict(), path)
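A minimal inference sketch for IntegratedBoundingBoxModel. The checkpoint and image paths are placeholders (no trained weights ship in this wheel); loading via load_state_dict mirrors the save method above, and since the class mixes in PyTorchModelHubMixin, IntegratedBoundingBoxModel.from_pretrained(<repo_id>) would also work if a checkpoint were published on the Hugging Face Hub.

# Hedged usage sketch: paths below are placeholders, not shipped artifacts.
import torch
from PIL import Image
from plancraft.models.bbox_model import IntegratedBoundingBoxModel

model = IntegratedBoundingBoxModel(load_resnet_weights=False)
model.load_state_dict(torch.load("bbox_model.pt", map_location="cpu"))  # counterpart of model.save(...)
model.freeze()  # eval mode, gradients disabled

screenshot = Image.open("crafting_gui.png").convert("RGB")
inventory = model.get_inventory(screenshot)
# e.g. [{"slot": 10, "type": "oak_planks", "quantity": 4}, ...]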
models/dummy.py
ADDED
@@ -0,0 +1,54 @@
import random

from plancraft.config import EvalConfig
from plancraft.environments.actions import (
    RealActionInteraction,
    SymbolicMoveAction,
    SymbolicSmeltAction,
)
from plancraft.models.base import ABCModel, History


class DummyModel(ABCModel):
    """
    Dummy model that returns random actions
    """

    def __init__(self, cfg: EvalConfig):
        self.symbolic_move_action = cfg.plancraft.environment.symbolic_action_space
        self.history = History(objective="")

    def random_select(self, observation):
        if observation is None or "inventory" not in observation:
            return SymbolicMoveAction(slot_from=0, slot_to=0, quantity=1)
        # randomly pick an item from the inventory
        item_indices = set()
        for item in observation["inventory"]:
            if item["quantity"] > 0:
                item_indices.add(item["index"])
        all_slots_to = set(range(1, 46))
        empty_slots = all_slots_to - item_indices

        random_slot_from = random.choice(list(item_indices))
        random_slot_to = random.choice(list(empty_slots))

        return SymbolicMoveAction(
            slot_from=random_slot_from, slot_to=random_slot_to, quantity=1
        )

    def step(
        self, observation: dict
    ) -> list[SymbolicMoveAction | RealActionInteraction | SymbolicSmeltAction]:
        # add observation to history
        self.history.add_observation_to_history(observation)

        # get action
        if self.symbolic_move_action:
            action = self.random_select(observation)
        else:
            action = RealActionInteraction()

        # add action to history
        self.history.add_action_to_history(action)

        return action
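A minimal usage sketch for DummyModel. The real EvalConfig is defined in plancraft.config (not shown here), so a SimpleNamespace stands in for the single nested attribute the constructor reads; the observation dict follows the index/quantity keys that random_select expects.

# Illustrative sketch: SimpleNamespace only mimics the one attribute
# DummyModel.__init__ accesses on the real EvalConfig.
from types import SimpleNamespace
from plancraft.models.dummy import DummyModel

cfg = SimpleNamespace(
    plancraft=SimpleNamespace(environment=SimpleNamespace(symbolic_action_space=True))
)
model = DummyModel(cfg)

observation = {
    "inventory": [
        {"index": 10, "type": "oak_planks", "quantity": 4},
        {"index": 11, "type": "stick", "quantity": 2},
    ]
}
action = model.random_select(observation)
# SymbolicMoveAction from slot 10 or 11 to a random empty slot, quantity=1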
models/few_shot_images/__init__.py
ADDED
@@ -0,0 +1,16 @@
import os
import glob

import numpy as np
import imageio


def get_few_shot_images_path():
    return os.path.dirname(__file__)


def load_prompt_images() -> list[np.ndarray]:
    current_dir = get_few_shot_images_path()
    files = glob.glob(os.path.join(current_dir, "*.png"))
    images = [imageio.imread(file) for file in files]
    return images
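A short sketch of how the helper above might be used; it assumes the few-shot *.png files are installed alongside this module (the image files themselves are not listed in this diff).

# Sketch: loads whatever *.png files sit next to the few_shot_images module.
from plancraft.models.few_shot_images import load_prompt_images

images = load_prompt_images()
print(f"loaded {len(images)} prompt images")
if images:
    print(images[0].shape)  # numpy array, e.g. (H, W, 3)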