PyPI - graspzero - Versions diffs - 0.1.0__py3-none-any.whl - Mend

graspzero 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

graspzero/__init__.py +3 -0
graspzero/model.py +68 -0
graspzero/predictor.py +87 -0
graspzero-0.1.0.dist-info/METADATA +16 -0
graspzero-0.1.0.dist-info/RECORD +7 -0
graspzero-0.1.0.dist-info/WHEEL +5 -0
graspzero-0.1.0.dist-info/top_level.txt +1 -0

graspzero/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .predictor import GraspPredictor
+# Package version string for this release.
+__version__ = "0.1.0"
+# Public API: only GraspPredictor is exported by `from graspzero import *`.
+__all__ = ["GraspPredictor"]

graspzero/model.py ADDED Viewed

@@ -0,0 +1,68 @@
+# model.py — model architecture; module names and shapes must match the released checkpoint
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import Dinov2Model
+class LoRALinear(nn.Module):
+    """Linear layer wrapper that adds a low-rank linear term to the output.
+
+    The wrapped layer is stored as ``original``; the extra term is
+    ``lora_B(lora_A(x))``, two bias-free linear maps through ``rank`` features.
+
+    Args:
+        original: layer exposing ``in_features`` and ``out_features``.
+        rank: width of the intermediate projection (default 8).
+    """
+
+    def __init__(self, original, rank=8):
+        super().__init__()
+        in_dim, out_dim = original.in_features, original.out_features
+        self.original = original
+        # Down-projection is created before the up-projection so parameter
+        # registration order (and state_dict key order) stays A then B.
+        self.lora_A = nn.Linear(in_dim, rank, bias=False)
+        self.lora_B = nn.Linear(rank, out_dim, bias=False)
+
+    def forward(self, x):
+        """Return ``original(x)`` plus the low-rank correction for ``x``."""
+        base = self.original(x)
+        correction = self.lora_B(self.lora_A(x))
+        return base + correction
+class AffordanceDecoder(nn.Module):
+    """Convolutional decoder from a 384-channel feature map to a 1-channel map.
+
+    Four stages (``block1`` .. ``block4``) each apply a 3x3 convolution,
+    batch norm and ReLU, narrowing channels 384 -> 256 -> 128 -> 64 -> 32.
+    Every stage output is upsampled 2x bilinearly, so the final map is 16x
+    the input's spatial size. ``output_head`` is a 1x1 convolution to one
+    channel; no activation is applied to its output here.
+    """
+
+    def __init__(self):
+        super().__init__()
+        widths = (384, 256, 128, 64, 32)
+        # Stages are registered as block1..block4, in that order, so the
+        # state_dict keys are block<N>.0.* (conv) and block<N>.1.* (batch norm).
+        for idx, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
+            stage = nn.Sequential(
+                nn.Conv2d(c_in, c_out, 3, padding=1),
+                nn.BatchNorm2d(c_out),
+                nn.ReLU(),
+            )
+            setattr(self, f"block{idx}", stage)
+        self.output_head = nn.Conv2d(32, 1, 1)
+
+    def forward(self, x):
+        """Run the four stages with 2x upsampling after each, then the head."""
+        for stage in (self.block1, self.block2, self.block3, self.block4):
+            features = stage(x)
+            x = F.interpolate(
+                features,
+                scale_factor=2,
+                mode='bilinear',
+                align_corners=False,
+            )
+        return self.output_head(x)
+class GraspZeroModel(nn.Module):
+    """DINOv2-small backbone with LoRA-wrapped attention plus a conv decoder.
+
+    The backbone is loaded with ``Dinov2Model.from_pretrained``. In encoder
+    layers 8 through 11 the attention ``query``, ``key`` and ``value``
+    projections are replaced by ``LoRALinear`` wrappers around the originals.
+    Backbone tokens are reshaped to a square grid and passed to
+    ``AffordanceDecoder``.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.backbone = Dinov2Model.from_pretrained("facebook/dinov2-small")
+        for layer_idx in (8, 9, 10, 11):
+            self_attn = self.backbone.encoder.layer[layer_idx].attention.attention
+            # Wrap in query, key, value order to keep module creation order.
+            for proj_name in ("query", "key", "value"):
+                wrapped = LoRALinear(getattr(self_attn, proj_name))
+                setattr(self_attn, proj_name, wrapped)
+        self.decoder = AffordanceDecoder()
+
+    def forward(self, x):
+        """Return the decoder output for an image batch ``x``."""
+        hidden = self.backbone(x).last_hidden_state
+        # Skip the token at index 0 (presumably the CLS token — confirm
+        # against the Dinov2Model output layout); the rest form the grid.
+        tokens = hidden[:, 1:, :]
+        batch, count, channels = tokens.shape
+        # Assumes the remaining token count is a perfect square.
+        side = int(count ** 0.5)
+        grid = tokens.transpose(1, 2).reshape(batch, channels, side, side)
+        return self.decoder(grid)

graspzero/predictor.py ADDED Viewed

@@ -0,0 +1,87 @@
+# predictor.py — main user-facing class
+import torch
+import numpy as np
+from PIL import Image
+from torchvision import transforms
+from huggingface_hub import hf_hub_download
+from .model import GraspZeroModel
+# Repository id passed to hf_hub_download when fetching the checkpoint.
+WEIGHTS_REPO = "Jignesh2619/graspzero"
+# Name of the checkpoint file requested from WEIGHTS_REPO.
+WEIGHTS_FILE = "graspzero_weights.pt"
+class GraspPredictor:
+    """Load the GraspZero checkpoint and predict one grasp point per image.
+
+    Construction downloads ``WEIGHTS_FILE`` from ``WEIGHTS_REPO`` via
+    ``hf_hub_download``, loads it into a ``GraspZeroModel``, switches the
+    model to eval mode and moves it to ``device``.
+    """
+
+    def __init__(self, device=None):
+        """
+        Args:
+            device: torch device string; when None, "cuda" is used if
+                available, otherwise "cpu".
+        """
+        if device is None:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = device
+        print("Loading GraspZero model...")
+        weights_path = hf_hub_download(
+            repo_id=WEIGHTS_REPO,
+            filename=WEIGHTS_FILE
+        )
+        self.model = GraspZeroModel()
+        # The checkpoint comes from a remote repository, so do not unpickle
+        # arbitrary objects: weights_only=True limits loading to tensors and
+        # plain containers. NOTE(review): a checkpoint that stores custom
+        # Python objects next to the weights would be rejected here.
+        ckpt = torch.load(weights_path, map_location="cpu", weights_only=True)
+        # Accept either a bare state dict or one nested under the "model" key.
+        state = ckpt["model"] if "model" in ckpt else ckpt
+        self.model.load_state_dict(state)
+        self.model.eval().to(self.device)
+        print(f"Ready on {self.device}")
+        self.transform = transforms.Compose([
+            transforms.Resize((518, 518)),
+            transforms.ToTensor(),
+            transforms.Normalize(
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]
+            )
+        ])
+
+    @staticmethod
+    def _load_rgb(image):
+        """Return ``image`` (path, numpy array or PIL image) as an RGB PIL image."""
+        import os  # local import: only needed for the path-like check
+
+        if isinstance(image, (str, os.PathLike)):
+            # The context manager closes the file handle; convert() returns
+            # an independent in-memory image that outlives it.
+            with Image.open(image) as opened:
+                return opened.convert("RGB")
+        if isinstance(image, np.ndarray):
+            return Image.fromarray(image).convert("RGB")
+        # Anything else is treated as a PIL image (duck-typed on .convert).
+        return image.convert("RGB")
+
+    def predict(self, image):
+        """
+        Args:
+            image: file path (str or os.PathLike) or PIL Image or numpy array
+        Returns:
+            dict with keys:
+              grasp_x, grasp_y  — pixel coords in original image space
+              confidence        — float 0-1
+              mask              — numpy array (H, W) probability map
+        """
+        img = self._load_rgb(image)
+        orig_w, orig_h = img.size
+        # Inference
+        x = self.transform(img).unsqueeze(0).to(self.device)
+        with torch.no_grad():
+            pred = self.model(x)
+            mask = torch.sigmoid(pred).squeeze().cpu().numpy()
+        # Resize the mask back to the original image size as a float image
+        # (PIL mode "F") so probabilities are not truncated to 8-bit steps.
+        # Resampling can overshoot slightly, hence the clip to [0, 1].
+        resized = Image.fromarray(mask.astype(np.float32)).resize(
+            (orig_w, orig_h)
+        )
+        mask = np.clip(np.asarray(resized, dtype=np.float64), 0.0, 1.0)
+        # Grasp point = center of mass of high-confidence region
+        confidence = float(mask.max())
+        if confidence > 0.3:
+            # The region always contains the maximum itself, so it is never
+            # empty when this branch is taken.
+            ys, xs = np.nonzero(mask > (confidence * 0.7))
+            grasp_x = int(xs.mean())
+            grasp_y = int(ys.mean())
+        else:
+            # Low confidence: fall back to the image center.
+            grasp_x = orig_w // 2
+            grasp_y = orig_h // 2
+        return {
+            "grasp_x":    grasp_x,
+            "grasp_y":    grasp_y,
+            "confidence": confidence,
+            "mask":       mask
+        }

graspzero-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,16 @@
+Metadata-Version: 2.4
+Name: graspzero
+Version: 0.1.0
+Summary: Zero-shot robotic grasping — no demos, no training
+Author: Jignesh
+Requires-Python: >=3.9
+Requires-Dist: torch>=2.0.0
+Requires-Dist: torchvision>=0.15.0
+Requires-Dist: transformers>=4.35.0
+Requires-Dist: huggingface_hub>=0.19.0
+Requires-Dist: Pillow>=9.0.0
+Requires-Dist: numpy>=1.24.0
+Dynamic: author
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

graspzero-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+graspzero/__init__.py,sha256=njuImq59Uz6tO2oxlSuZ9E7FbRoLtpI6NZqaPIhLxc8,90
+graspzero/model.py,sha256=RGlDBVH8FVM3i-XnhNlatN5_rlfEU0PaEeoSFqQGyVk,2638
+graspzero/predictor.py,sha256=5H3CmSvcvD7ZC41R7foFuC0i63qYizH13NVl0ryn908,2872
+graspzero-0.1.0.dist-info/METADATA,sha256=yamyw-Y3xIJWkenXroLzpumcIZUDzkZHblnV5LUScog,447
+graspzero-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+graspzero-0.1.0.dist-info/top_level.txt,sha256=szyqy2z25Yylo3zO66bWx_jlUUIrpzjmDFkcxGw07BU,10
+graspzero-0.1.0.dist-info/RECORD,,

graspzero-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

graspzero-0.1.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+graspzero