dora-sam2 0.0.0__tar.gz → 0.3.10rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in those registries.
@@ -1,12 +1,12 @@
  Metadata-Version: 2.2
  Name: dora-sam2
- Version: 0.0.0
+ Version: 0.3.10rc1
  Summary: dora-sam2
  Author-email: Your Name <email@email.com>
  License: MIT
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
- Requires-Dist: dora-rs>=0.3.6
+ Requires-Dist: dora-rs>=0.3.10rc1
  Requires-Dist: huggingface-hub>=0.29.0
  Requires-Dist: opencv-python>=4.11.0.86
  Requires-Dist: sam2>=1.1.0
@@ -0,0 +1,208 @@
+ import cv2
+ import numpy as np
+ import pyarrow as pa
+ import torch
+ from dora import Node
+ from PIL import Image
+ from sam2.sam2_image_predictor import SAM2ImagePredictor
+
+ predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")
+
+
+ def main():
+     pa.array([])  # initialize pyarrow array
+     node = Node()
+     frames = {}
+     last_pred = None
+     labels = None
+     return_type = pa.Array
+     image_id = None
+     for event in node:
+         event_type = event["type"]
+
+         if event_type == "INPUT":
+             event_id = event["id"]
+
+             if "image" in event_id:
+                 storage = event["value"]
+                 metadata = event["metadata"]
+                 encoding = metadata["encoding"]
+                 width = metadata["width"]
+                 height = metadata["height"]
+
+                 if (
+                     encoding == "bgr8"
+                     or encoding == "rgb8"
+                     or encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]
+                 ):
+                     channels = 3
+                     storage_type = np.uint8
+                 else:
+                     error = f"Unsupported image encoding: {encoding}"
+                     raise RuntimeError(error)
+
+                 if encoding == "bgr8":
+                     frame = (
+                         storage.to_numpy()
+                         .astype(storage_type)
+                         .reshape((height, width, channels))
+                     )
+                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
+                 elif encoding == "rgb8":
+                     frame = (
+                         storage.to_numpy()
+                         .astype(storage_type)
+                         .reshape((height, width, channels))
+                     )
+                 elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+                     storage = storage.to_numpy()
+                     frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
+                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
+                 else:
+                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
+                 image = Image.fromarray(frame)
+                 frames[event_id] = image
+
+                 # TODO: Fix the tracking code for SAM2.
+                 continue
+                 if last_pred is not None:
+                     with (
+                         torch.inference_mode(),
+                         torch.autocast(
+                             "cuda",
+                             dtype=torch.bfloat16,
+                         ),
+                     ):
+                         predictor.set_image(frames[image_id])
+
+                         new_logits = []
+                         new_masks = []
+
+                         if len(last_pred.shape) < 3:
+                             last_pred = np.expand_dims(last_pred, 0)
+
+                         for mask in last_pred:
+                             mask = np.expand_dims(mask, 0)  # Make shape: 1x256x256
+                             masks, _, new_logit = predictor.predict(
+                                 mask_input=mask,
+                                 multimask_output=False,
+                             )
+                             if len(masks.shape) == 4:
+                                 masks = masks[:, 0, :, :]
+                             else:
+                                 masks = masks[0, :, :]
+
+                             masks = masks > 0
+                             new_masks.append(masks)
+                             new_logits.append(new_logit)
+                         ## Mask to 3 channel image
+
+                         last_pred = np.concatenate(new_logits, axis=0)
+                         masks = np.concatenate(new_masks, axis=0)
+
+                         match return_type:
+                             case pa.Array:
+                                 node.send_output(
+                                     "masks",
+                                     pa.array(masks.ravel()),
+                                     metadata={
+                                         "image_id": image_id,
+                                         "width": frames[image_id].width,
+                                         "height": frames[image_id].height,
+                                     },
+                                 )
+                             case pa.StructArray:
+                                 node.send_output(
+                                     "masks",
+                                     pa.array(
+                                         [
+                                             {
+                                                 "masks": masks.ravel(),
+                                                 "labels": event["value"]["labels"],
+                                             }
+                                         ]
+                                     ),
+                                     metadata={
+                                         "image_id": image_id,
+                                         "width": frames[image_id].width,
+                                         "height": frames[image_id].height,
+                                     },
+                                 )
+
+             elif "boxes2d" in event_id:
+
+                 if isinstance(event["value"], pa.StructArray):
+                     boxes2d = event["value"][0].get("bbox").values.to_numpy()
+                     labels = (
+                         event["value"][0]
+                         .get("labels")
+                         .values.to_numpy(zero_copy_only=False)
+                     )
+                     return_type = pa.Array
+                 else:
+                     boxes2d = event["value"].to_numpy()
+                     labels = None
+                     return_type = pa.Array
+
+                 metadata = event["metadata"]
+                 encoding = metadata["encoding"]
+                 if encoding != "xyxy":
+                     raise RuntimeError(f"Unsupported boxes2d encoding: {encoding}")
+                 boxes2d = boxes2d.reshape(-1, 4)
+                 image_id = metadata["image_id"]
+                 with (
+                     torch.inference_mode(),
+                     torch.autocast(
+                         "cuda",
+                         dtype=torch.bfloat16,
+                     ),
+                 ):
+                     predictor.set_image(frames[image_id])
+                     masks, _scores, last_pred = predictor.predict(
+                         box=boxes2d, point_labels=labels, multimask_output=False
+                     )
+
+                     if len(masks.shape) == 4:
+                         masks = masks[:, 0, :, :]
+                         last_pred = last_pred[:, 0, :, :]
+                     else:
+                         masks = masks[0, :, :]
+                         last_pred = last_pred[0, :, :]
+
+                     masks = masks > 0
+                     ## Mask to 3 channel image
+                     match return_type:
+                         case pa.Array:
+                             node.send_output(
+                                 "masks",
+                                 pa.array(masks.ravel()),
+                                 metadata={
+                                     "image_id": image_id,
+                                     "width": frames[image_id].width,
+                                     "height": frames[image_id].height,
+                                 },
+                             )
+                         case pa.StructArray:
+                             node.send_output(
+                                 "masks",
+                                 pa.array(
+                                     [
+                                         {
+                                             "masks": masks.ravel(),
+                                             "labels": event["value"]["labels"],
+                                         }
+                                     ]
+                                 ),
+                                 metadata={
+                                     "image_id": image_id,
+                                     "width": frames[image_id].width,
+                                     "height": frames[image_id].height,
+                                 },
+                             )
+
+         elif event_type == "ERROR":
+             print("Event Error:" + event["error"])
+
+
+ if __name__ == "__main__":
+     main()
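
The added node ravel()s each batch of boolean masks into a single flat array before sending and puts the image dimensions in the output metadata, so a downstream consumer has to undo the flattening itself. A minimal consumer-side sketch, not part of this package (masks_from_event is a hypothetical helper, assuming a dora event carrying the "masks" output shown above):

    def masks_from_event(event):
        # dora-sam2 sends one or more HxW boolean masks ravel()ed into a
        # single flat pyarrow array; width/height travel in the metadata.
        meta = event["metadata"]
        flat = event["value"].to_numpy(zero_copy_only=False)
        return flat.reshape((-1, meta["height"], meta["width"]))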
@@ -1,12 +1,12 @@
  Metadata-Version: 2.2
  Name: dora-sam2
- Version: 0.0.0
+ Version: 0.3.10rc1
  Summary: dora-sam2
  Author-email: Your Name <email@email.com>
  License: MIT
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
- Requires-Dist: dora-rs>=0.3.6
+ Requires-Dist: dora-rs>=0.3.10rc1
  Requires-Dist: huggingface-hub>=0.29.0
  Requires-Dist: opencv-python>=4.11.0.86
  Requires-Dist: sam2>=1.1.0
@@ -1,4 +1,4 @@
- dora-rs>=0.3.6
+ dora-rs>=0.3.10rc1
  huggingface-hub>=0.29.0
  opencv-python>=4.11.0.86
  sam2>=1.1.0
@@ -1,6 +1,6 @@
  [project]
  name = "dora-sam2"
- version = "0.0.0"
+ version = "0.3.10-rc1"
  authors = [{ name = "Your Name", email = "email@email.com" }]
  description = "dora-sam2"
  license = { text = "MIT" }
@@ -8,7 +8,7 @@ readme = "README.md"
  requires-python = ">=3.10"

  dependencies = [
-     "dora-rs >= 0.3.6",
+     "dora-rs >= 0.3.10rc1",
      "huggingface-hub>=0.29.0",
      "opencv-python>=4.11.0.86",
      "sam2>=1.1.0",
@@ -1,94 +0,0 @@
- import cv2
- import numpy as np
- import pyarrow as pa
- import torch
- from dora import Node
- from PIL import Image
- from sam2.sam2_image_predictor import SAM2ImagePredictor
-
- predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")
-
-
- def main():
-     pa.array([])  # initialize pyarrow array
-     node = Node()
-     frames = {}
-     for event in node:
-         event_type = event["type"]
-
-         if event_type == "INPUT":
-             event_id = event["id"]
-
-             if "image" in event_id:
-                 storage = event["value"]
-                 metadata = event["metadata"]
-                 encoding = metadata["encoding"]
-                 width = metadata["width"]
-                 height = metadata["height"]
-
-                 if (
-                     encoding == "bgr8"
-                     or encoding == "rgb8"
-                     or encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]
-                 ):
-                     channels = 3
-                     storage_type = np.uint8
-                 else:
-                     error = f"Unsupported image encoding: {encoding}"
-                     raise RuntimeError(error)
-
-                 if encoding == "bgr8":
-                     frame = (
-                         storage.to_numpy()
-                         .astype(storage_type)
-                         .reshape((height, width, channels))
-                     )
-                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
-                 elif encoding == "rgb8":
-                     frame = (
-                         storage.to_numpy()
-                         .astype(storage_type)
-                         .reshape((height, width, channels))
-                     )
-                 elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
-                     storage = storage.to_numpy()
-                     frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
-                     frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
-                 else:
-                     raise RuntimeError(f"Unsupported image encoding: {encoding}")
-                 image = Image.fromarray(frame)
-                 frames[event_id] = image
-
-             elif "boxes2d" in event_id:
-                 boxes2d = event["value"].to_numpy()
-                 metadata = event["metadata"]
-                 encoding = metadata["encoding"]
-                 if encoding != "xyxy":
-                     raise RuntimeError(f"Unsupported boxes2d encoding: {encoding}")
-
-                 image_id = metadata["image_id"]
-                 with torch.inference_mode(), torch.autocast(
-                     "cuda",
-                     dtype=torch.bfloat16,
-                 ):
-                     predictor.set_image(frames[image_id])
-                     masks, _, _ = predictor.predict(box=boxes2d)
-                     masks = masks[0]
-                     ## Mask to 3 channel image
-
-                     node.send_output(
-                         "masks",
-                         pa.array(masks.ravel()),
-                         metadata={
-                             "image_id": image_id,
-                             "width": frames[image_id].width,
-                             "height": frames[image_id].height,
-                         },
-                     )
-
-         elif event_type == "ERROR":
-             print("Event Error:" + event["error"])
-
-
- if __name__ == "__main__":
-     main()
File without changes
File without changes