PyPI - dora-sam2 - Versions diffs - 0.4.1__tar.gz - Mend

dora-sam2 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

dora_sam2-0.4.1/PKG-INFO +53 -0
dora_sam2-0.4.1/README.md +40 -0
dora_sam2-0.4.1/dora_sam2/__init__.py +13 -0
dora_sam2-0.4.1/dora_sam2/__main__.py +6 -0
dora_sam2-0.4.1/dora_sam2/main.py +267 -0
dora_sam2-0.4.1/dora_sam2.egg-info/PKG-INFO +53 -0
dora_sam2-0.4.1/dora_sam2.egg-info/SOURCES.txt +12 -0
dora_sam2-0.4.1/dora_sam2.egg-info/dependency_links.txt +1 -0
dora_sam2-0.4.1/dora_sam2.egg-info/entry_points.txt +2 -0
dora_sam2-0.4.1/dora_sam2.egg-info/requires.txt +4 -0
dora_sam2-0.4.1/dora_sam2.egg-info/top_level.txt +1 -0
dora_sam2-0.4.1/pyproject.toml +36 -0
dora_sam2-0.4.1/setup.cfg +4 -0
dora_sam2-0.4.1/tests/test_dora_sam2.py +20 -0

dora_sam2-0.4.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,53 @@
+Metadata-Version: 2.4
+Name: dora-sam2
+Version: 0.4.1
+Summary: dora-sam2
+Author-email: Your Name <email@email.com>
+License: MIT
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: dora-rs>=0.3.9
+Requires-Dist: huggingface-hub>=0.29.0
+Requires-Dist: opencv-python>=4.11.0.86
+Requires-Dist: sam2>=1.1.0
+# dora-sam2
+> [!WARNING]
+> SAM2 requires an NVIDIA GPU to run.
+## Getting started
+- Install it with pip:
+```bash
+pip install -e .
+```
+## Contribution Guide
+- Format with [ruff](https://docs.astral.sh/ruff/):
+```bash
+ruff check . --fix
+```
+- Lint with ruff:
+```bash
+ruff check .
+```
+- Test with [pytest](https://github.com/pytest-dev/pytest)
+```bash
+pytest . # Test
+```
+## YAML Specification
+## Examples
+## License
+dora-sam2's code is released under the MIT License.

dora_sam2-0.4.1/README.md ADDED Viewed

@@ -0,0 +1,40 @@
+# dora-sam2
+> [!WARNING]
+> SAM2 requires an NVIDIA GPU to run.
+## Getting started
+- Install it with pip:
+```bash
+pip install -e .
+```
+## Contribution Guide
+- Format with [ruff](https://docs.astral.sh/ruff/):
+```bash
+ruff check . --fix
+```
+- Lint with ruff:
+```bash
+ruff check .
+```
+- Test with [pytest](https://github.com/pytest-dev/pytest)
+```bash
+pytest . # Test
+```
+## YAML Specification
+## Examples
+## License
+dora-sam2's code is released under the MIT License.

dora_sam2-0.4.1/dora_sam2/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""dora-sam2 package; the module docstring is replaced below by the README text."""
+import os
+# README.md is looked up one level above the package directory (the project root
+# in a source checkout).
+# NOTE(review): in a regular (non-editable) install that parent directory is
+# presumably site-packages, where no README.md exists, so the fallback below
+# would be used — confirm.
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+# Use the README content as the package docstring, or a fixed placeholder string
+# when the file is missing.
+try:
+    with open(readme_path, encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."

dora_sam2-0.4.1/dora_sam2/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Entry point for ``python -m dora_sam2``; delegates to ``main`` from ``.main``."""
+from .main import main
+# Only start the node when executed as a script/module, not when imported.
+if __name__ == "__main__":
+    main()

dora_sam2-0.4.1/dora_sam2/main.py ADDED Viewed

@@ -0,0 +1,267 @@
+"""TODO: Add docstring."""
+import cv2
+import numpy as np
+import pyarrow as pa
+import torch
+from dora import Node
+from PIL import Image
+from sam2.sam2_image_predictor import SAM2ImagePredictor
+# Module-level SAM2 predictor shared by main(). It is created at import time, so
+# merely importing this module calls from_pretrained().
+# NOTE(review): presumably this downloads/loads the "facebook/sam2-hiera-large"
+# weights on import — confirm, and consider whether lazy creation is preferable.
+predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")
+def main():
+    """Run the dora-sam2 node: cache incoming images and answer prompts with SAM2 masks.
+
+    Iterates over the events of a dora ``Node`` and dispatches ``INPUT`` events on
+    their id:
+
+    - ids containing ``"image"``: the payload is decoded according to
+      ``metadata["encoding"]`` (``bgr8``, ``rgb8`` or a compressed format decoded
+      with ``cv2.imdecode``), converted to RGB channel order and cached as a PIL
+      image in ``frames`` under the event id.
+    - ids containing ``"boxes2d"``: boxes (``xyxy`` encoding, reshaped to
+      ``(-1, 4)``) are used as prompts on the cached image named by
+      ``metadata["image_id"]``.
+    - ids containing ``"points"``: points (reshaped to ``(-1, 2)``) are used as
+      prompts on ``metadata["image_id"]`` or, when absent, on the first cached image.
+
+    For both prompt kinds the thresholded masks (``masks > 0``) are flattened and
+    sent on the ``"masks"`` output together with ``image_id``, ``width`` and
+    ``height`` metadata. ``ERROR`` events are printed.
+
+    Raises:
+        RuntimeError: If an image encoding or a boxes2d encoding is not supported.
+        KeyError: If a prompt references an ``image_id`` that is not in ``frames``.
+
+    """
+    pa.array([])  # initialize pyarrow array
+    node = Node()
+    # Latest image per image input id, stored as PIL images (see the image branch).
+    frames = {}
+    # Third value returned by the most recent predictor.predict() call; it is only
+    # consumed by the (currently unreachable) tracking code in the image branch.
+    last_pred = None
+    labels = None
+    # Selects the output layout in the `match` statements below.
+    # NOTE(review): every assignment in this function sets pa.Array, so the
+    # `case pa.StructArray` arms are never taken as the code stands.
+    return_type = pa.Array
+    # Id of the image the most recent prompt referred to.
+    image_id = None
+    for event in node:
+        event_type = event["type"]
+        if event_type == "INPUT":
+            event_id = event["id"]
+            if "image" in event_id:
+                storage = event["value"]
+                metadata = event["metadata"]
+                encoding = metadata["encoding"]
+                width = metadata["width"]
+                height = metadata["height"]
+                # Reject unknown encodings up front; all supported ones are treated
+                # as 3-channel uint8 data.
+                if (
+                    encoding == "bgr8"
+                    or encoding == "rgb8"
+                    or encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]
+                ):
+                    channels = 3
+                    storage_type = np.uint8
+                else:
+                    error = f"Unsupported image encoding: {encoding}"
+                    raise RuntimeError(error)
+                if encoding == "bgr8":
+                    frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                    )
+                    frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
+                elif encoding == "rgb8":
+                    frame = (
+                        storage.to_numpy()
+                        .astype(storage_type)
+                        .reshape((height, width, channels))
+                    )
+                elif encoding in ["jpeg", "jpg", "jpe", "bmp", "webp", "png"]:
+                    storage = storage.to_numpy()
+                    # NOTE(review): the result of cv2.imdecode is not checked; if
+                    # decoding can fail and return None, the slicing on the next
+                    # line would raise a TypeError — confirm and guard if needed.
+                    frame = cv2.imdecode(storage, cv2.IMREAD_COLOR)
+                    frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
+                else:
+                    # Unreachable: unsupported encodings already raised above.
+                    raise RuntimeError(f"Unsupported image encoding: {encoding}")
+                image = Image.fromarray(frame)
+                frames[event_id] = image
+                # TODO: Fix the tracking code for SAM2.
+                # NOTE(review): this unconditional `continue` makes the remainder of
+                # the image branch (re-prompting the predictor with `last_pred`)
+                # unreachable until the TODO above is resolved.
+                continue
+                if last_pred is not None:
+                    with (
+                        torch.inference_mode(),
+                        torch.autocast(
+                            "cuda",
+                            dtype=torch.bfloat16,
+                        ),
+                    ):
+                        predictor.set_image(frames[image_id])
+                        new_logits = []
+                        new_masks = []
+                        if len(last_pred.shape) < 3:
+                            last_pred = np.expand_dims(last_pred, 0)
+                        for mask in last_pred:
+                            mask = np.expand_dims(mask, 0)  # Make shape: 1x256x256
+                            masks, _, new_logit = predictor.predict(
+                                mask_input=mask,
+                                multimask_output=False,
+                            )
+                            if len(masks.shape) == 4:
+                                masks = masks[:, 0, :, :]
+                            else:
+                                masks = masks[0, :, :]
+                            masks = masks > 0
+                            new_masks.append(masks)
+                            new_logits.append(new_logit)
+                            ## Mask to 3 channel image
+                        last_pred = np.concatenate(new_logits, axis=0)
+                        masks = np.concatenate(new_masks, axis=0)
+                        match return_type:
+                            case pa.Array:
+                                node.send_output(
+                                    "masks",
+                                    pa.array(masks.ravel()),
+                                    metadata={
+                                        "image_id": image_id,
+                                        "width": frames[image_id].width,
+                                        "height": frames[image_id].height,
+                                    },
+                                )
+                            case pa.StructArray:
+                                node.send_output(
+                                    "masks",
+                                    pa.array(
+                                        [
+                                            {
+                                                "masks": masks.ravel(),
+                                                "labels": event["value"]["labels"],
+                                            },
+                                        ],
+                                    ),
+                                    metadata={
+                                        "image_id": image_id,
+                                        "width": frames[image_id].width,
+                                        "height": frames[image_id].height,
+                                    },
+                                )
+            # NOTE(review): plain `if` rather than `elif`. The image branch above
+            # always ends in `continue` or `raise`, so this is only reached for
+            # non-image ids and currently behaves like `elif`.
+            if "boxes2d" in event_id:
+                if len(event["value"]) == 0:
+                    # No boxes: send an empty masks array and wait for the next event.
+                    node.send_output("masks", pa.array([]), {"primitive": "masks"})
+                    continue
+                if isinstance(event["value"], pa.StructArray):
+                    # Struct payload: read `bbox` and `labels` from the first element.
+                    boxes2d = event["value"][0].get("bbox").values.to_numpy()
+                    labels = (
+                        event["value"][0]
+                        .get("labels")
+                        .values.to_numpy(zero_copy_only=False)
+                    )
+                    return_type = pa.Array
+                else:
+                    # Flat payload: the values themselves are the box coordinates.
+                    boxes2d = event["value"].to_numpy()
+                    labels = None
+                    return_type = pa.Array
+                metadata = event["metadata"]
+                encoding = metadata["encoding"]
+                if encoding != "xyxy":
+                    raise RuntimeError(f"Unsupported boxes2d encoding: {encoding}")
+                # One row of four coordinates per box.
+                boxes2d = boxes2d.reshape(-1, 4)
+                image_id = metadata["image_id"]
+                with (
+                    torch.inference_mode(),
+                    torch.autocast(
+                        "cuda",
+                        dtype=torch.bfloat16,
+                    ),
+                ):
+                    # Raises KeyError if no image with this id has been cached yet.
+                    predictor.set_image(frames[image_id])
+                    # NOTE(review): `labels` comes from the boxes2d payload (or is
+                    # None) and is passed as `point_labels` although no point
+                    # coordinates are supplied — confirm this is intended.
+                    masks, _scores, last_pred = predictor.predict(
+                        box=boxes2d,
+                        point_labels=labels,
+                        multimask_output=False,
+                    )
+                    # Drop one axis so both result layouts end up 3-D or 2-D.
+                    # Presumably 4-D results occur for several boxes and 3-D for a
+                    # single one — TODO confirm against the SAM2 predictor.
+                    if len(masks.shape) == 4:
+                        masks = masks[:, 0, :, :]
+                        last_pred = last_pred[:, 0, :, :]
+                    else:
+                        masks = masks[0, :, :]
+                        last_pred = last_pred[0, :, :]
+                    masks = masks > 0
+                    # The incoming metadata object is extended and reused as the
+                    # metadata of the outgoing message.
+                    metadata["image_id"] = image_id
+                    metadata["width"] = frames[image_id].width
+                    metadata["height"] = frames[image_id].height
+                    ## Mask to 3 channel image
+                    match return_type:
+                        case pa.Array:
+                            metadata["primitive"] = "masks"
+                            node.send_output("masks", pa.array(masks.ravel()), metadata)
+                        case pa.StructArray:
+                            metadata["primitive"] = "masks"
+                            node.send_output(
+                                "masks",
+                                pa.array(
+                                    [
+                                        {
+                                            "masks": masks.ravel(),
+                                            "labels": event["value"]["labels"],
+                                        },
+                                    ],
+                                ),
+                                metadata,
+                            )
+            elif "points" in event_id:
+                # One (x, y)-style pair per row; axis meaning is not visible here.
+                points = event["value"].to_numpy().reshape((-1, 2))
+                return_type = pa.Array
+                # Nothing to segment until at least one image has been received.
+                if len(frames) == 0:
+                    continue
+                # Fall back to the first cached image when no image_id is given.
+                first_image = next(iter(frames.keys()))
+                image_id = event["metadata"].get("image_id", first_image)
+                with (
+                    torch.inference_mode(),
+                    torch.autocast(
+                        "cuda",
+                        dtype=torch.bfloat16,
+                    ),
+                ):
+                    # Raises KeyError if metadata names an image_id that is not cached.
+                    predictor.set_image(frames[image_id])
+                    # NOTE(review): this labels the points 0, 1, 2, ... by index.
+                    # SAM2's predictor presumably expects 1 = foreground and
+                    # 0 = background per point, which would make the first point a
+                    # background prompt — TODO confirm against the SAM2 API.
+                    labels = [i for i in range(len(points))]
+                    masks, _scores, last_pred = predictor.predict(
+                        points,
+                        point_labels=labels,
+                        multimask_output=False,
+                    )
+                    # Same axis handling as in the boxes2d branch.
+                    if len(masks.shape) == 4:
+                        masks = masks[:, 0, :, :]
+                        last_pred = last_pred[:, 0, :, :]
+                    else:
+                        masks = masks[0, :, :]
+                        last_pred = last_pred[0, :, :]
+                    masks = masks > 0
+                    ## Mask to 3 channel image
+                    match return_type:
+                        case pa.Array:
+                            node.send_output(
+                                "masks",
+                                pa.array(masks.ravel()),
+                                metadata={
+                                    "image_id": image_id,
+                                    "width": frames[image_id].width,
+                                    "height": frames[image_id].height,
+                                },
+                            )
+                        case pa.StructArray:
+                            node.send_output(
+                                "masks",
+                                pa.array(
+                                    [
+                                        {
+                                            "masks": masks.ravel(),
+                                            "labels": event["value"]["labels"],
+                                        },
+                                    ],
+                                ),
+                                metadata={
+                                    "image_id": image_id,
+                                    "width": frames[image_id].width,
+                                    "height": frames[image_id].height,
+                                },
+                            )
+        elif event_type == "ERROR":
+            # assumes event["error"] is a str (plain string concatenation) — TODO confirm
+            print("Event Error:" + event["error"])
+# Allow running this file directly as a script in addition to the console entry point.
+if __name__ == "__main__":
+    main()

dora_sam2-0.4.1/dora_sam2.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,53 @@
+Metadata-Version: 2.4
+Name: dora-sam2
+Version: 0.4.1
+Summary: dora-sam2
+Author-email: Your Name <email@email.com>
+License: MIT
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: dora-rs>=0.3.9
+Requires-Dist: huggingface-hub>=0.29.0
+Requires-Dist: opencv-python>=4.11.0.86
+Requires-Dist: sam2>=1.1.0
+# dora-sam2
+> [!WARNING]
+> SAM2 requires an NVIDIA GPU to run.
+## Getting started
+- Install it with pip:
+```bash
+pip install -e .
+```
+## Contribution Guide
+- Format with [ruff](https://docs.astral.sh/ruff/):
+```bash
+ruff check . --fix
+```
+- Lint with ruff:
+```bash
+ruff check .
+```
+- Test with [pytest](https://github.com/pytest-dev/pytest)
+```bash
+pytest . # Test
+```
+## YAML Specification
+## Examples
+## License
+dora-sam2's code is released under the MIT License.

dora_sam2-0.4.1/dora_sam2.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,12 @@
+README.md
+pyproject.toml
+dora_sam2/__init__.py
+dora_sam2/__main__.py
+dora_sam2/main.py
+dora_sam2.egg-info/PKG-INFO
+dora_sam2.egg-info/SOURCES.txt
+dora_sam2.egg-info/dependency_links.txt
+dora_sam2.egg-info/entry_points.txt
+dora_sam2.egg-info/requires.txt
+dora_sam2.egg-info/top_level.txt
+tests/test_dora_sam2.py

dora_sam2-0.4.1/dora_sam2.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

dora_sam2-0.4.1/dora_sam2.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ dora-sam2 = dora_sam2.main:main

dora_sam2-0.4.1/dora_sam2.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,4 @@
+dora-rs>=0.3.9
+huggingface-hub>=0.29.0
+opencv-python>=4.11.0.86
+sam2>=1.1.0

dora_sam2-0.4.1/dora_sam2.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ dora_sam2

dora_sam2-0.4.1/pyproject.toml ADDED Viewed

@@ -0,0 +1,36 @@
+[project]
+name = "dora-sam2"
+version = "0.4.1"
+authors = [{ name = "Your Name", email = "email@email.com" }]
+description = "dora-sam2"
+license = { text = "MIT" }
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+  "dora-rs >= 0.3.9",
+  "huggingface-hub>=0.29.0",
+  "opencv-python>=4.11.0.86",
+  "sam2>=1.1.0",
+]
+[tool.uv]
+no-build-isolation-package = ['sam2']
+[dependency-groups]
+dev = ["pytest >=8.1.1", "ruff >=0.9.1"]
+[project.scripts]
+dora-sam2 = "dora_sam2.main:main"
+[tool.ruff.lint]
+extend-select = [
+  "D",    # pydocstyle
+  "UP",   # pyupgrade
+  "PERF", # Perflint
+  "RET",  # flake8-return
+  "RSE",  # flake8-raise
+  "NPY",  # NumPy-specific rules
+  "N",    # pep8-naming
+  "I",    # isort
+]

dora_sam2-0.4.1/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

dora_sam2-0.4.1/tests/test_dora_sam2.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""Smoke test for the dora-sam2 node entry point."""
+import os
+import pytest
+# True when the CI environment variable is exactly "True" or "true".
+CI = os.getenv("CI", "false") in ["True", "true"]
+def test_import_main():
+    """TODO: Add docstring."""
+    if CI:
+        # Skip test as test requires Nvidia GPU
+        return
+    from dora_sam2.main import main
+    # Check that everything is working, and catch dora Runtime Exception as we're not running in a dora dataflow.
+    with pytest.raises(RuntimeError):
+        main()