dora-sam2 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.4
2
+ Name: dora-sam2
3
+ Version: 0.4.1
4
+ Summary: dora-sam2
5
+ Author-email: Your Name <email@email.com>
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: dora-rs>=0.3.9
10
+ Requires-Dist: huggingface-hub>=0.29.0
11
+ Requires-Dist: opencv-python>=4.11.0.86
12
+ Requires-Dist: sam2>=1.1.0
13
+
14
+ # dora-sam2
15
+
16
+ > [!WARNING]
17
+ > SAM2 requires an Nvidia GPU to run.
18
+
19
+ ## Getting started
20
+
21
+ - Install it with pip:
22
+
23
+ ```bash
24
+ pip install -e .
25
+ ```
26
+
27
+ ## Contribution Guide
28
+
29
+ - Format with [ruff](https://docs.astral.sh/ruff/):
30
+
31
+ ```bash
32
+ ruff check . --fix
33
+ ```
34
+
35
+ - Lint with ruff:
36
+
37
+ ```bash
38
+ ruff check .
39
+ ```
40
+
41
+ - Test with [pytest](https://github.com/pytest-dev/pytest)
42
+
43
+ ```bash
44
+ pytest . # Test
45
+ ```
46
+
47
+ ## YAML Specification
48
+
49
+ ## Examples
50
+
51
+ ## License
52
+
53
+ dora-sam2's code is released under the MIT License.
@@ -0,0 +1,40 @@
1
+ # dora-sam2
2
+
3
+ > [!WARNING]
4
+ > SAM2 requires an Nvidia GPU to run.
5
+
6
+ ## Getting started
7
+
8
+ - Install it with pip:
9
+
10
+ ```bash
11
+ pip install -e .
12
+ ```
13
+
14
+ ## Contribution Guide
15
+
16
+ - Format with [ruff](https://docs.astral.sh/ruff/):
17
+
18
+ ```bash
19
+ ruff check . --fix
20
+ ```
21
+
22
+ - Lint with ruff:
23
+
24
+ ```bash
25
+ ruff check .
26
+ ```
27
+
28
+ - Test with [pytest](https://github.com/pytest-dev/pytest)
29
+
30
+ ```bash
31
+ pytest . # Test
32
+ ```
33
+
34
+ ## YAML Specification
35
+
36
+ ## Examples
37
+
38
+ ## License
39
+
40
+ dora-sam2's code is released under the MIT License.
@@ -0,0 +1,13 @@
1
+ """TODO: Add docstring."""
2
+
3
import os

# Locate README.md one directory above the directory containing this file.
_package_dir = os.path.dirname(__file__)
readme_path = os.path.join(os.path.dirname(_package_dir), "README.md")

# Expose the README text as this module's documentation; fall back to a
# placeholder message when the file does not exist.
try:
    with open(readme_path, encoding="utf-8") as _readme_file:
        _readme_text = _readme_file.read()
except FileNotFoundError:
    _readme_text = "README file not found."

__doc__ = _readme_text
@@ -0,0 +1,6 @@
1
+ """TODO: Add docstring."""
2
+
3
from .main import main

# Run the node only when this module is executed as a script, not when it is
# merely imported.
if __name__ == "__main__":
    main()
@@ -0,0 +1,267 @@
1
+ """TODO: Add docstring."""
2
+
3
+ import cv2
4
+ import numpy as np
5
+ import pyarrow as pa
6
+ import torch
7
+ from dora import Node
8
+ from PIL import Image
9
+ from sam2.sam2_image_predictor import SAM2ImagePredictor
10
+
11
# Shared SAM2 predictor, created once when this module is imported and used by
# main() for every prompt.
# NOTE(review): from_pretrained presumably fetches the
# "facebook/sam2-hiera-large" weights from the Hugging Face Hub on first use
# (huggingface-hub is a declared dependency) — confirm.
predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")
12
+
13
+
14
# Encodings that are decoded with OpenCV instead of being reshaped directly.
_COMPRESSED_ENCODINGS = ("jpeg", "jpg", "jpe", "bmp", "webp", "png")


def _decode_frame(storage, metadata):
    """Convert an image input into an RGB ``PIL.Image``.

    Args:
        storage: Arrow array carrying the image bytes.
        metadata: Event metadata. ``encoding`` is always required; ``width``
            and ``height`` are only read for the raw ``bgr8``/``rgb8``
            encodings.

    Returns:
        The decoded frame as a ``PIL.Image`` in RGB channel order.

    Raises:
        RuntimeError: If the encoding is unsupported or a compressed payload
            cannot be decoded.

    """
    encoding = metadata["encoding"]

    if encoding in ("bgr8", "rgb8"):
        frame = (
            storage.to_numpy()
            .astype(np.uint8)
            .reshape((metadata["height"], metadata["width"], 3))
        )
        if encoding == "bgr8":
            frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
    elif encoding in _COMPRESSED_ENCODINGS:
        frame = cv2.imdecode(storage.to_numpy(), cv2.IMREAD_COLOR)
        if frame is None:
            # cv2.imdecode signals failure by returning None instead of raising.
            raise RuntimeError(f"Failed to decode {encoding} image")
        frame = frame[:, :, ::-1]  # OpenCV image (BGR to RGB)
    else:
        raise RuntimeError(f"Unsupported image encoding: {encoding}")

    return Image.fromarray(frame)


def _segment(image, point_coords=None, point_labels=None, box=None):
    """Run the module-level ``predictor`` on ``image`` with the given prompt.

    Args:
        image: Frame to segment.
        point_coords: Optional point prompts, passed as the first positional
            argument of ``predictor.predict``.
        point_labels: Optional labels forwarded with the prompt.
        box: Optional box prompts.

    Returns:
        A ``(masks, logits)`` tuple: ``masks`` is the boolean ``masks > 0``
        array and ``logits`` is the third value returned by
        ``predictor.predict``. For both, only index 0 of the candidate axis is
        kept.

    """
    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
        predictor.set_image(image)
        masks, _scores, logits = predictor.predict(
            point_coords,
            point_labels=point_labels,
            box=box,
            multimask_output=False,
        )

    # A 4-D result carries a leading per-prompt axis; otherwise the candidate
    # axis comes first.
    if len(masks.shape) == 4:
        masks = masks[:, 0, :, :]
        logits = logits[:, 0, :, :]
    else:
        masks = masks[0, :, :]
        logits = logits[0, :, :]

    return masks > 0, logits


def _propagate_masks(image, last_pred):
    """Re-predict masks on ``image`` from the logits of a previous prediction.

    This is the tracking logic that used to sit, unreachable, after the
    ``continue`` in the image branch of ``main``. It is intentionally NOT
    called yet: see the TODO in ``main``.

    Args:
        image: New frame to segment.
        last_pred: Logits returned by an earlier ``_segment`` call.

    Returns:
        A ``(masks, logits)`` tuple with the per-mask results concatenated
        along axis 0.

    """
    if len(last_pred.shape) < 3:
        last_pred = np.expand_dims(last_pred, 0)

    new_masks = []
    new_logits = []
    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
        predictor.set_image(image)
        for mask in last_pred:
            masks, _, new_logit = predictor.predict(
                mask_input=np.expand_dims(mask, 0),  # Make shape: 1x256x256
                multimask_output=False,
            )
            if len(masks.shape) == 4:
                masks = masks[:, 0, :, :]
            else:
                masks = masks[0, :, :]
            new_masks.append(masks > 0)
            new_logits.append(new_logit)

    return np.concatenate(new_masks, axis=0), np.concatenate(new_logits, axis=0)


def main():
    """Run the dora-sam2 node: turn box or point prompts into masks.

    Inputs are dispatched on substrings of the event id:

    * ``image``: the frame is decoded and stored under its event id.
    * ``boxes2d``: boxes in ``xyxy`` encoding are segmented on the frame named
      by the ``image_id`` metadata entry.
    * ``points``: ``(x, y)`` points are segmented on the frame named by the
      ``image_id`` metadata entry, defaulting to the first stored frame.

    Each prediction is sent on the ``masks`` output as a flattened boolean
    array, with ``image_id``, ``width`` and ``height`` in the metadata.

    Raises:
        RuntimeError: On an unsupported or undecodable image, or a
            ``boxes2d`` encoding other than ``xyxy``.

    """
    pa.array([])  # initialize pyarrow array
    node = Node()
    frames = {}
    # Logits of the latest prediction, kept so the tracking path can be
    # re-enabled (see the TODO below).
    last_pred = None

    for event in node:
        event_type = event["type"]

        if event_type == "ERROR":
            print(f"Event Error:{event['error']}")
            continue
        if event_type != "INPUT":
            continue

        event_id = event["id"]

        if "image" in event_id:
            frames[event_id] = _decode_frame(event["value"], event["metadata"])
            # TODO: Fix the tracking code for SAM2, then call
            # _propagate_masks(...) here when last_pred is not None.
            continue

        if "boxes2d" in event_id:
            value = event["value"]
            if len(value) == 0:
                node.send_output("masks", pa.array([]), {"primitive": "masks"})
                continue

            if isinstance(value, pa.StructArray):
                boxes2d = value[0].get("bbox").values.to_numpy()
                labels = value[0].get("labels").values.to_numpy(zero_copy_only=False)
            else:
                boxes2d = value.to_numpy()
                labels = None

            metadata = event["metadata"]
            encoding = metadata["encoding"]
            if encoding != "xyxy":
                raise RuntimeError(f"Unsupported boxes2d encoding: {encoding}")
            boxes2d = boxes2d.reshape(-1, 4)

            image_id = metadata["image_id"]
            if image_id not in frames:
                # The referenced frame has not been received; skip the prompt
                # instead of stopping the node with a KeyError.
                print(f"Skipping {event_id}: no frame received for {image_id!r}")
                continue
            image = frames[image_id]

            masks, last_pred = _segment(image, point_labels=labels, box=boxes2d)

            # The incoming metadata is forwarded with the mask fields added.
            metadata["image_id"] = image_id
            metadata["width"] = image.width
            metadata["height"] = image.height
            metadata["primitive"] = "masks"
            node.send_output("masks", pa.array(masks.ravel()), metadata)

        elif "points" in event_id:
            points = event["value"].to_numpy().reshape((-1, 2))
            if len(frames) == 0:
                continue

            first_image = next(iter(frames))
            image_id = event["metadata"].get("image_id", first_image)
            if image_id not in frames:
                print(f"Skipping {event_id}: no frame received for {image_id!r}")
                continue
            image = frames[image_id]

            # NOTE(review): labels are 0..n-1, but SAM2 appears to expect
            # 1 = foreground / 0 = background — confirm the intended values.
            labels = list(range(len(points)))
            masks, last_pred = _segment(
                image,
                point_coords=points,
                point_labels=labels,
            )

            node.send_output(
                "masks",
                pa.array(masks.ravel()),
                metadata={
                    "image_id": image_id,
                    "width": image.width,
                    "height": image.height,
                },
            )
264
+
265
+
266
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.4
2
+ Name: dora-sam2
3
+ Version: 0.4.1
4
+ Summary: dora-sam2
5
+ Author-email: Your Name <email@email.com>
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: dora-rs>=0.3.9
10
+ Requires-Dist: huggingface-hub>=0.29.0
11
+ Requires-Dist: opencv-python>=4.11.0.86
12
+ Requires-Dist: sam2>=1.1.0
13
+
14
+ # dora-sam2
15
+
16
+ > [!WARNING]
17
+ > SAM2 requires an Nvidia GPU to run.
18
+
19
+ ## Getting started
20
+
21
+ - Install it with pip:
22
+
23
+ ```bash
24
+ pip install -e .
25
+ ```
26
+
27
+ ## Contribution Guide
28
+
29
+ - Format with [ruff](https://docs.astral.sh/ruff/):
30
+
31
+ ```bash
32
+ ruff check . --fix
33
+ ```
34
+
35
+ - Lint with ruff:
36
+
37
+ ```bash
38
+ ruff check .
39
+ ```
40
+
41
+ - Test with [pytest](https://github.com/pytest-dev/pytest)
42
+
43
+ ```bash
44
+ pytest . # Test
45
+ ```
46
+
47
+ ## YAML Specification
48
+
49
+ ## Examples
50
+
51
+ ## License
52
+
53
+ dora-sam2's code is released under the MIT License.
@@ -0,0 +1,12 @@
1
+ README.md
2
+ pyproject.toml
3
+ dora_sam2/__init__.py
4
+ dora_sam2/__main__.py
5
+ dora_sam2/main.py
6
+ dora_sam2.egg-info/PKG-INFO
7
+ dora_sam2.egg-info/SOURCES.txt
8
+ dora_sam2.egg-info/dependency_links.txt
9
+ dora_sam2.egg-info/entry_points.txt
10
+ dora_sam2.egg-info/requires.txt
11
+ dora_sam2.egg-info/top_level.txt
12
+ tests/test_dora_sam2.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ dora-sam2 = dora_sam2.main:main
@@ -0,0 +1,4 @@
1
+ dora-rs>=0.3.9
2
+ huggingface-hub>=0.29.0
3
+ opencv-python>=4.11.0.86
4
+ sam2>=1.1.0
@@ -0,0 +1 @@
1
+ dora_sam2
@@ -0,0 +1,36 @@
1
+ [project]
2
+ name = "dora-sam2"
3
+ version = "0.4.1"
4
+ authors = [{ name = "Your Name", email = "email@email.com" }]
5
+ description = "dora-sam2"
6
+ license = { text = "MIT" }
7
+ readme = "README.md"
8
+ requires-python = ">=3.10"
9
+
10
+ dependencies = [
11
+ "dora-rs >= 0.3.9",
12
+ "huggingface-hub>=0.29.0",
13
+ "opencv-python>=4.11.0.86",
14
+ "sam2>=1.1.0",
15
+ ]
16
+
17
+ [tool.uv]
18
+ no-build-isolation-package = ['sam2']
19
+
20
+ [dependency-groups]
21
+ dev = ["pytest >=8.1.1", "ruff >=0.9.1"]
22
+
23
+ [project.scripts]
24
+ dora-sam2 = "dora_sam2.main:main"
25
+
26
+ [tool.ruff.lint]
27
+ extend-select = [
28
+ "D", # pydocstyle
29
+ "UP", # pyupgrade
30
+ "PERF", # Perflint
31
+ "RET", # flake8-return
32
+ "RSE", # flake8-raise
33
+ "NPY", # NumPy-specific rules
34
+ "N", # pep8-naming
35
+ "I", # isort
36
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,20 @@
1
+ """TODO: Add docstring."""
2
+
3
+ import os
4
+
5
+ import pytest
6
+
7
# True only when the CI environment variable is exactly "True" or "true".
CI = os.environ.get("CI", "false") in ("True", "true")
8
+
9
+
10
def test_import_main():
    """Check that ``main`` imports and raises outside a dora dataflow.

    The test is reported as skipped, not passed, when running in CI, because
    it requires an Nvidia GPU.
    """
    if CI:
        pytest.skip("test requires Nvidia GPU")

    from dora_sam2.main import main

    # Check that everything is working, and catch dora Runtime Exception as
    # we're not running in a dora dataflow.
    with pytest.raises(RuntimeError):
        main()