dgenerate_ultralytics_headless-8.3.253-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. dgenerate_ultralytics_headless-8.3.253.dist-info/METADATA +405 -0
  2. dgenerate_ultralytics_headless-8.3.253.dist-info/RECORD +299 -0
  3. dgenerate_ultralytics_headless-8.3.253.dist-info/WHEEL +5 -0
  4. dgenerate_ultralytics_headless-8.3.253.dist-info/entry_points.txt +3 -0
  5. dgenerate_ultralytics_headless-8.3.253.dist-info/licenses/LICENSE +661 -0
  6. dgenerate_ultralytics_headless-8.3.253.dist-info/top_level.txt +1 -0
  7. tests/__init__.py +23 -0
  8. tests/conftest.py +59 -0
  9. tests/test_cli.py +131 -0
  10. tests/test_cuda.py +216 -0
  11. tests/test_engine.py +157 -0
  12. tests/test_exports.py +309 -0
  13. tests/test_integrations.py +151 -0
  14. tests/test_python.py +777 -0
  15. tests/test_solutions.py +371 -0
  16. ultralytics/__init__.py +48 -0
  17. ultralytics/assets/bus.jpg +0 -0
  18. ultralytics/assets/zidane.jpg +0 -0
  19. ultralytics/cfg/__init__.py +1028 -0
  20. ultralytics/cfg/datasets/Argoverse.yaml +78 -0
  21. ultralytics/cfg/datasets/DOTAv1.5.yaml +37 -0
  22. ultralytics/cfg/datasets/DOTAv1.yaml +36 -0
  23. ultralytics/cfg/datasets/GlobalWheat2020.yaml +68 -0
  24. ultralytics/cfg/datasets/HomeObjects-3K.yaml +32 -0
  25. ultralytics/cfg/datasets/ImageNet.yaml +2025 -0
  26. ultralytics/cfg/datasets/Objects365.yaml +447 -0
  27. ultralytics/cfg/datasets/SKU-110K.yaml +58 -0
  28. ultralytics/cfg/datasets/TT100K.yaml +346 -0
  29. ultralytics/cfg/datasets/VOC.yaml +102 -0
  30. ultralytics/cfg/datasets/VisDrone.yaml +87 -0
  31. ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
  32. ultralytics/cfg/datasets/brain-tumor.yaml +22 -0
  33. ultralytics/cfg/datasets/carparts-seg.yaml +44 -0
  34. ultralytics/cfg/datasets/coco-pose.yaml +64 -0
  35. ultralytics/cfg/datasets/coco.yaml +118 -0
  36. ultralytics/cfg/datasets/coco128-seg.yaml +101 -0
  37. ultralytics/cfg/datasets/coco128.yaml +101 -0
  38. ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
  39. ultralytics/cfg/datasets/coco8-multispectral.yaml +104 -0
  40. ultralytics/cfg/datasets/coco8-pose.yaml +47 -0
  41. ultralytics/cfg/datasets/coco8-seg.yaml +101 -0
  42. ultralytics/cfg/datasets/coco8.yaml +101 -0
  43. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  44. ultralytics/cfg/datasets/crack-seg.yaml +22 -0
  45. ultralytics/cfg/datasets/dog-pose.yaml +52 -0
  46. ultralytics/cfg/datasets/dota8-multispectral.yaml +38 -0
  47. ultralytics/cfg/datasets/dota8.yaml +35 -0
  48. ultralytics/cfg/datasets/hand-keypoints.yaml +50 -0
  49. ultralytics/cfg/datasets/kitti.yaml +27 -0
  50. ultralytics/cfg/datasets/lvis.yaml +1240 -0
  51. ultralytics/cfg/datasets/medical-pills.yaml +21 -0
  52. ultralytics/cfg/datasets/open-images-v7.yaml +663 -0
  53. ultralytics/cfg/datasets/package-seg.yaml +22 -0
  54. ultralytics/cfg/datasets/signature.yaml +21 -0
  55. ultralytics/cfg/datasets/tiger-pose.yaml +41 -0
  56. ultralytics/cfg/datasets/xView.yaml +155 -0
  57. ultralytics/cfg/default.yaml +130 -0
  58. ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +17 -0
  59. ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
  60. ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
  61. ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
  62. ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
  63. ultralytics/cfg/models/11/yolo11.yaml +50 -0
  64. ultralytics/cfg/models/11/yoloe-11-seg.yaml +48 -0
  65. ultralytics/cfg/models/11/yoloe-11.yaml +48 -0
  66. ultralytics/cfg/models/12/yolo12-cls.yaml +32 -0
  67. ultralytics/cfg/models/12/yolo12-obb.yaml +48 -0
  68. ultralytics/cfg/models/12/yolo12-pose.yaml +49 -0
  69. ultralytics/cfg/models/12/yolo12-seg.yaml +48 -0
  70. ultralytics/cfg/models/12/yolo12.yaml +48 -0
  71. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +53 -0
  72. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +45 -0
  73. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +45 -0
  74. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +57 -0
  75. ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
  76. ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
  77. ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
  78. ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
  79. ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
  80. ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
  81. ultralytics/cfg/models/v3/yolov3-spp.yaml +49 -0
  82. ultralytics/cfg/models/v3/yolov3-tiny.yaml +40 -0
  83. ultralytics/cfg/models/v3/yolov3.yaml +49 -0
  84. ultralytics/cfg/models/v5/yolov5-p6.yaml +62 -0
  85. ultralytics/cfg/models/v5/yolov5.yaml +51 -0
  86. ultralytics/cfg/models/v6/yolov6.yaml +56 -0
  87. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +48 -0
  88. ultralytics/cfg/models/v8/yoloe-v8.yaml +48 -0
  89. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +28 -0
  90. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +28 -0
  91. ultralytics/cfg/models/v8/yolov8-cls.yaml +32 -0
  92. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +58 -0
  93. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +60 -0
  94. ultralytics/cfg/models/v8/yolov8-ghost.yaml +50 -0
  95. ultralytics/cfg/models/v8/yolov8-obb.yaml +49 -0
  96. ultralytics/cfg/models/v8/yolov8-p2.yaml +57 -0
  97. ultralytics/cfg/models/v8/yolov8-p6.yaml +59 -0
  98. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +60 -0
  99. ultralytics/cfg/models/v8/yolov8-pose.yaml +50 -0
  100. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +49 -0
  101. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +59 -0
  102. ultralytics/cfg/models/v8/yolov8-seg.yaml +49 -0
  103. ultralytics/cfg/models/v8/yolov8-world.yaml +51 -0
  104. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +49 -0
  105. ultralytics/cfg/models/v8/yolov8.yaml +49 -0
  106. ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
  107. ultralytics/cfg/models/v9/yolov9c.yaml +41 -0
  108. ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
  109. ultralytics/cfg/models/v9/yolov9e.yaml +64 -0
  110. ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
  111. ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
  112. ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
  113. ultralytics/cfg/trackers/botsort.yaml +21 -0
  114. ultralytics/cfg/trackers/bytetrack.yaml +12 -0
  115. ultralytics/data/__init__.py +26 -0
  116. ultralytics/data/annotator.py +66 -0
  117. ultralytics/data/augment.py +2801 -0
  118. ultralytics/data/base.py +435 -0
  119. ultralytics/data/build.py +437 -0
  120. ultralytics/data/converter.py +855 -0
  121. ultralytics/data/dataset.py +834 -0
  122. ultralytics/data/loaders.py +704 -0
  123. ultralytics/data/scripts/download_weights.sh +18 -0
  124. ultralytics/data/scripts/get_coco.sh +61 -0
  125. ultralytics/data/scripts/get_coco128.sh +18 -0
  126. ultralytics/data/scripts/get_imagenet.sh +52 -0
  127. ultralytics/data/split.py +138 -0
  128. ultralytics/data/split_dota.py +344 -0
  129. ultralytics/data/utils.py +798 -0
  130. ultralytics/engine/__init__.py +1 -0
  131. ultralytics/engine/exporter.py +1580 -0
  132. ultralytics/engine/model.py +1125 -0
  133. ultralytics/engine/predictor.py +508 -0
  134. ultralytics/engine/results.py +1522 -0
  135. ultralytics/engine/trainer.py +977 -0
  136. ultralytics/engine/tuner.py +449 -0
  137. ultralytics/engine/validator.py +387 -0
  138. ultralytics/hub/__init__.py +166 -0
  139. ultralytics/hub/auth.py +151 -0
  140. ultralytics/hub/google/__init__.py +174 -0
  141. ultralytics/hub/session.py +422 -0
  142. ultralytics/hub/utils.py +162 -0
  143. ultralytics/models/__init__.py +9 -0
  144. ultralytics/models/fastsam/__init__.py +7 -0
  145. ultralytics/models/fastsam/model.py +79 -0
  146. ultralytics/models/fastsam/predict.py +169 -0
  147. ultralytics/models/fastsam/utils.py +23 -0
  148. ultralytics/models/fastsam/val.py +38 -0
  149. ultralytics/models/nas/__init__.py +7 -0
  150. ultralytics/models/nas/model.py +98 -0
  151. ultralytics/models/nas/predict.py +56 -0
  152. ultralytics/models/nas/val.py +38 -0
  153. ultralytics/models/rtdetr/__init__.py +7 -0
  154. ultralytics/models/rtdetr/model.py +63 -0
  155. ultralytics/models/rtdetr/predict.py +88 -0
  156. ultralytics/models/rtdetr/train.py +89 -0
  157. ultralytics/models/rtdetr/val.py +216 -0
  158. ultralytics/models/sam/__init__.py +25 -0
  159. ultralytics/models/sam/amg.py +275 -0
  160. ultralytics/models/sam/build.py +365 -0
  161. ultralytics/models/sam/build_sam3.py +377 -0
  162. ultralytics/models/sam/model.py +169 -0
  163. ultralytics/models/sam/modules/__init__.py +1 -0
  164. ultralytics/models/sam/modules/blocks.py +1067 -0
  165. ultralytics/models/sam/modules/decoders.py +495 -0
  166. ultralytics/models/sam/modules/encoders.py +794 -0
  167. ultralytics/models/sam/modules/memory_attention.py +298 -0
  168. ultralytics/models/sam/modules/sam.py +1160 -0
  169. ultralytics/models/sam/modules/tiny_encoder.py +979 -0
  170. ultralytics/models/sam/modules/transformer.py +344 -0
  171. ultralytics/models/sam/modules/utils.py +512 -0
  172. ultralytics/models/sam/predict.py +3940 -0
  173. ultralytics/models/sam/sam3/__init__.py +3 -0
  174. ultralytics/models/sam/sam3/decoder.py +546 -0
  175. ultralytics/models/sam/sam3/encoder.py +529 -0
  176. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  177. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  178. ultralytics/models/sam/sam3/model_misc.py +199 -0
  179. ultralytics/models/sam/sam3/necks.py +129 -0
  180. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  181. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  182. ultralytics/models/sam/sam3/vitdet.py +547 -0
  183. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  184. ultralytics/models/utils/__init__.py +1 -0
  185. ultralytics/models/utils/loss.py +466 -0
  186. ultralytics/models/utils/ops.py +315 -0
  187. ultralytics/models/yolo/__init__.py +7 -0
  188. ultralytics/models/yolo/classify/__init__.py +7 -0
  189. ultralytics/models/yolo/classify/predict.py +90 -0
  190. ultralytics/models/yolo/classify/train.py +202 -0
  191. ultralytics/models/yolo/classify/val.py +216 -0
  192. ultralytics/models/yolo/detect/__init__.py +7 -0
  193. ultralytics/models/yolo/detect/predict.py +122 -0
  194. ultralytics/models/yolo/detect/train.py +227 -0
  195. ultralytics/models/yolo/detect/val.py +507 -0
  196. ultralytics/models/yolo/model.py +430 -0
  197. ultralytics/models/yolo/obb/__init__.py +7 -0
  198. ultralytics/models/yolo/obb/predict.py +56 -0
  199. ultralytics/models/yolo/obb/train.py +79 -0
  200. ultralytics/models/yolo/obb/val.py +302 -0
  201. ultralytics/models/yolo/pose/__init__.py +7 -0
  202. ultralytics/models/yolo/pose/predict.py +65 -0
  203. ultralytics/models/yolo/pose/train.py +110 -0
  204. ultralytics/models/yolo/pose/val.py +248 -0
  205. ultralytics/models/yolo/segment/__init__.py +7 -0
  206. ultralytics/models/yolo/segment/predict.py +109 -0
  207. ultralytics/models/yolo/segment/train.py +69 -0
  208. ultralytics/models/yolo/segment/val.py +307 -0
  209. ultralytics/models/yolo/world/__init__.py +5 -0
  210. ultralytics/models/yolo/world/train.py +173 -0
  211. ultralytics/models/yolo/world/train_world.py +178 -0
  212. ultralytics/models/yolo/yoloe/__init__.py +22 -0
  213. ultralytics/models/yolo/yoloe/predict.py +162 -0
  214. ultralytics/models/yolo/yoloe/train.py +287 -0
  215. ultralytics/models/yolo/yoloe/train_seg.py +122 -0
  216. ultralytics/models/yolo/yoloe/val.py +206 -0
  217. ultralytics/nn/__init__.py +27 -0
  218. ultralytics/nn/autobackend.py +964 -0
  219. ultralytics/nn/modules/__init__.py +182 -0
  220. ultralytics/nn/modules/activation.py +54 -0
  221. ultralytics/nn/modules/block.py +1947 -0
  222. ultralytics/nn/modules/conv.py +669 -0
  223. ultralytics/nn/modules/head.py +1183 -0
  224. ultralytics/nn/modules/transformer.py +793 -0
  225. ultralytics/nn/modules/utils.py +159 -0
  226. ultralytics/nn/tasks.py +1768 -0
  227. ultralytics/nn/text_model.py +356 -0
  228. ultralytics/py.typed +1 -0
  229. ultralytics/solutions/__init__.py +41 -0
  230. ultralytics/solutions/ai_gym.py +108 -0
  231. ultralytics/solutions/analytics.py +264 -0
  232. ultralytics/solutions/config.py +107 -0
  233. ultralytics/solutions/distance_calculation.py +123 -0
  234. ultralytics/solutions/heatmap.py +125 -0
  235. ultralytics/solutions/instance_segmentation.py +86 -0
  236. ultralytics/solutions/object_blurrer.py +89 -0
  237. ultralytics/solutions/object_counter.py +190 -0
  238. ultralytics/solutions/object_cropper.py +87 -0
  239. ultralytics/solutions/parking_management.py +280 -0
  240. ultralytics/solutions/queue_management.py +93 -0
  241. ultralytics/solutions/region_counter.py +133 -0
  242. ultralytics/solutions/security_alarm.py +151 -0
  243. ultralytics/solutions/similarity_search.py +219 -0
  244. ultralytics/solutions/solutions.py +828 -0
  245. ultralytics/solutions/speed_estimation.py +114 -0
  246. ultralytics/solutions/streamlit_inference.py +260 -0
  247. ultralytics/solutions/templates/similarity-search.html +156 -0
  248. ultralytics/solutions/trackzone.py +88 -0
  249. ultralytics/solutions/vision_eye.py +67 -0
  250. ultralytics/trackers/__init__.py +7 -0
  251. ultralytics/trackers/basetrack.py +115 -0
  252. ultralytics/trackers/bot_sort.py +257 -0
  253. ultralytics/trackers/byte_tracker.py +469 -0
  254. ultralytics/trackers/track.py +116 -0
  255. ultralytics/trackers/utils/__init__.py +1 -0
  256. ultralytics/trackers/utils/gmc.py +339 -0
  257. ultralytics/trackers/utils/kalman_filter.py +482 -0
  258. ultralytics/trackers/utils/matching.py +154 -0
  259. ultralytics/utils/__init__.py +1450 -0
  260. ultralytics/utils/autobatch.py +118 -0
  261. ultralytics/utils/autodevice.py +205 -0
  262. ultralytics/utils/benchmarks.py +728 -0
  263. ultralytics/utils/callbacks/__init__.py +5 -0
  264. ultralytics/utils/callbacks/base.py +233 -0
  265. ultralytics/utils/callbacks/clearml.py +146 -0
  266. ultralytics/utils/callbacks/comet.py +625 -0
  267. ultralytics/utils/callbacks/dvc.py +197 -0
  268. ultralytics/utils/callbacks/hub.py +110 -0
  269. ultralytics/utils/callbacks/mlflow.py +134 -0
  270. ultralytics/utils/callbacks/neptune.py +126 -0
  271. ultralytics/utils/callbacks/platform.py +453 -0
  272. ultralytics/utils/callbacks/raytune.py +42 -0
  273. ultralytics/utils/callbacks/tensorboard.py +123 -0
  274. ultralytics/utils/callbacks/wb.py +188 -0
  275. ultralytics/utils/checks.py +1020 -0
  276. ultralytics/utils/cpu.py +85 -0
  277. ultralytics/utils/dist.py +123 -0
  278. ultralytics/utils/downloads.py +529 -0
  279. ultralytics/utils/errors.py +35 -0
  280. ultralytics/utils/events.py +113 -0
  281. ultralytics/utils/export/__init__.py +7 -0
  282. ultralytics/utils/export/engine.py +237 -0
  283. ultralytics/utils/export/imx.py +325 -0
  284. ultralytics/utils/export/tensorflow.py +231 -0
  285. ultralytics/utils/files.py +219 -0
  286. ultralytics/utils/git.py +137 -0
  287. ultralytics/utils/instance.py +484 -0
  288. ultralytics/utils/logger.py +506 -0
  289. ultralytics/utils/loss.py +849 -0
  290. ultralytics/utils/metrics.py +1563 -0
  291. ultralytics/utils/nms.py +337 -0
  292. ultralytics/utils/ops.py +664 -0
  293. ultralytics/utils/patches.py +201 -0
  294. ultralytics/utils/plotting.py +1047 -0
  295. ultralytics/utils/tal.py +404 -0
  296. ultralytics/utils/torch_utils.py +984 -0
  297. ultralytics/utils/tqdm.py +443 -0
  298. ultralytics/utils/triton.py +112 -0
  299. ultralytics/utils/tuner.py +168 -0
ultralytics/utils/ops.py
@@ -0,0 +1,664 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ from __future__ import annotations
+
+ import contextlib
+ import math
+ import re
+ import time
+
+ import cv2
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+
+ from ultralytics.utils import NOT_MACOS14
+
+
+ class Profile(contextlib.ContextDecorator):
+     """Ultralytics Profile class for timing code execution.
+
+     Use as a decorator with @Profile() or as a context manager with 'with Profile():'. Provides accurate timing
+     measurements with CUDA synchronization support for GPU operations.
+
+     Attributes:
+         t (float): Accumulated time in seconds.
+         device (torch.device): Device used for model inference.
+         cuda (bool): Whether CUDA is being used for timing synchronization.
+
+     Examples:
+         Use as a context manager to time code execution
+         >>> with Profile(device=device) as dt:
+         ...     pass  # slow operation here
+         >>> print(dt)  # prints "Elapsed time is 9.5367431640625e-07 s"
+
+         Use as a decorator to time function execution
+         >>> @Profile()
+         ... def slow_function():
+         ...     time.sleep(0.1)
+     """
+
+     def __init__(self, t: float = 0.0, device: torch.device | None = None):
+         """Initialize the Profile class.
+
+         Args:
+             t (float): Initial accumulated time in seconds.
+             device (torch.device, optional): Device used for model inference to enable CUDA synchronization.
+         """
+         self.t = t
+         self.device = device
+         self.cuda = bool(device and str(device).startswith("cuda"))
+
+     def __enter__(self):
+         """Start timing."""
+         self.start = self.time()
+         return self
+
+     def __exit__(self, type, value, traceback):
+         """Stop timing."""
+         self.dt = self.time() - self.start  # delta-time
+         self.t += self.dt  # accumulate dt
+
+     def __str__(self):
+         """Return a human-readable string representing the accumulated elapsed time."""
+         return f"Elapsed time is {self.t} s"
+
+     def time(self):
+         """Get current time with CUDA synchronization if applicable."""
+         if self.cuda:
+             torch.cuda.synchronize(self.device)
+         return time.perf_counter()
+
+
+ def segment2box(segment, width: int = 640, height: int = 640):
+     """Convert segment coordinates to bounding box coordinates.
+
+     Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates. Applies
+     inside-image constraint and clips coordinates when necessary.
+
+     Args:
+         segment (torch.Tensor): Segment coordinates in format (N, 2) where N is number of points.
+         width (int): Width of the image in pixels.
+         height (int): Height of the image in pixels.
+
+     Returns:
+         (np.ndarray): Bounding box coordinates in xyxy format [x1, y1, x2, y2].
+     """
+     x, y = segment.T  # segment xy
+     # Clip coordinates if 3 out of 4 sides are outside the image
+     if np.array([x.min() < 0, y.min() < 0, x.max() > width, y.max() > height]).sum() >= 3:
+         x = x.clip(0, width)
+         y = y.clip(0, height)
+     inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
+     x = x[inside]
+     y = y[inside]
+     return (
+         np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)
+         if any(x)
+         else np.zeros(4, dtype=segment.dtype)
+     )  # xyxy
+
+
+ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = True, xywh: bool = False):
+     """Rescale bounding boxes from one image shape to another.
+
+     Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes. Supports
+     both xyxy and xywh box formats.
+
+     Args:
+         img1_shape (tuple): Shape of the source image (height, width).
+         boxes (torch.Tensor): Bounding boxes to rescale in format (N, 4).
+         img0_shape (tuple): Shape of the target image (height, width).
+         ratio_pad (tuple, optional): Tuple of (ratio, pad) for scaling. If None, calculated from image shapes.
+         padding (bool): Whether boxes are based on YOLO-style augmented images with padding.
+         xywh (bool): Whether box format is xywh (True) or xyxy (False).
+
+     Returns:
+         (torch.Tensor): Rescaled bounding boxes in the same format as input.
+     """
+     if ratio_pad is None:  # calculate from img0_shape
+         gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
+         pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1)
+         pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)
+     else:
+         gain = ratio_pad[0][0]
+         pad_x, pad_y = ratio_pad[1]
+
+     if padding:
+         boxes[..., 0] -= pad_x  # x padding
+         boxes[..., 1] -= pad_y  # y padding
+         if not xywh:
+             boxes[..., 2] -= pad_x  # x padding
+             boxes[..., 3] -= pad_y  # y padding
+     boxes[..., :4] /= gain
+     return boxes if xywh else clip_boxes(boxes, img0_shape)
+
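To make the letterbox arithmetic concrete, here is a minimal usage sketch (editorial illustration, not part of the packaged file; the box values are invented). A 480×640 image letterboxed to 640×640 has gain 1.0 and 80 px of top/bottom padding, so restored y-coordinates shift back up by 80. Note that scale_boxes modifies its input in place, hence the clone().

    import torch
    from ultralytics.utils.ops import scale_boxes

    pred = torch.tensor([[100.0, 80.0, 200.0, 180.0]])  # xyxy in 640x640 model space
    print(scale_boxes((640, 640), pred.clone(), (480, 640)))  # tensor([[100., 0., 200., 100.]])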
+
+ def make_divisible(x: int, divisor):
+     """Return the nearest number that is divisible by the given divisor.
+
+     Args:
+         x (int): The number to make divisible.
+         divisor (int | torch.Tensor): The divisor.
+
+     Returns:
+         (int): The nearest number divisible by the divisor.
+     """
+     if isinstance(divisor, torch.Tensor):
+         divisor = int(divisor.max())  # to int
+     return math.ceil(x / divisor) * divisor
+
+
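Despite the "nearest" wording in the docstring, math.ceil rounds up to the next multiple at or above x; a quick check (editorial illustration, not part of the packaged file):

    from ultralytics.utils.ops import make_divisible

    print(make_divisible(641, 32))  # 672, the next multiple of 32 at or above 641
    print(make_divisible(640, 32))  # 640, already divisible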
+ def clip_boxes(boxes, shape):
+     """Clip bounding boxes to image boundaries.
+
+     Args:
+         boxes (torch.Tensor | np.ndarray): Bounding boxes to clip.
+         shape (tuple): Image shape as HWC or HW (supports both).
+
+     Returns:
+         (torch.Tensor | np.ndarray): Clipped bounding boxes.
+     """
+     h, w = shape[:2]  # supports both HWC or HW shapes
+     if isinstance(boxes, torch.Tensor):  # faster individually
+         if NOT_MACOS14:
+             boxes[..., 0].clamp_(0, w)  # x1
+             boxes[..., 1].clamp_(0, h)  # y1
+             boxes[..., 2].clamp_(0, w)  # x2
+             boxes[..., 3].clamp_(0, h)  # y2
+         else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
+             boxes[..., 0] = boxes[..., 0].clamp(0, w)
+             boxes[..., 1] = boxes[..., 1].clamp(0, h)
+             boxes[..., 2] = boxes[..., 2].clamp(0, w)
+             boxes[..., 3] = boxes[..., 3].clamp(0, h)
+     else:  # np.array (faster grouped)
+         boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, w)  # x1, x2
+         boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, h)  # y1, y2
+     return boxes
+
+
+ def clip_coords(coords, shape):
+     """Clip line coordinates to image boundaries.
+
+     Args:
+         coords (torch.Tensor | np.ndarray): Line coordinates to clip.
+         shape (tuple): Image shape as HWC or HW (supports both).
+
+     Returns:
+         (torch.Tensor | np.ndarray): Clipped coordinates.
+     """
+     h, w = shape[:2]  # supports both HWC or HW shapes
+     if isinstance(coords, torch.Tensor):
+         if NOT_MACOS14:
+             coords[..., 0].clamp_(0, w)  # x
+             coords[..., 1].clamp_(0, h)  # y
+         else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
+             coords[..., 0] = coords[..., 0].clamp(0, w)
+             coords[..., 1] = coords[..., 1].clamp(0, h)
+     else:  # np.array
+         coords[..., 0] = coords[..., 0].clip(0, w)  # x
+         coords[..., 1] = coords[..., 1].clip(0, h)  # y
+     return coords
+
+
+ def xyxy2xywh(x):
+     """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is
+     the top-left corner and (x2, y2) is the bottom-right corner.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.
+
+     Returns:
+         (np.ndarray | torch.Tensor): Bounding box coordinates in (x, y, width, height) format.
+     """
+     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
+     y = empty_like(x)  # faster than clone/copy
+     x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
+     y[..., 0] = (x1 + x2) / 2  # x center
+     y[..., 1] = (y1 + y2) / 2  # y center
+     y[..., 2] = x2 - x1  # width
+     y[..., 3] = y2 - y1  # height
+     return y
+
+
+ def xywh2xyxy(x):
+     """Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is
+     the top-left corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than per channel.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x, y, width, height) format.
+
+     Returns:
+         (np.ndarray | torch.Tensor): Bounding box coordinates in (x1, y1, x2, y2) format.
+     """
+     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
+     y = empty_like(x)  # faster than clone/copy
+     xy = x[..., :2]  # centers
+     wh = x[..., 2:] / 2  # half width-height
+     y[..., :2] = xy - wh  # top left xy
+     y[..., 2:] = xy + wh  # bottom right xy
+     return y
+
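The two converters above are exact inverses; a round-trip sketch (editorial illustration, not part of the packaged file):

    import numpy as np
    from ultralytics.utils.ops import xyxy2xywh, xywh2xyxy

    box = np.array([[320.0, 240.0, 100.0, 50.0]])  # center (320, 240), 100 wide, 50 tall
    xyxy = xywh2xyxy(box)                          # [[270., 215., 370., 265.]]
    assert np.allclose(xyxy2xywh(xyxy), box)       # round trip recovers the input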
+
+ def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
+     """Convert normalized bounding box coordinates to pixel coordinates.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, w, h) format.
+         w (int): Image width in pixels.
+         h (int): Image height in pixels.
+         padw (int): Padding width in pixels.
+         padh (int): Padding height in pixels.
+
+     Returns:
+         y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
+             x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
+     """
+     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
+     y = empty_like(x)  # faster than clone/copy
+     xc, yc, xw, xh = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
+     half_w, half_h = xw / 2, xh / 2
+     y[..., 0] = w * (xc - half_w) + padw  # top left x
+     y[..., 1] = h * (yc - half_h) + padh  # top left y
+     y[..., 2] = w * (xc + half_w) + padw  # bottom right x
+     y[..., 3] = h * (yc + half_h) + padh  # bottom right y
+     return y
+
+
+ def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0.0):
+     """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format.
+     x, y, width and height are normalized to image dimensions.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.
+         w (int): Image width in pixels.
+         h (int): Image height in pixels.
+         clip (bool): Whether to clip boxes to image boundaries.
+         eps (float): Minimum value for box width and height.
+
+     Returns:
+         (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, width, height) format.
+     """
+     if clip:
+         x = clip_boxes(x, (h - eps, w - eps))
+     assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
+     y = empty_like(x)  # faster than clone/copy
+     x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
+     y[..., 0] = ((x1 + x2) / 2) / w  # x center
+     y[..., 1] = ((y1 + y2) / 2) / h  # y center
+     y[..., 2] = (x2 - x1) / w  # width
+     y[..., 3] = (y2 - y1) / h  # height
+     return y
+
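Together these two functions bridge YOLO's normalized label format and pixel corners; for example (editorial illustration, not part of the packaged file):

    import numpy as np
    from ultralytics.utils.ops import xywhn2xyxy

    label = np.array([[0.5, 0.5, 0.25, 0.5]])  # normalized xywh as stored in YOLO *.txt labels
    print(xywhn2xyxy(label, w=640, h=480))     # [[240. 120. 400. 360.]]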
+
+ def xywh2ltwh(x):
+     """Convert bounding box format from [x, y, w, h] to [x1, y1, w, h] where x1, y1 are top-left coordinates.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Input bounding box coordinates in xywh format.
+
+     Returns:
+         (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format.
+     """
+     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+     y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
+     y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
+     return y
+
+
+ def xyxy2ltwh(x):
+     """Convert bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h] format.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Input bounding box coordinates in xyxy format.
+
+     Returns:
+         (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format.
+     """
+     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+     y[..., 2] = x[..., 2] - x[..., 0]  # width
+     y[..., 3] = x[..., 3] - x[..., 1]  # height
+     return y
+
+
+ def ltwh2xywh(x):
+     """Convert bounding boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Input bounding box coordinates.
+
+     Returns:
+         (np.ndarray | torch.Tensor): Bounding box coordinates in xywh format.
+     """
+     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+     y[..., 0] = x[..., 0] + x[..., 2] / 2  # center x
+     y[..., 1] = x[..., 1] + x[..., 3] / 2  # center y
+     return y
+
+
+ def xyxyxyxy2xywhr(x):
+     """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format.
+
+     Returns:
+         (np.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5). Rotation
+             values are in radians from 0 to pi/2.
+     """
+     is_torch = isinstance(x, torch.Tensor)
+     points = x.cpu().numpy() if is_torch else x
+     points = points.reshape(len(x), -1, 2)
+     rboxes = []
+     for pts in points:
+         # NOTE: Use cv2.minAreaRect to get accurate xywhr, especially when objects are cut off by augmentations in
+         # the dataloader.
+         (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
+         rboxes.append([cx, cy, w, h, angle / 180 * np.pi])
+     return torch.tensor(rboxes, device=x.device, dtype=x.dtype) if is_torch else np.asarray(rboxes)
+
+
+ def xywhr2xyxyxyxy(x):
+     """Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4] format.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5).
+             Rotation values should be in radians from 0 to pi/2.
+
+     Returns:
+         (np.ndarray | torch.Tensor): Converted corner points with shape (N, 4, 2) or (B, N, 4, 2).
+     """
+     cos, sin, cat, stack = (
+         (torch.cos, torch.sin, torch.cat, torch.stack)
+         if isinstance(x, torch.Tensor)
+         else (np.cos, np.sin, np.concatenate, np.stack)
+     )
+
+     ctr = x[..., :2]
+     w, h, angle = (x[..., i : i + 1] for i in range(2, 5))
+     cos_value, sin_value = cos(angle), sin(angle)
+     vec1 = [w / 2 * cos_value, w / 2 * sin_value]
+     vec2 = [-h / 2 * sin_value, h / 2 * cos_value]
+     vec1 = cat(vec1, -1)
+     vec2 = cat(vec2, -1)
+     pt1 = ctr + vec1 + vec2
+     pt2 = ctr + vec1 - vec2
+     pt3 = ctr - vec1 - vec2
+     pt4 = ctr - vec1 + vec2
+     return stack([pt1, pt2, pt3, pt4], -2)
+
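To make the corner reconstruction concrete (editorial illustration, not part of the packaged file): a 4×2 box rotated by π/2 yields the corners of a 2-wide, 4-tall rectangle.

    import numpy as np
    from ultralytics.utils.ops import xywhr2xyxyxyxy

    obb = np.array([[0.0, 0.0, 4.0, 2.0, np.pi / 2]])  # cx, cy, w, h, rotation (radians)
    print(xywhr2xyxyxyxy(obb).round(2))  # corners (-1, 2), (1, 2), (1, -2), (-1, -2)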
+
+ def ltwh2xyxy(x):
+     """Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
+
+     Args:
+         x (np.ndarray | torch.Tensor): Input bounding box coordinates.
+
+     Returns:
+         (np.ndarray | torch.Tensor): Bounding box coordinates in xyxy format.
+     """
+     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+     y[..., 2] = x[..., 2] + x[..., 0]  # x2
+     y[..., 3] = x[..., 3] + x[..., 1]  # y2
+     return y
+
+
+ def segments2boxes(segments):
+     """Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
+
+     Args:
+         segments (list): List of segments where each segment is a list of points, each point is [x, y] coordinates.
+
+     Returns:
+         (np.ndarray): Bounding box coordinates in xywh format.
+     """
+     boxes = []
+     for s in segments:
+         x, y = s.T  # segment xy
+         boxes.append([x.min(), y.min(), x.max(), y.max()])  # cls, xyxy
+     return xyxy2xywh(np.array(boxes))  # cls, xywh
+
+
+ def resample_segments(segments, n: int = 1000):
+     """Resample segments to n points each using linear interpolation.
+
+     Args:
+         segments (list): List of (N, 2) arrays where N is the number of points in each segment.
+         n (int): Number of points to resample each segment to.
+
+     Returns:
+         (list): Resampled segments with n points each.
+     """
+     for i, s in enumerate(segments):
+         if len(s) == n:
+             continue
+         s = np.concatenate((s, s[0:1, :]), axis=0)
+         x = np.linspace(0, len(s) - 1, n - len(s) if len(s) < n else n)
+         xp = np.arange(len(s))
+         x = np.insert(x, np.searchsorted(x, xp), xp) if len(s) < n else x
+         segments[i] = (
+             np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)], dtype=np.float32).reshape(2, -1).T
+         )  # segment xy
+     return segments
+
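Usage sketch (editorial illustration, not part of the packaged file): densifying a 3-point polygon into a fixed 100-point contour. Note the function also mutates the list it is given.

    import numpy as np
    from ultralytics.utils.ops import resample_segments

    triangle = np.array([[0.0, 0.0], [10.0, 0.0], [0.0, 10.0]], dtype=np.float32)
    (resampled,) = resample_segments([triangle], n=100)
    print(resampled.shape)  # (100, 2), linearly interpolated along the closed contour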
+
+ def crop_mask(masks: torch.Tensor, boxes: torch.Tensor) -> torch.Tensor:
+     """Crop masks to bounding box regions.
+
+     Args:
+         masks (torch.Tensor): Masks with shape (N, H, W).
+         boxes (torch.Tensor): Bounding box coordinates with shape (N, 4) in relative point form.
+
+     Returns:
+         (torch.Tensor): Cropped masks.
+     """
+     if boxes.device != masks.device:
+         boxes = boxes.to(masks.device)
+     n, h, w = masks.shape
+     if n < 50 and not masks.is_cuda:  # faster for fewer masks (predict)
+         for i, (x1, y1, x2, y2) in enumerate(boxes.round().int()):
+             masks[i, :y1] = 0
+             masks[i, y2:] = 0
+             masks[i, :, :x1] = 0
+             masks[i, :, x2:] = 0
+         return masks
+     else:  # faster for more masks (val)
+         x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
+         r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,1,w)
+         c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # cols shape(1,h,1)
+         return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+
+ def process_mask(protos, masks_in, bboxes, shape, upsample: bool = False):
+     """Apply masks to bounding boxes using mask head output.
+
+     Args:
+         protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
+         masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
+         bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
+         shape (tuple): Input image size as (height, width).
+         upsample (bool): Whether to upsample masks to original image size.
+
+     Returns:
+         (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
+             are the height and width of the input image. The mask is applied to the bounding boxes.
+     """
+     c, mh, mw = protos.shape  # CHW
+     masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)  # NHW
+
+     width_ratio = mw / shape[1]
+     height_ratio = mh / shape[0]
+     ratios = torch.tensor([[width_ratio, height_ratio, width_ratio, height_ratio]], device=bboxes.device)
+
+     masks = crop_mask(masks, boxes=bboxes * ratios)  # NHW
+     if upsample:
+         masks = F.interpolate(masks[None], shape, mode="bilinear")[0]  # NHW
+     return masks.gt_(0.0).byte()
+
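A synthetic end-to-end sketch of the mask pipeline (editorial illustration, not part of the packaged file; random tensors stand in for real segment-head outputs):

    import torch
    from ultralytics.utils.ops import process_mask

    protos = torch.randn(32, 160, 160)  # mask prototypes (mask_dim, mask_h, mask_w)
    coeffs = torch.randn(5, 32)         # per-detection coefficients after NMS
    boxes = torch.tensor([[100.0, 100.0, 300.0, 300.0]]).repeat(5, 1)  # xyxy in input-image space
    masks = process_mask(protos, coeffs, boxes, shape=(640, 640), upsample=True)
    print(masks.shape, masks.dtype)  # torch.Size([5, 640, 640]) torch.uint8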
+
+ def process_mask_native(protos, masks_in, bboxes, shape):
+     """Apply masks to bounding boxes using mask head output with native upsampling.
+
+     Args:
+         protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
+         masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
+         bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
+         shape (tuple): Input image size as (height, width).
+
+     Returns:
+         (torch.Tensor): Binary mask tensor with shape (N, H, W).
+     """
+     c, mh, mw = protos.shape  # CHW
+     masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
+     masks = scale_masks(masks[None], shape)[0]  # NHW
+     masks = crop_mask(masks, bboxes)  # NHW
+     return masks.gt_(0.0).byte()
+
+
+ def scale_masks(
+     masks: torch.Tensor,
+     shape: tuple[int, int],
+     ratio_pad: tuple[tuple[int, int], tuple[int, int]] | None = None,
+     padding: bool = True,
+ ) -> torch.Tensor:
+     """Rescale segment masks to target shape.
+
+     Args:
+         masks (torch.Tensor): Masks with shape (N, C, H, W).
+         shape (tuple[int, int]): Target height and width as (height, width).
+         ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
+         padding (bool): Whether masks are based on YOLO-style augmented images with padding.
+
+     Returns:
+         (torch.Tensor): Rescaled masks.
+     """
+     im1_h, im1_w = masks.shape[2:]
+     im0_h, im0_w = shape[:2]
+     if im1_h == im0_h and im1_w == im0_w:
+         return masks
+
+     if ratio_pad is None:  # calculate from im0_shape
+         gain = min(im1_h / im0_h, im1_w / im0_w)  # gain = old / new
+         pad_w, pad_h = (im1_w - im0_w * gain), (im1_h - im0_h * gain)  # wh padding
+         if padding:
+             pad_w /= 2
+             pad_h /= 2
+     else:
+         pad_w, pad_h = ratio_pad[1]
+     top, left = (round(pad_h - 0.1), round(pad_w - 0.1)) if padding else (0, 0)
+     bottom = im1_h - round(pad_h + 0.1)
+     right = im1_w - round(pad_w + 0.1)
+     return F.interpolate(masks[..., top:bottom, left:right].float(), shape, mode="bilinear")  # NCHW masks
+
+
+ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
+     """Rescale segment coordinates from img1_shape to img0_shape.
+
+     Args:
+         img1_shape (tuple): Source image shape as HWC or HW (supports both).
+         coords (torch.Tensor): Coordinates to scale with shape (N, 2).
+         img0_shape (tuple): Image 0 shape as HWC or HW (supports both).
+         ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
+         normalize (bool): Whether to normalize coordinates to range [0, 1].
+         padding (bool): Whether coordinates are based on YOLO-style augmented images with padding.
+
+     Returns:
+         (torch.Tensor): Scaled coordinates.
+     """
+     img0_h, img0_w = img0_shape[:2]  # supports both HWC or HW shapes
+     if ratio_pad is None:  # calculate from img0_shape
+         img1_h, img1_w = img1_shape[:2]  # supports both HWC or HW shapes
+         gain = min(img1_h / img0_h, img1_w / img0_w)  # gain = old / new
+         pad = (img1_w - img0_w * gain) / 2, (img1_h - img0_h * gain) / 2  # wh padding
+     else:
+         gain = ratio_pad[0][0]
+         pad = ratio_pad[1]
+
+     if padding:
+         coords[..., 0] -= pad[0]  # x padding
+         coords[..., 1] -= pad[1]  # y padding
+     coords[..., 0] /= gain
+     coords[..., 1] /= gain
+     coords = clip_coords(coords, img0_shape)
+     if normalize:
+         coords[..., 0] /= img0_w  # width
+         coords[..., 1] /= img0_h  # height
+     return coords
+
+
+ def regularize_rboxes(rboxes):
+     """Regularize rotated bounding boxes to range [0, pi/2].
+
+     Args:
+         rboxes (torch.Tensor): Input rotated boxes with shape (N, 5) in xywhr format.
+
+     Returns:
+         (torch.Tensor): Regularized rotated boxes.
+     """
+     x, y, w, h, t = rboxes.unbind(dim=-1)
+     # Swap edge if t >= pi/2 while not being symmetrically opposite
+     swap = t % math.pi >= math.pi / 2
+     w_ = torch.where(swap, h, w)
+     h_ = torch.where(swap, w, h)
+     t = t % (math.pi / 2)
+     return torch.stack([x, y, w_, h_, t], dim=-1)  # regularized boxes
+
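For instance (editorial illustration, not part of the packaged file), the swap keeps the rectangle identical while folding the angle into [0, pi/2):

    import math
    import torch
    from ultralytics.utils.ops import regularize_rboxes

    rbox = torch.tensor([[50.0, 50.0, 100.0, 40.0, math.radians(100)]])  # 100x40 at 100 degrees
    print(regularize_rboxes(rbox))  # ~[[50., 50., 40., 100., 0.1745]], i.e. 40x100 at 10 degrees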
+
+ def masks2segments(masks: np.ndarray | torch.Tensor, strategy: str = "all") -> list[np.ndarray]:
+     """Convert masks to segments using contour detection.
+
+     Args:
+         masks (np.ndarray | torch.Tensor): Binary masks with shape (batch_size, 160, 160).
+         strategy (str): Segmentation strategy, either 'all' or 'largest'.
+
+     Returns:
+         (list): List of segment masks as float32 arrays.
+     """
+     from ultralytics.data.converter import merge_multi_segment
+
+     masks = masks.astype("uint8") if isinstance(masks, np.ndarray) else masks.byte().cpu().numpy()
+     segments = []
+     for x in np.ascontiguousarray(masks):
+         c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
+         if c:
+             if strategy == "all":  # merge and concatenate all segments
+                 c = (
+                     np.concatenate(merge_multi_segment([x.reshape(-1, 2) for x in c]))
+                     if len(c) > 1
+                     else c[0].reshape(-1, 2)
+                 )
+             elif strategy == "largest":  # select largest segment
+                 c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
+         else:
+             c = np.zeros((0, 2))  # no segments found
+         segments.append(c.astype("float32"))
+     return segments
+
+
+ def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
+     """Convert a batch of FP32 torch tensors to NumPy uint8 arrays, changing from BCHW to BHWC layout.
+
+     Args:
+         batch (torch.Tensor): Input tensor batch with shape (Batch, Channels, Height, Width) and dtype torch.float32.
+
+     Returns:
+         (np.ndarray): Output NumPy array batch with shape (Batch, Height, Width, Channels) and dtype uint8.
+     """
+     return (batch.permute(0, 2, 3, 1).contiguous() * 255).clamp(0, 255).byte().cpu().numpy()
+
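Usage sketch (editorial illustration, not part of the packaged file): converting a normalized BCHW batch into HWC uint8 images ready for OpenCV or PIL.

    import torch
    from ultralytics.utils.ops import convert_torch2numpy_batch

    batch = torch.rand(2, 3, 640, 640)  # BCHW float32 in [0, 1]
    print(convert_torch2numpy_batch(batch).shape)  # (2, 640, 640, 3), dtype uint8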
+
+ def clean_str(s):
+     """Clean a string by replacing special characters with '_' character.
+
+     Args:
+         s (str): A string needing special characters replaced.
+
+     Returns:
+         (str): A string with special characters replaced by an underscore _.
+     """
+     return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨`><+]", repl="_", string=s)
+
+
+ def empty_like(x):
+     """Create an empty torch.Tensor or np.ndarray with the same shape and dtype as the input."""
+     return torch.empty_like(x, dtype=x.dtype) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=x.dtype)