dgenerate-ultralytics-headless 8.3.189__py3-none-any.whl → 8.3.190__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/RECORD +33 -32
  3. ultralytics/__init__.py +1 -1
  4. ultralytics/data/utils.py +2 -2
  5. ultralytics/engine/exporter.py +5 -2
  6. ultralytics/engine/predictor.py +1 -1
  7. ultralytics/engine/results.py +5 -5
  8. ultralytics/engine/trainer.py +2 -0
  9. ultralytics/engine/validator.py +3 -1
  10. ultralytics/hub/__init__.py +6 -2
  11. ultralytics/hub/auth.py +2 -2
  12. ultralytics/hub/google/__init__.py +2 -2
  13. ultralytics/hub/session.py +3 -5
  14. ultralytics/hub/utils.py +5 -5
  15. ultralytics/models/yolo/detect/predict.py +2 -2
  16. ultralytics/models/yolo/detect/val.py +2 -2
  17. ultralytics/models/yolo/obb/val.py +2 -1
  18. ultralytics/nn/autobackend.py +28 -38
  19. ultralytics/nn/modules/__init__.py +3 -3
  20. ultralytics/nn/modules/head.py +5 -1
  21. ultralytics/utils/__init__.py +34 -12
  22. ultralytics/utils/callbacks/platform.py +2 -1
  23. ultralytics/utils/checks.py +3 -3
  24. ultralytics/utils/downloads.py +2 -2
  25. ultralytics/utils/logger.py +7 -6
  26. ultralytics/utils/nms.py +346 -0
  27. ultralytics/utils/ops.py +80 -249
  28. ultralytics/utils/tal.py +1 -1
  29. ultralytics/utils/tqdm.py +34 -23
  30. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/WHEEL +0 -0
  31. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/entry_points.txt +0 -0
  32. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/licenses/LICENSE +0 -0
  33. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.190.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,7 @@ import torch.nn as nn
10
10
  import torch.nn.functional as F
11
11
  from torch.nn.init import constant_, xavier_uniform_
12
12
 
13
+ from ultralytics.utils import NOT_MACOS14
13
14
  from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
14
15
  from ultralytics.utils.torch_utils import fuse_conv_and_bn, smart_inference_mode
15
16
 
@@ -408,7 +409,10 @@ class Pose(Detect):
408
409
  else:
409
410
  y = kpts.clone()
410
411
  if ndim == 3:
411
- y[:, 2::ndim] = y[:, 2::ndim].sigmoid() # sigmoid (WARNING: inplace .sigmoid_() Apple MPS bug)
412
+ if NOT_MACOS14:
413
+ y[:, 2::ndim].sigmoid_()
414
+ else: # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
415
+ y[:, 2::ndim] = y[:, 2::ndim].sigmoid()
412
416
  y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
413
417
  y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
414
418
  return y
@@ -8,6 +8,7 @@ import logging
8
8
  import os
9
9
  import platform
10
10
  import re
11
+ import socket
11
12
  import subprocess
12
13
  import sys
13
14
  import threading
@@ -44,6 +45,7 @@ VERBOSE = str(os.getenv("YOLO_VERBOSE", True)).lower() == "true" # global verbo
44
45
  LOGGING_NAME = "ultralytics"
45
46
  MACOS, LINUX, WINDOWS = (platform.system() == x for x in ["Darwin", "Linux", "Windows"]) # environment booleans
46
47
  MACOS_VERSION = platform.mac_ver()[0] if MACOS else None
48
+ NOT_MACOS14 = not (MACOS and MACOS_VERSION.startswith("14."))
47
49
  ARM64 = platform.machine() in {"arm64", "aarch64"} # ARM64 booleans
48
50
  PYTHON_VERSION = platform.python_version()
49
51
  TORCH_VERSION = torch.__version__
@@ -752,20 +754,21 @@ def is_jetson(jetpack=None) -> bool:
752
754
 
753
755
  def is_online() -> bool:
754
756
  """
755
- Check internet connectivity by attempting to connect to a known online host.
757
+ Fast online check using DNS (v4/v6) resolution (Cloudflare + Google).
756
758
 
757
759
  Returns:
758
760
  (bool): True if connection is successful, False otherwise.
759
761
  """
760
- try:
761
- assert str(os.getenv("YOLO_OFFLINE", "")).lower() != "true" # check if ENV var YOLO_OFFLINE="True"
762
- import socket
762
+ if str(os.getenv("YOLO_OFFLINE", "")).lower() == "true":
763
+ return False
763
764
 
764
- for dns in ("1.1.1.1", "8.8.8.8"): # check Cloudflare and Google DNS
765
- socket.create_connection(address=(dns, 80), timeout=2.0).close()
765
+ for host in ("one.one.one.one", "dns.google"):
766
+ try:
767
+ socket.getaddrinfo(host, 0, socket.AF_UNSPEC, 0, 0, socket.AI_ADDRCONFIG)
766
768
  return True
767
- except Exception:
768
- return False
769
+ except OSError:
770
+ continue
771
+ return False
769
772
 
770
773
 
771
774
  def is_pip_package(filepath: str = __name__) -> bool:
@@ -842,6 +845,7 @@ def is_git_dir():
842
845
  return GIT_DIR is not None
843
846
 
844
847
 
848
+ @lru_cache(maxsize=1)
845
849
  def get_git_origin_url():
846
850
  """
847
851
  Retrieve the origin URL of a git repository.
@@ -851,12 +855,14 @@ def get_git_origin_url():
851
855
  """
852
856
  if IS_GIT_DIR:
853
857
  try:
854
- origin = subprocess.check_output(["git", "config", "--get", "remote.origin.url"])
855
- return origin.decode().strip()
858
+ return subprocess.check_output(
859
+ ["git", "config", "--get", "remote.origin.url"], stderr=subprocess.DEVNULL, text=True
860
+ ).strip()
856
861
  except subprocess.CalledProcessError:
857
862
  return None
858
863
 
859
864
 
865
+ @lru_cache(maxsize=1)
860
866
  def get_git_branch():
861
867
  """
862
868
  Return the current git branch name. If not in a git repository, return None.
@@ -866,8 +872,24 @@ def get_git_branch():
866
872
  """
867
873
  if IS_GIT_DIR:
868
874
  try:
869
- origin = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"])
870
- return origin.decode().strip()
875
+ return subprocess.check_output(
876
+ ["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL, text=True
877
+ ).strip()
878
+ except subprocess.CalledProcessError:
879
+ return None
880
+
881
+
882
+ @lru_cache(maxsize=1)
883
+ def get_git_commit():
884
+ """
885
+ Return the current git commit hash. If not in a git repository, return None.
886
+
887
+ Returns:
888
+ (str | None): The current git commit hash or None if not a git directory.
889
+ """
890
+ if IS_GIT_DIR:
891
+ try:
892
+ return subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL, text=True).strip()
871
893
  except subprocess.CalledProcessError:
872
894
  return None
873
895
 
@@ -1,12 +1,13 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
3
  from ultralytics.utils import RANK, SETTINGS
4
- from ultralytics.utils.logger import DEFAULT_LOG_PATH, ConsoleLogger, SystemLogger
5
4
 
6
5
 
7
6
  def on_pretrain_routine_start(trainer):
8
7
  """Initialize and start console logging immediately at the very beginning."""
9
8
  if RANK in {-1, 0}:
9
+ from ultralytics.utils.logger import DEFAULT_LOG_PATH, ConsoleLogger, SystemLogger
10
+
10
11
  trainer.system_logger = SystemLogger()
11
12
  trainer.console_logger = ConsoleLogger(DEFAULT_LOG_PATH)
12
13
  trainer.console_logger.start_capture()
@@ -274,7 +274,7 @@ def check_latest_pypi_version(package_name="ultralytics"):
274
274
  Returns:
275
275
  (str): The latest version of the package.
276
276
  """
277
- import requests # slow import
277
+ import requests # scoped as slow import
278
278
 
279
279
  try:
280
280
  requests.packages.urllib3.disable_warnings() # Disable the InsecureRequestWarning
@@ -637,7 +637,7 @@ def check_yolo(verbose=True, device=""):
637
637
  verbose (bool): Whether to print verbose information.
638
638
  device (str | torch.device): Device to use for YOLO.
639
639
  """
640
- import psutil
640
+ import psutil # scoped as slow import
641
641
 
642
642
  from ultralytics.utils.torch_utils import select_device
643
643
 
@@ -670,7 +670,7 @@ def collect_system_info():
670
670
  Returns:
671
671
  (dict): Dictionary containing system information.
672
672
  """
673
- import psutil
673
+ import psutil # scoped as slow import
674
674
 
675
675
  from ultralytics.utils import ENVIRONMENT # scope to avoid circular import
676
676
  from ultralytics.utils.torch_utils import get_cpu_info, get_gpu_info
@@ -252,7 +252,7 @@ def get_google_drive_file_info(link: str) -> tuple[str, str | None]:
252
252
  >>> link = "https://drive.google.com/file/d/1cqT-cJgANNrhIHCrEufUYhQ4RqiWG_lJ/view?usp=drive_link"
253
253
  >>> url, filename = get_google_drive_file_info(link)
254
254
  """
255
- import requests # slow import
255
+ import requests # scoped as slow import
256
256
 
257
257
  file_id = link.split("/d/")[1].split("/view", 1)[0]
258
258
  drive_url = f"https://drive.google.com/uc?export=download&id={file_id}"
@@ -416,7 +416,7 @@ def get_github_assets(
416
416
  Examples:
417
417
  >>> tag, assets = get_github_assets(repo="ultralytics/assets", version="latest")
418
418
  """
419
- import requests # slow import
419
+ import requests # scoped as slow import
420
420
 
421
421
  if version != "latest":
422
422
  version = f"tags/{version}" # i.e. tags/v6.2
@@ -9,9 +9,6 @@ import time
9
9
  from datetime import datetime
10
10
  from pathlib import Path
11
11
 
12
- import psutil
13
- import requests
14
-
15
12
  from ultralytics.utils import MACOS, RANK
16
13
  from ultralytics.utils.checks import check_requirements
17
14
 
@@ -189,8 +186,10 @@ class ConsoleLogger:
189
186
  """Write log to API endpoint or local file destination."""
190
187
  try:
191
188
  if self.is_api:
189
+ import requests # scoped as slow import
190
+
192
191
  payload = {"timestamp": datetime.now().isoformat(), "message": text.strip()}
193
- requests.post(self.destination, json=payload, timeout=5)
192
+ requests.post(str(self.destination), json=payload, timeout=5)
194
193
  else:
195
194
  self.destination.parent.mkdir(parents=True, exist_ok=True)
196
195
  with self.destination.open("a", encoding="utf-8") as f:
@@ -237,7 +236,6 @@ class SystemLogger:
237
236
  Attributes:
238
237
  pynvml: NVIDIA pynvml module instance if successfully imported, None otherwise.
239
238
  nvidia_initialized (bool): Whether NVIDIA GPU monitoring is available and initialized.
240
- process (psutil.Process): Current psutil.Process instance for process-specific metrics.
241
239
  net_start: Initial network I/O counters for calculating cumulative usage.
242
240
  disk_start: Initial disk I/O counters for calculating cumulative usage.
243
241
 
@@ -260,9 +258,10 @@ class SystemLogger:
260
258
 
261
259
  def __init__(self):
262
260
  """Initialize the system logger."""
261
+ import psutil # scoped as slow import
262
+
263
263
  self.pynvml = None
264
264
  self.nvidia_initialized = self._init_nvidia()
265
- self.process = psutil.Process()
266
265
  self.net_start = psutil.net_io_counters()
267
266
  self.disk_start = psutil.disk_io_counters()
268
267
 
@@ -315,6 +314,8 @@ class SystemLogger:
315
314
  Returns:
316
315
  metrics (dict): System metrics containing 'cpu', 'ram', 'disk', 'network', 'gpus' with respective usage data.
317
316
  """
317
+ import psutil # scoped as slow import
318
+
318
319
  net = psutil.net_io_counters()
319
320
  disk = psutil.disk_io_counters()
320
321
  memory = psutil.virtual_memory()
@@ -0,0 +1,346 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ import sys
4
+ import time
5
+
6
+ import torch
7
+
8
+ from ultralytics.utils import LOGGER
9
+ from ultralytics.utils.metrics import batch_probiou, box_iou
10
+ from ultralytics.utils.ops import xywh2xyxy
11
+
12
+
13
def non_max_suppression(
    prediction,
    conf_thres: float = 0.25,
    iou_thres: float = 0.45,
    classes=None,
    agnostic: bool = False,
    multi_label: bool = False,
    labels=(),
    max_det: int = 300,
    nc: int = 0,  # number of classes (optional)
    max_time_img: float = 0.05,
    max_nms: int = 30000,
    max_wh: int = 7680,
    rotated: bool = False,
    end2end: bool = False,
    return_idxs: bool = False,
):
    """
    Perform non-maximum suppression (NMS) on prediction results.

    Applies NMS to filter overlapping bounding boxes based on confidence and IoU thresholds. Supports multiple
    detection formats including standard boxes, rotated boxes, and masks.

    Args:
        prediction (torch.Tensor): Predictions with shape (batch_size, num_classes + 4 + num_masks, num_boxes)
            containing boxes, classes, and optional masks. A list/tuple is also accepted, in which case only its
            first element is used. For end-to-end models the layout is (batch_size, num_boxes, 6).
        conf_thres (float): Confidence threshold for filtering detections. Valid values are between 0.0 and 1.0.
        iou_thres (float): IoU threshold for NMS filtering. Valid values are between 0.0 and 1.0.
        classes (List[int], optional): List of class indices to consider. If None, all classes are considered.
        agnostic (bool): Whether to perform class-agnostic NMS.
        multi_label (bool): Whether each box can have multiple labels.
        labels (List[List[Union[int, float, torch.Tensor]]]): A priori labels for each image.
        max_det (int): Maximum number of detections to keep per image.
        nc (int): Number of classes. Indices after this are considered masks.
        max_time_img (float): Maximum time in seconds for processing one image.
        max_nms (int): Maximum number of boxes for NMS.
        max_wh (int): Maximum box width and height in pixels.
        rotated (bool): Whether to handle Oriented Bounding Boxes (OBB).
        end2end (bool): Whether the model is end-to-end and doesn't require NMS.
        return_idxs (bool): Whether to return the indices of kept detections.

    Returns:
        output (List[torch.Tensor]): List of detections per image with shape (num_boxes, 6 + num_masks)
            containing (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
        keepi (List[torch.Tensor]): Indices of kept detections if return_idxs=True. This is returned for the
            end-to-end path as well, where the indices refer to rows of each image's (num_boxes, 6) prediction.

    Raises:
        AssertionError: If conf_thres or iou_thres is outside [0, 1].
    """
    # Checks
    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation mode, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output
    if classes is not None:
        classes = torch.tensor(classes, device=prediction.device)

    if prediction.shape[-1] == 6 or end2end:  # end-to-end model (BNC, i.e. 1,300,6)
        # No NMS needed: only threshold, cap and (optionally) class-filter, while tracking which rows survive so
        # that return_idxs=True yields the same (output, keepi) tuple as the regular path.
        output, keepi = [], []
        for pred in prediction:
            idx = torch.nonzero(pred[:, 4] > conf_thres).view(-1)[:max_det]
            if classes is not None:
                idx = idx[(pred[idx, 5:6] == classes).any(1)]
            output.append(pred[idx])
            keepi.append(idx)
        return (output, keepi) if return_idxs else output

    bs = prediction.shape[0]  # batch size (BCN, i.e. 1,84,6300)
    nc = nc or (prediction.shape[1] - 4)  # number of classes
    extra = prediction.shape[1] - nc - 4  # number of extra info
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates
    xinds = torch.arange(prediction.shape[-1], device=prediction.device).expand(bs, -1)[..., None]  # to track idxs

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    time_limit = 2.0 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
    if not rotated:
        # NOTE: transpose() returns a view, so this assignment also rewrites the caller's tensor in place
        prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy

    t = time.time()
    # Placeholders for images that end up with no detections (or are skipped after the time limit)
    output = [torch.zeros((0, 6 + extra), device=prediction.device)] * bs
    keepi = [torch.zeros((0, 1), device=prediction.device)] * bs  # to store the kept idxs
    for xi, (x, xk) in enumerate(zip(prediction, xinds)):  # image index, (preds, preds indices)
        # Apply constraints
        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
        filt = xc[xi]  # confidence
        x = x[filt]
        if return_idxs:
            xk = xk[filt]

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]) and not rotated:
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + extra + 4), device=x.device)
            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, extra), 1)

        if multi_label:
            i, j = torch.where(cls > conf_thres)
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
            if return_idxs:
                xk = xk[i]
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            filt = conf.view(-1) > conf_thres
            x = torch.cat((box, conf, j.float(), mask), 1)[filt]
            if return_idxs:
                xk = xk[filt]

        # Filter by class
        if classes is not None:
            filt = (x[:, 5:6] == classes).any(1)
            x = x[filt]
            if return_idxs:
                xk = xk[filt]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        if n > max_nms:  # excess boxes
            filt = x[:, 4].argsort(descending=True)[:max_nms]  # sort by confidence and remove excess boxes
            x = x[filt]
            if return_idxs:
                xk = xk[filt]

        # Shifting boxes by class * max_wh keeps different classes from overlapping (no shift when agnostic)
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        scores = x[:, 4]  # scores
        if rotated:
            boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -1:]), dim=-1)  # xywhr
            i = TorchNMS.fast_nms(boxes, scores, iou_thres, iou_func=batch_probiou)
        else:
            boxes = x[:, :4] + c  # boxes (offset by class)
            # Speed strategy: torchvision for val or already loaded (faster), TorchNMS for predict (lower latency)
            if "torchvision" in sys.modules:
                import torchvision  # scope as slow import

                i = torchvision.ops.nms(boxes, scores, iou_thres)
            else:
                i = TorchNMS.nms(boxes, scores, iou_thres)
        i = i[:max_det]  # limit detections

        output[xi] = x[i]
        if return_idxs:
            keepi[xi] = xk[i].view(-1)
        if (time.time() - t) > time_limit:
            LOGGER.warning(f"NMS time limit {time_limit:.3f}s exceeded")
            break  # time limit exceeded

    return (output, keepi) if return_idxs else output
168
+
169
+
170
class TorchNMS:
    """
    Ultralytics custom NMS implementation optimized for YOLO.

    This class provides static methods for performing non-maximum suppression (NMS) operations on bounding boxes,
    including both standard NMS and batched NMS for multi-class scenarios.

    Methods:
        fast_nms: Fast-NMS based on a single pairwise IoU matrix, with a pluggable IoU function.
        nms: Greedy NMS with an early exit per iteration, following torchvision's suppression rule.
        batched_nms: Batched NMS for class-aware suppression.

    Examples:
        Perform standard NMS on boxes and scores
        >>> boxes = torch.tensor([[0, 0, 10, 10], [5, 5, 15, 15]])
        >>> scores = torch.tensor([0.9, 0.8])
        >>> keep = TorchNMS.nms(boxes, scores, 0.5)
    """

    @staticmethod
    def fast_nms(
        boxes: torch.Tensor,
        scores: torch.Tensor,
        iou_threshold: float,
        use_triu: bool = True,
        iou_func=box_iou,
    ) -> torch.Tensor:
        """
        Fast-NMS implementation from https://arxiv.org/pdf/1904.02689 using upper triangular matrix operations.

        Args:
            boxes (torch.Tensor): Bounding boxes with shape (N, 4) in xyxy format.
            scores (torch.Tensor): Confidence scores with shape (N,). When use_triu=False the scores of suppressed
                boxes are zeroed in place in this tensor.
            iou_threshold (float): IoU threshold for suppression.
            use_triu (bool): Whether to use torch.triu operator for upper triangular matrix operations.
            iou_func (callable): Function to compute IoU between boxes.

        Returns:
            (torch.Tensor): Indices of boxes to keep after NMS. With use_triu=False the result always has N
                entries; suppressed boxes come last (their scores are zero).

        Examples:
            Apply NMS to a set of boxes
            >>> boxes = torch.tensor([[0, 0, 10, 10], [5, 5, 15, 15]])
            >>> scores = torch.tensor([0.9, 0.8])
            >>> keep = TorchNMS.fast_nms(boxes, scores, 0.5)
        """
        if boxes.numel() == 0:
            return torch.empty((0,), dtype=torch.int64, device=boxes.device)

        sorted_idx = torch.argsort(scores, descending=True)
        boxes = boxes[sorted_idx]
        ious = iou_func(boxes, boxes)
        if use_triu:
            ious = ious.triu_(diagonal=1)
            # NOTE: selecting via nonzero needs no if/else on len(boxes), which keeps this path exportable
            pick = torch.nonzero((ious >= iou_threshold).sum(0) <= 0).squeeze_(-1)
        else:
            n = boxes.shape[0]
            row_idx = torch.arange(n, device=boxes.device).view(-1, 1).expand(-1, n)
            col_idx = torch.arange(n, device=boxes.device).view(1, -1).expand(n, -1)
            upper_mask = row_idx < col_idx
            ious = ious * upper_mask
            # The suppression flags are in score-sorted order, so they must be applied to the sorted scores
            # (applying them to the raw `scores` is only correct when the input is already sorted).
            sorted_scores = scores[sorted_idx]
            # Zeroing these scores ensures the additional indices would not affect the final results
            sorted_scores[~((ious >= iou_threshold).sum(0) <= 0)] = 0
            scores[sorted_idx] = sorted_scores  # preserve the in-place zeroing of the caller's tensor
            # NOTE: return indices with fixed length to avoid TFLite reshape error
            pick = torch.topk(sorted_scores, sorted_scores.shape[0]).indices
        return sorted_idx[pick]

    @staticmethod
    def nms(boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float) -> torch.Tensor:
        """
        Greedy NMS that follows torchvision's rule: a box is suppressed only if its IoU with a kept box exceeds
        the threshold.

        Args:
            boxes (torch.Tensor): Bounding boxes with shape (N, 4) in xyxy format.
            scores (torch.Tensor): Confidence scores with shape (N,).
            iou_threshold (float): IoU threshold for suppression.

        Returns:
            (torch.Tensor): Indices of boxes to keep after NMS, ordered by descending score.

        Examples:
            Apply NMS to a set of boxes
            >>> boxes = torch.tensor([[0, 0, 10, 10], [5, 5, 15, 15]])
            >>> scores = torch.tensor([0.9, 0.8])
            >>> keep = TorchNMS.nms(boxes, scores, 0.5)
        """
        if boxes.numel() == 0:
            return torch.empty((0,), dtype=torch.int64, device=boxes.device)

        # Extract coordinates once
        x1, y1, x2, y2 = boxes.unbind(1)
        areas = (x2 - x1) * (y2 - y1)

        # Sort by scores descending
        _, order = scores.sort(0, descending=True)

        # Pre-allocate keep buffer with maximum possible size
        keep = torch.zeros(order.numel(), dtype=torch.int64, device=boxes.device)
        keep_idx = 0

        while order.numel() > 0:
            i = order[0]
            keep[keep_idx] = i
            keep_idx += 1

            if order.numel() == 1:
                break

            # Vectorized IoU calculation for remaining boxes
            rest = order[1:]
            xx1 = torch.maximum(x1[i], x1[rest])
            yy1 = torch.maximum(y1[i], y1[rest])
            xx2 = torch.minimum(x2[i], x2[rest])
            yy2 = torch.minimum(y2[i], y2[rest])

            # Fast intersection and IoU
            w = (xx2 - xx1).clamp_(min=0)
            h = (yy2 - yy1).clamp_(min=0)
            inter = w * h

            # Early exit for this iteration: the current box overlaps nothing, so it suppresses nothing.
            # The remaining boxes may still overlap each other and must go through the loop themselves.
            if inter.sum() == 0:
                order = rest
                continue

            iou = inter / (areas[i] + areas[rest] - inter)

            # Suppress only when IoU > threshold; NaN IoU (0/0 for degenerate boxes) is therefore kept
            mask = ~(iou > iou_threshold)
            order = rest[mask]

        return keep[:keep_idx]

    @staticmethod
    def batched_nms(
        boxes: torch.Tensor,
        scores: torch.Tensor,
        idxs: torch.Tensor,
        iou_threshold: float,
        use_fast_nms: bool = False,
    ) -> torch.Tensor:
        """
        Batched NMS for class-aware suppression.

        Args:
            boxes (torch.Tensor): Bounding boxes with shape (N, 4) in xyxy format.
            scores (torch.Tensor): Confidence scores with shape (N,).
            idxs (torch.Tensor): Class indices with shape (N,).
            iou_threshold (float): IoU threshold for suppression.
            use_fast_nms (bool): Whether to use the Fast-NMS implementation.

        Returns:
            (torch.Tensor): Indices of boxes to keep after NMS.

        Examples:
            Apply batched NMS across multiple classes
            >>> boxes = torch.tensor([[0, 0, 10, 10], [5, 5, 15, 15]])
            >>> scores = torch.tensor([0.9, 0.8])
            >>> idxs = torch.tensor([0, 1])
            >>> keep = TorchNMS.batched_nms(boxes, scores, idxs, 0.5)
        """
        if boxes.numel() == 0:
            return torch.empty((0,), dtype=torch.int64, device=boxes.device)

        # Strategy: offset boxes by class index to prevent cross-class suppression
        max_coordinate = boxes.max()
        offsets = idxs.to(boxes) * (max_coordinate + 1)
        boxes_for_nms = boxes + offsets[:, None]

        return (
            TorchNMS.fast_nms(boxes_for_nms, scores, iou_threshold)
            if use_fast_nms
            else TorchNMS.nms(boxes_for_nms, scores, iou_threshold)
        )