dgenerate-ultralytics-headless 8.3.189__py3-none-any.whl → 8.3.191__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/RECORD +111 -109
  3. tests/test_cuda.py +6 -5
  4. tests/test_exports.py +1 -6
  5. tests/test_python.py +1 -4
  6. tests/test_solutions.py +1 -1
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +16 -14
  9. ultralytics/cfg/datasets/VisDrone.yaml +4 -4
  10. ultralytics/data/annotator.py +6 -6
  11. ultralytics/data/augment.py +53 -51
  12. ultralytics/data/base.py +15 -13
  13. ultralytics/data/build.py +7 -4
  14. ultralytics/data/converter.py +9 -10
  15. ultralytics/data/dataset.py +24 -22
  16. ultralytics/data/loaders.py +13 -11
  17. ultralytics/data/split.py +4 -3
  18. ultralytics/data/split_dota.py +14 -12
  19. ultralytics/data/utils.py +31 -25
  20. ultralytics/engine/exporter.py +7 -4
  21. ultralytics/engine/model.py +16 -14
  22. ultralytics/engine/predictor.py +9 -7
  23. ultralytics/engine/results.py +59 -57
  24. ultralytics/engine/trainer.py +7 -0
  25. ultralytics/engine/tuner.py +4 -3
  26. ultralytics/engine/validator.py +3 -1
  27. ultralytics/hub/__init__.py +6 -2
  28. ultralytics/hub/auth.py +2 -2
  29. ultralytics/hub/google/__init__.py +9 -8
  30. ultralytics/hub/session.py +11 -11
  31. ultralytics/hub/utils.py +8 -9
  32. ultralytics/models/fastsam/model.py +8 -6
  33. ultralytics/models/nas/model.py +5 -3
  34. ultralytics/models/rtdetr/train.py +4 -3
  35. ultralytics/models/rtdetr/val.py +6 -4
  36. ultralytics/models/sam/amg.py +13 -10
  37. ultralytics/models/sam/model.py +3 -2
  38. ultralytics/models/sam/modules/blocks.py +21 -21
  39. ultralytics/models/sam/modules/decoders.py +11 -11
  40. ultralytics/models/sam/modules/encoders.py +25 -25
  41. ultralytics/models/sam/modules/memory_attention.py +9 -8
  42. ultralytics/models/sam/modules/sam.py +8 -10
  43. ultralytics/models/sam/modules/tiny_encoder.py +21 -20
  44. ultralytics/models/sam/modules/transformer.py +6 -5
  45. ultralytics/models/sam/modules/utils.py +7 -5
  46. ultralytics/models/sam/predict.py +32 -31
  47. ultralytics/models/utils/loss.py +29 -27
  48. ultralytics/models/utils/ops.py +10 -8
  49. ultralytics/models/yolo/classify/train.py +7 -5
  50. ultralytics/models/yolo/classify/val.py +10 -8
  51. ultralytics/models/yolo/detect/predict.py +3 -3
  52. ultralytics/models/yolo/detect/train.py +8 -6
  53. ultralytics/models/yolo/detect/val.py +23 -21
  54. ultralytics/models/yolo/model.py +14 -14
  55. ultralytics/models/yolo/obb/train.py +5 -3
  56. ultralytics/models/yolo/obb/val.py +13 -10
  57. ultralytics/models/yolo/pose/train.py +7 -5
  58. ultralytics/models/yolo/pose/val.py +11 -9
  59. ultralytics/models/yolo/segment/train.py +4 -5
  60. ultralytics/models/yolo/segment/val.py +12 -10
  61. ultralytics/models/yolo/world/train.py +9 -7
  62. ultralytics/models/yolo/yoloe/train.py +7 -6
  63. ultralytics/models/yolo/yoloe/val.py +10 -8
  64. ultralytics/nn/autobackend.py +40 -52
  65. ultralytics/nn/modules/__init__.py +3 -3
  66. ultralytics/nn/modules/block.py +12 -12
  67. ultralytics/nn/modules/conv.py +4 -3
  68. ultralytics/nn/modules/head.py +46 -38
  69. ultralytics/nn/modules/transformer.py +22 -21
  70. ultralytics/nn/tasks.py +2 -2
  71. ultralytics/nn/text_model.py +6 -5
  72. ultralytics/solutions/analytics.py +7 -5
  73. ultralytics/solutions/config.py +12 -10
  74. ultralytics/solutions/distance_calculation.py +3 -3
  75. ultralytics/solutions/heatmap.py +4 -2
  76. ultralytics/solutions/object_counter.py +5 -3
  77. ultralytics/solutions/parking_management.py +4 -2
  78. ultralytics/solutions/region_counter.py +7 -5
  79. ultralytics/solutions/similarity_search.py +5 -3
  80. ultralytics/solutions/solutions.py +38 -36
  81. ultralytics/solutions/streamlit_inference.py +8 -7
  82. ultralytics/trackers/bot_sort.py +11 -9
  83. ultralytics/trackers/byte_tracker.py +17 -15
  84. ultralytics/trackers/utils/gmc.py +4 -3
  85. ultralytics/utils/__init__.py +27 -77
  86. ultralytics/utils/autobatch.py +3 -2
  87. ultralytics/utils/autodevice.py +10 -10
  88. ultralytics/utils/benchmarks.py +11 -10
  89. ultralytics/utils/callbacks/comet.py +9 -9
  90. ultralytics/utils/callbacks/platform.py +2 -1
  91. ultralytics/utils/checks.py +20 -29
  92. ultralytics/utils/downloads.py +2 -2
  93. ultralytics/utils/export.py +12 -11
  94. ultralytics/utils/files.py +8 -7
  95. ultralytics/utils/git.py +139 -0
  96. ultralytics/utils/instance.py +8 -7
  97. ultralytics/utils/logger.py +7 -6
  98. ultralytics/utils/loss.py +15 -13
  99. ultralytics/utils/metrics.py +62 -62
  100. ultralytics/utils/nms.py +346 -0
  101. ultralytics/utils/ops.py +83 -251
  102. ultralytics/utils/patches.py +6 -4
  103. ultralytics/utils/plotting.py +18 -16
  104. ultralytics/utils/tal.py +1 -1
  105. ultralytics/utils/torch_utils.py +4 -2
  106. ultralytics/utils/tqdm.py +47 -33
  107. ultralytics/utils/triton.py +3 -2
  108. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/WHEEL +0 -0
  109. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/entry_points.txt +0 -0
  110. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/licenses/LICENSE +0 -0
  111. {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/top_level.txt +0 -0
ultralytics/utils/ops.py CHANGED
@@ -1,18 +1,18 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import contextlib
4
6
  import math
5
7
  import re
6
8
  import time
7
- from typing import Optional
8
9
 
9
10
  import cv2
10
11
  import numpy as np
11
12
  import torch
12
13
  import torch.nn.functional as F
13
14
 
14
- from ultralytics.utils import LOGGER
15
- from ultralytics.utils.metrics import batch_probiou
15
+ from ultralytics.utils import NOT_MACOS14
16
16
 
17
17
 
18
18
  class Profile(contextlib.ContextDecorator):
@@ -39,7 +39,7 @@ class Profile(contextlib.ContextDecorator):
39
39
  ... time.sleep(0.1)
40
40
  """
41
41
 
42
- def __init__(self, t: float = 0.0, device: Optional[torch.device] = None):
42
+ def __init__(self, t: float = 0.0, device: torch.device | None = None):
43
43
  """
44
44
  Initialize the Profile class.
45
45
 
@@ -122,20 +122,18 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = T
122
122
  """
123
123
  if ratio_pad is None: # calculate from img0_shape
124
124
  gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
125
- pad = (
126
- round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1),
127
- round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1),
128
- ) # wh padding
125
+ pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1)
126
+ pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)
129
127
  else:
130
128
  gain = ratio_pad[0][0]
131
- pad = ratio_pad[1]
129
+ pad_x, pad_y = ratio_pad[1]
132
130
 
133
131
  if padding:
134
- boxes[..., 0] -= pad[0] # x padding
135
- boxes[..., 1] -= pad[1] # y padding
132
+ boxes[..., 0] -= pad_x # x padding
133
+ boxes[..., 1] -= pad_y # y padding
136
134
  if not xywh:
137
- boxes[..., 2] -= pad[0] # x padding
138
- boxes[..., 3] -= pad[1] # y padding
135
+ boxes[..., 2] -= pad_x # x padding
136
+ boxes[..., 3] -= pad_y # y padding
139
137
  boxes[..., :4] /= gain
140
138
  return clip_boxes(boxes, img0_shape)
141
139
 
@@ -156,207 +154,32 @@ def make_divisible(x: int, divisor):
156
154
  return math.ceil(x / divisor) * divisor
157
155
 
158
156
 
159
- def nms_rotated(boxes, scores, threshold: float = 0.45, use_triu: bool = True):
160
- """
161
- Perform NMS on oriented bounding boxes using probiou and fast-nms.
162
-
163
- Args:
164
- boxes (torch.Tensor): Rotated bounding boxes with shape (N, 5) in xywhr format.
165
- scores (torch.Tensor): Confidence scores with shape (N,).
166
- threshold (float): IoU threshold for NMS.
167
- use_triu (bool): Whether to use torch.triu operator for upper triangular matrix operations.
168
-
169
- Returns:
170
- (torch.Tensor): Indices of boxes to keep after NMS.
171
- """
172
- sorted_idx = torch.argsort(scores, descending=True)
173
- boxes = boxes[sorted_idx]
174
- ious = batch_probiou(boxes, boxes)
175
- if use_triu:
176
- ious = ious.triu_(diagonal=1)
177
- # NOTE: handle the case when len(boxes) hence exportable by eliminating if-else condition
178
- pick = torch.nonzero((ious >= threshold).sum(0) <= 0).squeeze_(-1)
179
- else:
180
- n = boxes.shape[0]
181
- row_idx = torch.arange(n, device=boxes.device).view(-1, 1).expand(-1, n)
182
- col_idx = torch.arange(n, device=boxes.device).view(1, -1).expand(n, -1)
183
- upper_mask = row_idx < col_idx
184
- ious = ious * upper_mask
185
- # Zeroing these scores ensures the additional indices would not affect the final results
186
- scores[~((ious >= threshold).sum(0) <= 0)] = 0
187
- # NOTE: return indices with fixed length to avoid TFLite reshape error
188
- pick = torch.topk(scores, scores.shape[0]).indices
189
- return sorted_idx[pick]
190
-
191
-
192
- def non_max_suppression(
193
- prediction,
194
- conf_thres: float = 0.25,
195
- iou_thres: float = 0.45,
196
- classes=None,
197
- agnostic: bool = False,
198
- multi_label: bool = False,
199
- labels=(),
200
- max_det: int = 300,
201
- nc: int = 0, # number of classes (optional)
202
- max_time_img: float = 0.05,
203
- max_nms: int = 30000,
204
- max_wh: int = 7680,
205
- in_place: bool = True,
206
- rotated: bool = False,
207
- end2end: bool = False,
208
- return_idxs: bool = False,
209
- ):
210
- """
211
- Perform non-maximum suppression (NMS) on prediction results.
212
-
213
- Applies NMS to filter overlapping bounding boxes based on confidence and IoU thresholds. Supports multiple
214
- detection formats including standard boxes, rotated boxes, and masks.
215
-
216
- Args:
217
- prediction (torch.Tensor): Predictions with shape (batch_size, num_classes + 4 + num_masks, num_boxes)
218
- containing boxes, classes, and optional masks.
219
- conf_thres (float): Confidence threshold for filtering detections. Valid values are between 0.0 and 1.0.
220
- iou_thres (float): IoU threshold for NMS filtering. Valid values are between 0.0 and 1.0.
221
- classes (List[int], optional): List of class indices to consider. If None, all classes are considered.
222
- agnostic (bool): Whether to perform class-agnostic NMS.
223
- multi_label (bool): Whether each box can have multiple labels.
224
- labels (List[List[Union[int, float, torch.Tensor]]]): A priori labels for each image.
225
- max_det (int): Maximum number of detections to keep per image.
226
- nc (int): Number of classes. Indices after this are considered masks.
227
- max_time_img (float): Maximum time in seconds for processing one image.
228
- max_nms (int): Maximum number of boxes for torchvision.ops.nms().
229
- max_wh (int): Maximum box width and height in pixels.
230
- in_place (bool): Whether to modify the input prediction tensor in place.
231
- rotated (bool): Whether to handle Oriented Bounding Boxes (OBB).
232
- end2end (bool): Whether the model is end-to-end and doesn't require NMS.
233
- return_idxs (bool): Whether to return the indices of kept detections.
234
-
235
- Returns:
236
- output (List[torch.Tensor]): List of detections per image with shape (num_boxes, 6 + num_masks)
237
- containing (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
238
- keepi (List[torch.Tensor]): Indices of kept detections if return_idxs=True.
239
- """
240
- import torchvision # scope for faster 'import ultralytics'
241
-
242
- # Checks
243
- assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
244
- assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
245
- if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
246
- prediction = prediction[0] # select only inference output
247
- if classes is not None:
248
- classes = torch.tensor(classes, device=prediction.device)
249
-
250
- if prediction.shape[-1] == 6 or end2end: # end-to-end model (BNC, i.e. 1,300,6)
251
- output = [pred[pred[:, 4] > conf_thres][:max_det] for pred in prediction]
252
- if classes is not None:
253
- output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output]
254
- return output
255
-
256
- bs = prediction.shape[0] # batch size (BCN, i.e. 1,84,6300)
257
- nc = nc or (prediction.shape[1] - 4) # number of classes
258
- extra = prediction.shape[1] - nc - 4 # number of extra info
259
- mi = 4 + nc # mask start index
260
- xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates
261
- xinds = torch.stack([torch.arange(len(i), device=prediction.device) for i in xc])[..., None] # to track idxs
262
-
263
- # Settings
264
- # min_wh = 2 # (pixels) minimum box width and height
265
- time_limit = 2.0 + max_time_img * bs # seconds to quit after
266
- multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
267
-
268
- prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
269
- if not rotated:
270
- if in_place:
271
- prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
272
- else:
273
- prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1) # xywh to xyxy
274
-
275
- t = time.time()
276
- output = [torch.zeros((0, 6 + extra), device=prediction.device)] * bs
277
- keepi = [torch.zeros((0, 1), device=prediction.device)] * bs # to store the kept idxs
278
- for xi, (x, xk) in enumerate(zip(prediction, xinds)): # image index, (preds, preds indices)
279
- # Apply constraints
280
- # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
281
- filt = xc[xi] # confidence
282
- x, xk = x[filt], xk[filt]
283
-
284
- # Cat apriori labels if autolabelling
285
- if labels and len(labels[xi]) and not rotated:
286
- lb = labels[xi]
287
- v = torch.zeros((len(lb), nc + extra + 4), device=x.device)
288
- v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box
289
- v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls
290
- x = torch.cat((x, v), 0)
291
-
292
- # If none remain process next image
293
- if not x.shape[0]:
294
- continue
295
-
296
- # Detections matrix nx6 (xyxy, conf, cls)
297
- box, cls, mask = x.split((4, nc, extra), 1)
298
-
299
- if multi_label:
300
- i, j = torch.where(cls > conf_thres)
301
- x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
302
- xk = xk[i]
303
- else: # best class only
304
- conf, j = cls.max(1, keepdim=True)
305
- filt = conf.view(-1) > conf_thres
306
- x = torch.cat((box, conf, j.float(), mask), 1)[filt]
307
- xk = xk[filt]
308
-
309
- # Filter by class
310
- if classes is not None:
311
- filt = (x[:, 5:6] == classes).any(1)
312
- x, xk = x[filt], xk[filt]
313
-
314
- # Check shape
315
- n = x.shape[0] # number of boxes
316
- if not n: # no boxes
317
- continue
318
- if n > max_nms: # excess boxes
319
- filt = x[:, 4].argsort(descending=True)[:max_nms] # sort by confidence and remove excess boxes
320
- x, xk = x[filt], xk[filt]
321
-
322
- # Batched NMS
323
- c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
324
- scores = x[:, 4] # scores
325
- if rotated:
326
- boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -1:]), dim=-1) # xywhr
327
- i = nms_rotated(boxes, scores, iou_thres)
328
- else:
329
- boxes = x[:, :4] + c # boxes (offset by class)
330
- i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
331
- i = i[:max_det] # limit detections
332
-
333
- output[xi], keepi[xi] = x[i], xk[i].reshape(-1)
334
- if (time.time() - t) > time_limit:
335
- LOGGER.warning(f"NMS time limit {time_limit:.3f}s exceeded")
336
- break # time limit exceeded
337
-
338
- return (output, keepi) if return_idxs else output
339
-
340
-
341
157
  def clip_boxes(boxes, shape):
342
158
  """
343
159
  Clip bounding boxes to image boundaries.
344
160
 
345
161
  Args:
346
162
  boxes (torch.Tensor | np.ndarray): Bounding boxes to clip.
347
- shape (tuple): Image shape as (height, width).
163
+ shape (tuple): Image shape as HWC or HW (supports both).
348
164
 
349
165
  Returns:
350
166
  (torch.Tensor | np.ndarray): Clipped bounding boxes.
351
167
  """
352
- if isinstance(boxes, torch.Tensor): # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
353
- boxes[..., 0] = boxes[..., 0].clamp(0, shape[1]) # x1
354
- boxes[..., 1] = boxes[..., 1].clamp(0, shape[0]) # y1
355
- boxes[..., 2] = boxes[..., 2].clamp(0, shape[1]) # x2
356
- boxes[..., 3] = boxes[..., 3].clamp(0, shape[0]) # y2
168
+ h, w = shape[:2] # supports both HWC or HW shapes
169
+ if isinstance(boxes, torch.Tensor): # faster individually
170
+ if NOT_MACOS14:
171
+ boxes[..., 0].clamp_(0, w) # x1
172
+ boxes[..., 1].clamp_(0, h) # y1
173
+ boxes[..., 2].clamp_(0, w) # x2
174
+ boxes[..., 3].clamp_(0, h) # y2
175
+ else: # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
176
+ boxes[..., 0] = boxes[..., 0].clamp(0, w)
177
+ boxes[..., 1] = boxes[..., 1].clamp(0, h)
178
+ boxes[..., 2] = boxes[..., 2].clamp(0, w)
179
+ boxes[..., 3] = boxes[..., 3].clamp(0, h)
357
180
  else: # np.array (faster grouped)
358
- boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
359
- boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
181
+ boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, w) # x1, x2
182
+ boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, h) # y1, y2
360
183
  return boxes
361
184
 
362
185
 
@@ -366,17 +189,22 @@ def clip_coords(coords, shape):
366
189
 
367
190
  Args:
368
191
  coords (torch.Tensor | np.ndarray): Line coordinates to clip.
369
- shape (tuple): Image shape as (height, width).
192
+ shape (tuple): Image shape as HWC or HW (supports both).
370
193
 
371
194
  Returns:
372
195
  (torch.Tensor | np.ndarray): Clipped coordinates.
373
196
  """
374
- if isinstance(coords, torch.Tensor): # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
375
- coords[..., 0] = coords[..., 0].clamp(0, shape[1]) # x
376
- coords[..., 1] = coords[..., 1].clamp(0, shape[0]) # y
377
- else: # np.array (faster grouped)
378
- coords[..., 0] = coords[..., 0].clip(0, shape[1]) # x
379
- coords[..., 1] = coords[..., 1].clip(0, shape[0]) # y
197
+ h, w = shape[:2] # supports both HWC or HW shapes
198
+ if isinstance(coords, torch.Tensor):
199
+ if NOT_MACOS14:
200
+ coords[..., 0].clamp_(0, w) # x
201
+ coords[..., 1].clamp_(0, h) # y
202
+ else: # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
203
+ coords[..., 0] = coords[..., 0].clamp(0, w)
204
+ coords[..., 1] = coords[..., 1].clamp(0, h)
205
+ else: # np.array
206
+ coords[..., 0] = coords[..., 0].clip(0, w) # x
207
+ coords[..., 1] = coords[..., 1].clip(0, h) # y
380
208
  return coords
381
209
 
382
210
 
@@ -389,32 +217,34 @@ def scale_image(masks, im0_shape, ratio_pad=None):
389
217
 
390
218
  Args:
391
219
  masks (np.ndarray): Resized and padded masks with shape [H, W, N] or [H, W, 3].
392
- im0_shape (tuple): Original image shape as (height, width).
220
+ im0_shape (tuple): Original image shape as HWC or HW (supports both).
393
221
  ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
394
222
 
395
223
  Returns:
396
224
  (np.ndarray): Rescaled masks with shape [H, W, N] matching original image dimensions.
397
225
  """
398
226
  # Rescale coordinates (xyxy) from im1_shape to im0_shape
399
- im1_shape = masks.shape
400
- if im1_shape[:2] == im0_shape[:2]:
227
+ im0_h, im0_w = im0_shape[:2] # supports both HWC or HW shapes
228
+ im1_h, im1_w, _ = masks.shape
229
+ if im1_h == im0_h and im1_w == im0_w:
401
230
  return masks
231
+
402
232
  if ratio_pad is None: # calculate from im0_shape
403
- gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new
404
- pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding
233
+ gain = min(im1_h / im0_h, im1_w / im0_w) # gain = old / new
234
+ pad = (im1_w - im0_w * gain) / 2, (im1_h - im0_h * gain) / 2 # wh padding
405
235
  else:
406
236
  pad = ratio_pad[1]
407
237
 
408
- top, left = (int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1)))
409
- bottom, right = (
410
- im1_shape[0] - int(round(pad[1] + 0.1)),
411
- im1_shape[1] - int(round(pad[0] + 0.1)),
412
- )
238
+ pad_w, pad_h = pad
239
+ top = int(round(pad_h - 0.1))
240
+ left = int(round(pad_w - 0.1))
241
+ bottom = im1_h - int(round(pad_h + 0.1))
242
+ right = im1_w - int(round(pad_w + 0.1))
413
243
 
414
244
  if len(masks.shape) < 2:
415
245
  raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
416
246
  masks = masks[top:bottom, left:right]
417
- masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
247
+ masks = cv2.resize(masks, (im0_w, im0_h))
418
248
  if len(masks.shape) == 2:
419
249
  masks = masks[:, :, None]
420
250
 
@@ -434,10 +264,11 @@ def xyxy2xywh(x):
434
264
  """
435
265
  assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
436
266
  y = empty_like(x) # faster than clone/copy
437
- y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center
438
- y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center
439
- y[..., 2] = x[..., 2] - x[..., 0] # width
440
- y[..., 3] = x[..., 3] - x[..., 1] # height
267
+ x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
268
+ y[..., 0] = (x1 + x2) / 2 # x center
269
+ y[..., 1] = (y1 + y2) / 2 # y center
270
+ y[..., 2] = x2 - x1 # width
271
+ y[..., 3] = y2 - y1 # height
441
272
  return y
442
273
 
443
274
 
@@ -478,10 +309,12 @@ def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
478
309
  """
479
310
  assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
480
311
  y = empty_like(x) # faster than clone/copy
481
- y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x
482
- y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y
483
- y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x
484
- y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh # bottom right y
312
+ xc, yc, xw, xh = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
313
+ half_w, half_h = xw / 2, xh / 2
314
+ y[..., 0] = w * (xc - half_w) + padw # top left x
315
+ y[..., 1] = h * (yc - half_h) + padh # top left y
316
+ y[..., 2] = w * (xc + half_w) + padw # bottom right x
317
+ y[..., 3] = h * (yc + half_h) + padh # bottom right y
485
318
  return y
486
319
 
487
320
 
@@ -504,10 +337,11 @@ def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0
504
337
  x = clip_boxes(x, (h - eps, w - eps))
505
338
  assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
506
339
  y = empty_like(x) # faster than clone/copy
507
- y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center
508
- y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center
509
- y[..., 2] = (x[..., 2] - x[..., 0]) / w # width
510
- y[..., 3] = (x[..., 3] - x[..., 1]) / h # height
340
+ x1, y1, x2, y2 = x[..., 0], x[..., 1], x[..., 2], x[..., 3]
341
+ y[..., 0] = ((x1 + x2) / 2) / w # x center
342
+ y[..., 1] = ((y1 + y2) / 2) / h # y center
343
+ y[..., 2] = (x2 - x1) / w # width
344
+ y[..., 3] = (y2 - y1) / h # height
511
345
  return y
512
346
 
513
347
 
@@ -756,19 +590,15 @@ def scale_masks(masks, shape, padding: bool = True):
756
590
  """
757
591
  mh, mw = masks.shape[2:]
758
592
  gain = min(mh / shape[0], mw / shape[1]) # gain = old / new
759
- pad = [mw - shape[1] * gain, mh - shape[0] * gain] # wh padding
593
+ pad_w = mw - shape[1] * gain
594
+ pad_h = mh - shape[0] * gain
760
595
  if padding:
761
- pad[0] /= 2
762
- pad[1] /= 2
763
- top, left = (int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))) if padding else (0, 0) # y, x
764
- bottom, right = (
765
- mh - int(round(pad[1] + 0.1)),
766
- mw - int(round(pad[0] + 0.1)),
767
- )
768
- masks = masks[..., top:bottom, left:right]
769
-
770
- masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False) # NCHW
771
- return masks
596
+ pad_w /= 2
597
+ pad_h /= 2
598
+ top, left = (int(round(pad_h - 0.1)), int(round(pad_w - 0.1))) if padding else (0, 0)
599
+ bottom = mh - int(round(pad_h + 0.1))
600
+ right = mw - int(round(pad_w + 0.1))
601
+ return F.interpolate(masks[..., top:bottom, left:right], shape, mode="bilinear", align_corners=False) # NCHW masks
772
602
 
773
603
 
774
604
  def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
@@ -776,9 +606,9 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool
776
606
  Rescale segment coordinates from img1_shape to img0_shape.
777
607
 
778
608
  Args:
779
- img1_shape (tuple): Shape of the source image.
609
+ img1_shape (tuple): Source image shape as HWC or HW (supports both).
780
610
  coords (torch.Tensor): Coordinates to scale with shape (N, 2).
781
- img0_shape (tuple): Shape of the target image.
611
+ img0_shape (tuple): Image 0 shape as HWC or HW (supports both).
782
612
  ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
783
613
  normalize (bool): Whether to normalize coordinates to range [0, 1].
784
614
  padding (bool): Whether coordinates are based on YOLO-style augmented images with padding.
@@ -786,9 +616,11 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool
786
616
  Returns:
787
617
  (torch.Tensor): Scaled coordinates.
788
618
  """
619
+ img0_h, img0_w = img0_shape[:2] # supports both HWC or HW shapes
789
620
  if ratio_pad is None: # calculate from img0_shape
790
- gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
791
- pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
621
+ img1_h, img1_w = img1_shape[:2] # supports both HWC or HW shapes
622
+ gain = min(img1_h / img0_h, img1_w / img0_w) # gain = old / new
623
+ pad = (img1_w - img0_w * gain) / 2, (img1_h - img0_h * gain) / 2 # wh padding
792
624
  else:
793
625
  gain = ratio_pad[0][0]
794
626
  pad = ratio_pad[1]
@@ -800,8 +632,8 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool
800
632
  coords[..., 1] /= gain
801
633
  coords = clip_coords(coords, img0_shape)
802
634
  if normalize:
803
- coords[..., 0] /= img0_shape[1] # width
804
- coords[..., 1] /= img0_shape[0] # height
635
+ coords[..., 0] /= img0_w # width
636
+ coords[..., 1] /= img0_h # height
805
637
  return coords
806
638
 
807
639
 
ultralytics/utils/patches.py CHANGED
@@ -1,11 +1,13 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
  """Monkey patches to update/extend functionality of existing functions."""
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import time
5
7
  from contextlib import contextmanager
6
8
  from copy import copy
7
9
  from pathlib import Path
8
- from typing import Any, Dict, List, Optional
10
+ from typing import Any
9
11
 
10
12
  import cv2
11
13
  import numpy as np
@@ -15,7 +17,7 @@ import torch
15
17
  _imshow = cv2.imshow # copy to avoid recursion errors
16
18
 
17
19
 
18
- def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> Optional[np.ndarray]:
20
+ def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
19
21
  """
20
22
  Read an image from a file with multilanguage filename support.
21
23
 
@@ -42,7 +44,7 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> Optional[np.ndarray]
42
44
  return im[..., None] if im is not None and im.ndim == 2 else im # Always ensure 3 dimensions
43
45
 
44
46
 
45
- def imwrite(filename: str, img: np.ndarray, params: Optional[List[int]] = None) -> bool:
47
+ def imwrite(filename: str, img: np.ndarray, params: list[int] | None = None) -> bool:
46
48
  """
47
49
  Write an image to a file with multilanguage filename support.
48
50
 
@@ -164,7 +166,7 @@ def arange_patch(args):
164
166
 
165
167
 
166
168
  @contextmanager
167
- def override_configs(args, overrides: Optional[Dict[str, Any]] = None):
169
+ def override_configs(args, overrides: dict[str, Any] | None = None):
168
170
  """
169
171
  Context manager to temporarily override configurations in args.
170
172
 
ultralytics/utils/plotting.py CHANGED
@@ -1,9 +1,11 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import math
4
6
  import warnings
5
7
  from pathlib import Path
6
- from typing import Any, Callable, Dict, List, Optional, Union
8
+ from typing import Any, Callable
7
9
 
8
10
  import cv2
9
11
  import numpy as np
@@ -142,12 +144,12 @@ class Colors:
142
144
  dtype=np.uint8,
143
145
  )
144
146
 
145
- def __call__(self, i: int, bgr: bool = False) -> tuple:
147
+ def __call__(self, i: int | torch.Tensor, bgr: bool = False) -> tuple:
146
148
  """
147
149
  Convert hex color codes to RGB values.
148
150
 
149
151
  Args:
150
- i (int): Color index.
152
+ i (int | torch.Tensor): Color index.
151
153
  bgr (bool, optional): Whether to return BGR format instead of RGB.
152
154
 
153
155
  Returns:
@@ -190,8 +192,8 @@ class Annotator:
190
192
  def __init__(
191
193
  self,
192
194
  im,
193
- line_width: Optional[int] = None,
194
- font_size: Optional[int] = None,
195
+ line_width: int | None = None,
196
+ font_size: int | None = None,
195
197
  font: str = "Arial.ttf",
196
198
  pil: bool = False,
197
199
  example: str = "abc",
@@ -409,10 +411,10 @@ class Annotator:
409
411
  self,
410
412
  kpts,
411
413
  shape: tuple = (640, 640),
412
- radius: Optional[int] = None,
414
+ radius: int | None = None,
413
415
  kpt_line: bool = True,
414
416
  conf_thres: float = 0.25,
415
- kpt_color: Optional[tuple] = None,
417
+ kpt_color: tuple | None = None,
416
418
  ):
417
419
  """
418
420
  Plot keypoints on the image.
@@ -517,7 +519,7 @@ class Annotator:
517
519
  """Return annotated image as array."""
518
520
  return np.asarray(self.im)
519
521
 
520
- def show(self, title: Optional[str] = None):
522
+ def show(self, title: str | None = None):
521
523
  """Show the annotated image."""
522
524
  im = Image.fromarray(np.asarray(self.im)[..., ::-1]) # Convert numpy array to PIL Image with RGB to BGR
523
525
  if IS_COLAB or IS_KAGGLE: # can not use IS_JUPYTER as will run for all ipython environments
@@ -533,7 +535,7 @@ class Annotator:
533
535
  cv2.imwrite(filename, np.asarray(self.im))
534
536
 
535
537
  @staticmethod
536
- def get_bbox_dimension(bbox: Optional[tuple] = None):
538
+ def get_bbox_dimension(bbox: tuple | None = None):
537
539
  """
538
540
  Calculate the dimensions and area of a bounding box.
539
541
 
@@ -678,17 +680,17 @@ def save_one_box(
678
680
 
679
681
  @threaded
680
682
  def plot_images(
681
- labels: Dict[str, Any],
682
- images: Union[torch.Tensor, np.ndarray] = np.zeros((0, 3, 640, 640), dtype=np.float32),
683
- paths: Optional[List[str]] = None,
683
+ labels: dict[str, Any],
684
+ images: torch.Tensor | np.ndarray = np.zeros((0, 3, 640, 640), dtype=np.float32),
685
+ paths: list[str] | None = None,
684
686
  fname: str = "images.jpg",
685
- names: Optional[Dict[int, str]] = None,
686
- on_plot: Optional[Callable] = None,
687
+ names: dict[int, str] | None = None,
688
+ on_plot: Callable | None = None,
687
689
  max_size: int = 1920,
688
690
  max_subplots: int = 16,
689
691
  save: bool = True,
690
692
  conf_thres: float = 0.25,
691
- ) -> Optional[np.ndarray]:
693
+ ) -> np.ndarray | None:
692
694
  """
693
695
  Plot image grid with labels, bounding boxes, masks, and keypoints.
694
696
 
@@ -851,7 +853,7 @@ def plot_results(
851
853
  segment: bool = False,
852
854
  pose: bool = False,
853
855
  classify: bool = False,
854
- on_plot: Optional[Callable] = None,
856
+ on_plot: Callable | None = None,
855
857
  ):
856
858
  """
857
859
  Plot training results from a results CSV file. The function supports various types of data including segmentation,
ultralytics/utils/tal.py CHANGED
@@ -387,7 +387,7 @@ def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
387
387
  if xywh:
388
388
  c_xy = (x1y1 + x2y2) / 2
389
389
  wh = x2y2 - x1y1
390
- return torch.cat((c_xy, wh), dim) # xywh bbox
390
+ return torch.cat([c_xy, wh], dim) # xywh bbox
391
391
  return torch.cat((x1y1, x2y2), dim) # xyxy bbox
392
392
 
393
393
 
ultralytics/utils/torch_utils.py CHANGED
@@ -1,5 +1,7 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import functools
4
6
  import gc
5
7
  import math
@@ -10,7 +12,7 @@ from contextlib import contextmanager
10
12
  from copy import deepcopy
11
13
  from datetime import datetime
12
14
  from pathlib import Path
13
- from typing import Any, Dict, Union
15
+ from typing import Any
14
16
 
15
17
  import numpy as np
16
18
  import torch
@@ -708,7 +710,7 @@ class ModelEMA:
708
710
  copy_attr(self.ema, model, include, exclude)
709
711
 
710
712
 
711
- def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "", updates: Dict[str, Any] = None) -> Dict[str, Any]:
713
+ def strip_optimizer(f: str | Path = "best.pt", s: str = "", updates: dict[str, Any] = None) -> dict[str, Any]:
712
714
  """
713
715
  Strip optimizer from 'f' to finalize training, optionally save as 's'.
714
716