dgenerate-ultralytics-headless 8.3.253__py3-none-any.whl → 8.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/METADATA +41 -49
  2. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/RECORD +85 -74
  3. tests/__init__.py +2 -2
  4. tests/conftest.py +1 -1
  5. tests/test_cuda.py +8 -2
  6. tests/test_engine.py +8 -8
  7. tests/test_exports.py +11 -4
  8. tests/test_integrations.py +9 -9
  9. tests/test_python.py +14 -14
  10. tests/test_solutions.py +3 -3
  11. ultralytics/__init__.py +1 -1
  12. ultralytics/cfg/__init__.py +25 -27
  13. ultralytics/cfg/default.yaml +3 -1
  14. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  15. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  16. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  17. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  18. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  19. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  20. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  21. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  22. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  23. ultralytics/data/annotator.py +2 -2
  24. ultralytics/data/augment.py +7 -0
  25. ultralytics/data/converter.py +57 -38
  26. ultralytics/data/dataset.py +1 -1
  27. ultralytics/engine/exporter.py +31 -26
  28. ultralytics/engine/model.py +34 -34
  29. ultralytics/engine/predictor.py +17 -17
  30. ultralytics/engine/results.py +14 -12
  31. ultralytics/engine/trainer.py +59 -29
  32. ultralytics/engine/tuner.py +19 -11
  33. ultralytics/engine/validator.py +16 -16
  34. ultralytics/models/fastsam/predict.py +1 -1
  35. ultralytics/models/yolo/classify/predict.py +1 -1
  36. ultralytics/models/yolo/classify/train.py +1 -1
  37. ultralytics/models/yolo/classify/val.py +1 -1
  38. ultralytics/models/yolo/detect/predict.py +2 -2
  39. ultralytics/models/yolo/detect/train.py +4 -3
  40. ultralytics/models/yolo/detect/val.py +7 -1
  41. ultralytics/models/yolo/model.py +8 -8
  42. ultralytics/models/yolo/obb/predict.py +2 -2
  43. ultralytics/models/yolo/obb/train.py +3 -3
  44. ultralytics/models/yolo/obb/val.py +1 -1
  45. ultralytics/models/yolo/pose/predict.py +1 -1
  46. ultralytics/models/yolo/pose/train.py +3 -1
  47. ultralytics/models/yolo/pose/val.py +1 -1
  48. ultralytics/models/yolo/segment/predict.py +3 -3
  49. ultralytics/models/yolo/segment/train.py +4 -4
  50. ultralytics/models/yolo/segment/val.py +4 -2
  51. ultralytics/models/yolo/yoloe/train.py +6 -1
  52. ultralytics/models/yolo/yoloe/train_seg.py +6 -1
  53. ultralytics/nn/autobackend.py +5 -5
  54. ultralytics/nn/modules/__init__.py +8 -0
  55. ultralytics/nn/modules/block.py +128 -8
  56. ultralytics/nn/modules/head.py +788 -203
  57. ultralytics/nn/tasks.py +86 -41
  58. ultralytics/nn/text_model.py +5 -2
  59. ultralytics/optim/__init__.py +5 -0
  60. ultralytics/optim/muon.py +338 -0
  61. ultralytics/solutions/ai_gym.py +3 -3
  62. ultralytics/solutions/config.py +1 -1
  63. ultralytics/solutions/heatmap.py +1 -1
  64. ultralytics/solutions/instance_segmentation.py +2 -2
  65. ultralytics/solutions/parking_management.py +1 -1
  66. ultralytics/solutions/solutions.py +2 -2
  67. ultralytics/trackers/track.py +1 -1
  68. ultralytics/utils/__init__.py +8 -8
  69. ultralytics/utils/benchmarks.py +23 -23
  70. ultralytics/utils/callbacks/platform.py +11 -7
  71. ultralytics/utils/checks.py +6 -6
  72. ultralytics/utils/downloads.py +5 -3
  73. ultralytics/utils/export/engine.py +19 -10
  74. ultralytics/utils/export/imx.py +19 -13
  75. ultralytics/utils/export/tensorflow.py +21 -21
  76. ultralytics/utils/files.py +2 -2
  77. ultralytics/utils/loss.py +587 -203
  78. ultralytics/utils/metrics.py +1 -0
  79. ultralytics/utils/ops.py +11 -2
  80. ultralytics/utils/tal.py +98 -19
  81. ultralytics/utils/tuner.py +2 -2
  82. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/WHEEL +0 -0
  83. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/entry_points.txt +0 -0
  84. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/licenses/LICENSE +0 -0
  85. {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/top_level.txt +0 -0
ultralytics/utils/metrics.py CHANGED
@@ -21,6 +21,7 @@ OKS_SIGMA = (
     )
     / 10.0
 )
+RLE_WEIGHT = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5])


 def bbox_ioa(box1: np.ndarray, box2: np.ndarray, iou: bool = False, eps: float = 1e-7) -> np.ndarray:
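The added `RLE_WEIGHT` is a 17-entry per-keypoint weight vector defined alongside `OKS_SIGMA`; if the indices follow the COCO keypoint order, elbows and knees get 1.2 and wrists and ankles 1.5. A minimal sketch of how such a vector is typically broadcast over a per-keypoint loss; the `per_keypoint_loss` tensor below is a hypothetical illustration, not the package's actual usage:

```python
import numpy as np

RLE_WEIGHT = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5])

# Hypothetical per-keypoint loss for a batch of 4 instances, shape (4, 17).
per_keypoint_loss = np.random.rand(4, 17)

# Broadcasting up-weights the harder keypoints while keeping the shape (4, 17).
weighted = per_keypoint_loss * RLE_WEIGHT
print(weighted.mean())
```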
ultralytics/utils/ops.py CHANGED
@@ -344,7 +344,7 @@ def xyxyxyxy2xywhr(x):

     Returns:
         (np.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5). Rotation
-            values are in radians from 0 to pi/2.
+            values are in radians from [-pi/4, 3pi/4).
     """
     is_torch = isinstance(x, torch.Tensor)
     points = x.cpu().numpy() if is_torch else x
@@ -354,7 +354,16 @@ def xyxyxyxy2xywhr(x):
         # NOTE: Use cv2.minAreaRect to get accurate xywhr,
         # especially some objects are cut off by augmentations in dataloader.
         (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
-        rboxes.append([cx, cy, w, h, angle / 180 * np.pi])
+        # convert angle to radian and normalize to [-pi/4, 3pi/4)
+        theta = angle / 180 * np.pi
+        if w < h:
+            w, h = h, w
+            theta += np.pi / 2
+        while theta >= 3 * np.pi / 4:
+            theta -= np.pi
+        while theta < -np.pi / 4:
+            theta += np.pi
+        rboxes.append([cx, cy, w, h, theta])
     return torch.tensor(rboxes, device=x.device, dtype=x.dtype) if is_torch else np.asarray(rboxes)


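The `xyxyxyxy2xywhr` change replaces the raw `cv2.minAreaRect` angle (degrees, tied to whichever side OpenCV calls the width) with a canonical representation: the long side becomes `w` and the angle is wrapped into [-pi/4, 3pi/4). A standalone sketch of that normalization, mirroring the lines added above so the behaviour can be checked in isolation:

```python
import numpy as np


def normalize_rbox_angle(w: float, h: float, angle_deg: float) -> tuple[float, float, float]:
    """Mirror of the added logic: return (w, h, theta) with w >= h and theta in [-pi/4, 3pi/4)."""
    theta = angle_deg / 180 * np.pi
    if w < h:
        w, h = h, w
        theta += np.pi / 2
    while theta >= 3 * np.pi / 4:
        theta -= np.pi
    while theta < -np.pi / 4:
        theta += np.pi
    return w, h, theta


# Example: minAreaRect reports (w=10, h=40, angle=80°); the long side is swapped into w
# and the angle lands in the canonical range.
print(normalize_rbox_angle(10, 40, 80))  # (40, 10, ≈ -0.175 rad)
```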
ultralytics/utils/tal.py CHANGED
@@ -1,11 +1,13 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 import torch
 import torch.nn as nn

 from . import LOGGER
 from .metrics import bbox_iou, probiou
-from .ops import xywhr2xyxyxyxy
+from .ops import xywh2xyxy, xywhr2xyxyxyxy, xyxy2xywh
 from .torch_utils import TORCH_1_11


@@ -17,13 +19,24 @@ class TaskAlignedAssigner(nn.Module):

     Attributes:
         topk (int): The number of top candidates to consider.
+        topk2 (int): Secondary topk value for additional filtering.
         num_classes (int): The number of object classes.
         alpha (float): The alpha parameter for the classification component of the task-aligned metric.
         beta (float): The beta parameter for the localization component of the task-aligned metric.
+        stride (list): List of stride values for different feature levels.
         eps (float): A small value to prevent division by zero.
     """

-    def __init__(self, topk: int = 13, num_classes: int = 80, alpha: float = 1.0, beta: float = 6.0, eps: float = 1e-9):
+    def __init__(
+        self,
+        topk: int = 13,
+        num_classes: int = 80,
+        alpha: float = 1.0,
+        beta: float = 6.0,
+        stride: list = [8, 16, 32],
+        eps: float = 1e-9,
+        topk2=None,
+    ):
         """Initialize a TaskAlignedAssigner object with customizable hyperparameters.

         Args:
@@ -31,13 +44,17 @@ class TaskAlignedAssigner(nn.Module):
             num_classes (int, optional): The number of object classes.
             alpha (float, optional): The alpha parameter for the classification component of the task-aligned metric.
             beta (float, optional): The beta parameter for the localization component of the task-aligned metric.
+            stride (list, optional): List of stride values for different feature levels.
             eps (float, optional): A small value to prevent division by zero.
+            topk2 (int, optional): Secondary topk value for additional filtering.
         """
         super().__init__()
         self.topk = topk
+        self.topk2 = topk2 or topk
         self.num_classes = num_classes
         self.alpha = alpha
         self.beta = beta
+        self.stride = stride
         self.eps = eps

     @torch.no_grad()
@@ -77,12 +94,14 @@ class TaskAlignedAssigner(nn.Module):

         try:
             return self._forward(pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt)
-        except torch.cuda.OutOfMemoryError:
-            # Move tensors to CPU, compute, then move back to original device
-            LOGGER.warning("CUDA OutOfMemoryError in TaskAlignedAssigner, using CPU")
-            cpu_tensors = [t.cpu() for t in (pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt)]
-            result = self._forward(*cpu_tensors)
-            return tuple(t.to(device) for t in result)
+        except RuntimeError as e:
+            if "out of memory" in str(e).lower():
+                # Move tensors to CPU, compute, then move back to original device
+                LOGGER.warning("CUDA OutOfMemoryError in TaskAlignedAssigner, using CPU")
+                cpu_tensors = [t.cpu() for t in (pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt)]
+                result = self._forward(*cpu_tensors)
+                return tuple(t.to(device) for t in result)
+            raise

     def _forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
         """Compute the task-aligned assignment.
@@ -106,7 +125,9 @@ class TaskAlignedAssigner(nn.Module):
             pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt
         )

-        target_gt_idx, fg_mask, mask_pos = self.select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)
+        target_gt_idx, fg_mask, mask_pos = self.select_highest_overlaps(
+            mask_pos, overlaps, self.n_max_boxes, align_metric
+        )

         # Assigned target
         target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask)
@@ -136,7 +157,7 @@ class TaskAlignedAssigner(nn.Module):
             align_metric (torch.Tensor): Alignment metric with shape (bs, max_num_obj, h*w).
             overlaps (torch.Tensor): Overlaps between predicted vs ground truth boxes with shape (bs, max_num_obj, h*w).
         """
-        mask_in_gts = self.select_candidates_in_gts(anc_points, gt_bboxes)
+        mask_in_gts = self.select_candidates_in_gts(anc_points, gt_bboxes, mask_gt)
         # Get anchor_align metric, (b, max_num_obj, h*w)
         align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes, mask_in_gts * mask_gt)
         # Get topk_metric mask, (b, max_num_obj, h*w)
@@ -263,13 +284,13 @@ class TaskAlignedAssigner(nn.Module):

         return target_labels, target_bboxes, target_scores

-    @staticmethod
-    def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9):
+    def select_candidates_in_gts(self, xy_centers, gt_bboxes, mask_gt, eps=1e-9):
         """Select positive anchor centers within ground truth bounding boxes.

         Args:
             xy_centers (torch.Tensor): Anchor center coordinates, shape (h*w, 2).
             gt_bboxes (torch.Tensor): Ground truth bounding boxes, shape (b, n_boxes, 4).
+            mask_gt (torch.Tensor): Mask for valid ground truth boxes, shape (b, n_boxes, 1).
             eps (float, optional): Small value for numerical stability.

         Returns:
@@ -279,20 +300,26 @@ class TaskAlignedAssigner(nn.Module):
             - b: batch size, n_boxes: number of ground truth boxes, h: height, w: width.
             - Bounding box format: [x_min, y_min, x_max, y_max].
         """
+        gt_bboxes_xywh = xyxy2xywh(gt_bboxes)
+        wh_mask = gt_bboxes_xywh[..., 2:] < self.stride[0]  # the smallest stride
+        stride_val = torch.tensor(self.stride[1], dtype=gt_bboxes_xywh.dtype, device=gt_bboxes_xywh.device)
+        gt_bboxes_xywh[..., 2:] = torch.where((wh_mask * mask_gt).bool(), stride_val, gt_bboxes_xywh[..., 2:])
+        gt_bboxes = xywh2xyxy(gt_bboxes_xywh)
+
         n_anchors = xy_centers.shape[0]
         bs, n_boxes, _ = gt_bboxes.shape
         lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2)  # left-top, right-bottom
         bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1)
         return bbox_deltas.amin(3).gt_(eps)

-    @staticmethod
-    def select_highest_overlaps(mask_pos, overlaps, n_max_boxes):
+    def select_highest_overlaps(self, mask_pos, overlaps, n_max_boxes, align_metric):
         """Select anchor boxes with highest IoU when assigned to multiple ground truths.

         Args:
             mask_pos (torch.Tensor): Positive mask, shape (b, n_max_boxes, h*w).
             overlaps (torch.Tensor): IoU overlaps, shape (b, n_max_boxes, h*w).
             n_max_boxes (int): Maximum number of ground truth boxes.
+            align_metric (torch.Tensor): Alignment metric for selecting best matches.

         Returns:
             target_gt_idx (torch.Tensor): Indices of assigned ground truths, shape (b, h*w).
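In `select_candidates_in_gts`, valid ground-truth boxes whose width or height falls below the smallest stride (`self.stride[0]`) now have that side replaced with the second stride value before the anchor-center test, so tiny objects still enclose at least one anchor center. A self-contained sketch of just the widening step on dummy data; the shapes follow the docstring and the concrete numbers are illustrative:

```python
import torch

from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh

stride = [8, 16, 32]
gt_bboxes = torch.tensor([[[10.0, 10.0, 14.0, 13.0],   # 4x3 box, both sides < stride[0]=8
                           [0.0, 0.0, 100.0, 50.0]]])  # large box, left untouched
mask_gt = torch.ones(1, 2, 1)  # both boxes are valid

gt_xywh = xyxy2xywh(gt_bboxes)
wh_mask = gt_xywh[..., 2:] < stride[0]  # sides smaller than the smallest stride
stride_val = torch.tensor(stride[1], dtype=gt_xywh.dtype)
gt_xywh[..., 2:] = torch.where((wh_mask * mask_gt).bool(), stride_val, gt_xywh[..., 2:])
print(xywh2xyxy(gt_xywh))  # the small box becomes 16x16 around its original center
```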
@@ -303,12 +330,20 @@ class TaskAlignedAssigner(nn.Module):
         fg_mask = mask_pos.sum(-2)
         if fg_mask.max() > 1:  # one anchor is assigned to multiple gt_bboxes
             mask_multi_gts = (fg_mask.unsqueeze(1) > 1).expand(-1, n_max_boxes, -1)  # (b, n_max_boxes, h*w)
-            max_overlaps_idx = overlaps.argmax(1)  # (b, h*w)

+            max_overlaps_idx = overlaps.argmax(1)  # (b, h*w)
             is_max_overlaps = torch.zeros(mask_pos.shape, dtype=mask_pos.dtype, device=mask_pos.device)
             is_max_overlaps.scatter_(1, max_overlaps_idx.unsqueeze(1), 1)
-
             mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos).float()  # (b, n_max_boxes, h*w)
+
+            fg_mask = mask_pos.sum(-2)
+
+        if self.topk2 != self.topk:
+            align_metric = align_metric * mask_pos  # update overlaps
+            max_overlaps_idx = torch.topk(align_metric, self.topk2, dim=-1, largest=True).indices  # (b, n_max_boxes)
+            topk_idx = torch.zeros(mask_pos.shape, dtype=mask_pos.dtype, device=mask_pos.device)  # update mask_pos
+            topk_idx.scatter_(-1, max_overlaps_idx, 1.0)
+            mask_pos *= topk_idx
         fg_mask = mask_pos.sum(-2)
         # Find each grid serve which gt(index)
         target_gt_idx = mask_pos.argmax(-2)  # (b, h*w)
@@ -323,12 +358,14 @@ class RotatedTaskAlignedAssigner(TaskAlignedAssigner):
         return probiou(gt_bboxes, pd_bboxes).squeeze(-1).clamp_(0)

     @staticmethod
-    def select_candidates_in_gts(xy_centers, gt_bboxes):
+    def select_candidates_in_gts(xy_centers, gt_bboxes, mask_gt):
         """Select the positive anchor center in gt for rotated bounding boxes.

         Args:
             xy_centers (torch.Tensor): Anchor center coordinates with shape (h*w, 2).
             gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (b, n_boxes, 5).
+            mask_gt (torch.Tensor): Mask for valid ground truth boxes with shape (b, n_boxes, 1).
+            stride (list[int]): List of stride values for each feature map level.

         Returns:
             (torch.Tensor): Boolean mask of positive anchors with shape (b, n_boxes, h*w).
@@ -377,10 +414,13 @@ def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
     return torch.cat((x1y1, x2y2), dim)  # xyxy bbox


-def bbox2dist(anchor_points, bbox, reg_max):
+def bbox2dist(anchor_points: torch.Tensor, bbox: torch.Tensor, reg_max: int | None = None) -> torch.Tensor:
     """Transform bbox(xyxy) to dist(ltrb)."""
     x1y1, x2y2 = bbox.chunk(2, -1)
-    return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp_(0, reg_max - 0.01)  # dist (lt, rb)
+    dist = torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1)
+    if reg_max is not None:
+        dist = dist.clamp_(0, reg_max - 0.01)  # dist (lt, rb)
+    return dist


 def dist2rbox(pred_dist, pred_angle, anchor_points, dim=-1):
@@ -402,3 +442,42 @@ def dist2rbox(pred_dist, pred_angle, anchor_points, dim=-1):
     x, y = xf * cos - yf * sin, xf * sin + yf * cos
     xy = torch.cat([x, y], dim=dim) + anchor_points
     return torch.cat([xy, lt + rb], dim=dim)
+
+
+def rbox2dist(
+    target_bboxes: torch.Tensor,
+    anchor_points: torch.Tensor,
+    target_angle: torch.Tensor,
+    dim: int = -1,
+    reg_max: int | None = None,
+):
+    """Decode rotated bounding box (xywh) to distance(ltrb). This is the inverse of dist2rbox.
+
+    Args:
+        target_bboxes (torch.Tensor): Target rotated bounding boxes with shape (bs, h*w, 4), format [x, y, w, h].
+        anchor_points (torch.Tensor): Anchor points with shape (h*w, 2).
+        target_angle (torch.Tensor): Target angle with shape (bs, h*w, 1).
+        dim (int, optional): Dimension along which to split.
+        reg_max (int, optional): Maximum regression value for clamping.
+
+    Returns:
+        (torch.Tensor): Predicted rotated distance with shape (bs, h*w, 4), format [l, t, r, b].
+    """
+    xy, wh = target_bboxes.split(2, dim=dim)
+    offset = xy - anchor_points  # (bs, h*w, 2)
+    offset_x, offset_y = offset.split(1, dim=dim)
+    cos, sin = torch.cos(target_angle), torch.sin(target_angle)
+    xf = offset_x * cos + offset_y * sin
+    yf = -offset_x * sin + offset_y * cos
+
+    w, h = wh.split(1, dim=dim)
+    target_l = w / 2 - xf
+    target_t = h / 2 - yf
+    target_r = w / 2 + xf
+    target_b = h / 2 + yf
+
+    dist = torch.cat([target_l, target_t, target_r, target_b], dim=dim)
+    if reg_max is not None:
+        dist = dist.clamp_(0, reg_max - 0.01)
+
+    return dist
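`rbox2dist` is documented as the inverse of `dist2rbox`: it rotates the anchor-to-center offset back into the box frame and converts width/height into left/top/right/bottom distances. A quick round-trip sketch (assuming both helpers are importable from `ultralytics.utils.tal` in 8.4.x) to confirm that decoding and re-encoding agree:

```python
import torch

from ultralytics.utils.tal import dist2rbox, rbox2dist

anchor_points = torch.tensor([[8.0, 8.0], [24.0, 8.0]])  # (h*w, 2)
pred_dist = torch.tensor([[[2.0, 3.0, 4.0, 5.0], [1.0, 1.0, 2.0, 2.0]]])  # (bs, h*w, 4), ltrb
pred_angle = torch.tensor([[[0.3], [-0.2]]])  # (bs, h*w, 1), radians

rboxes = dist2rbox(pred_dist, pred_angle, anchor_points)  # (bs, h*w, 4) as [x, y, w, h]
recovered = rbox2dist(rboxes, anchor_points, pred_angle)  # back to [l, t, r, b]
print(torch.allclose(recovered, pred_dist, atol=1e-6))  # expected: True
```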
ultralytics/utils/tuner.py CHANGED
@@ -29,9 +29,9 @@ def run_ray_tune(

     Examples:
         >>> from ultralytics import YOLO
-        >>> model = YOLO("yolo11n.pt")  # Load a YOLO11n model
+        >>> model = YOLO("yolo26n.pt")  # Load a YOLO26n model

-        Start tuning hyperparameters for YOLO11n training on the COCO8 dataset
+        Start tuning hyperparameters for YOLO26n training on the COCO8 dataset
         >>> result_grid = model.tune(data="coco8.yaml", use_ray=True)
     """
     LOGGER.info("💡 Learn about RayTune at https://docs.ultralytics.com/integrations/ray-tune")