dgenerate-ultralytics-headless 8.4.6__py3-none-any.whl → 8.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/METADATA +3 -3
  2. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/RECORD +37 -36
  3. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/WHEEL +1 -1
  4. tests/test_cli.py +10 -3
  5. tests/test_exports.py +64 -43
  6. tests/test_python.py +40 -11
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +5 -4
  9. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  10. ultralytics/cfg/default.yaml +2 -1
  11. ultralytics/data/augment.py +8 -0
  12. ultralytics/data/converter.py +32 -9
  13. ultralytics/data/utils.py +2 -2
  14. ultralytics/engine/exporter.py +10 -6
  15. ultralytics/engine/predictor.py +5 -0
  16. ultralytics/engine/trainer.py +6 -4
  17. ultralytics/engine/tuner.py +2 -2
  18. ultralytics/engine/validator.py +5 -0
  19. ultralytics/models/sam/predict.py +2 -2
  20. ultralytics/models/yolo/classify/train.py +14 -1
  21. ultralytics/models/yolo/detect/train.py +8 -4
  22. ultralytics/models/yolo/pose/train.py +2 -1
  23. ultralytics/models/yolo/world/train_world.py +21 -1
  24. ultralytics/models/yolo/yoloe/train.py +1 -2
  25. ultralytics/nn/autobackend.py +1 -1
  26. ultralytics/nn/modules/head.py +13 -2
  27. ultralytics/nn/tasks.py +18 -0
  28. ultralytics/solutions/security_alarm.py +1 -1
  29. ultralytics/utils/benchmarks.py +3 -9
  30. ultralytics/utils/callbacks/wb.py +6 -1
  31. ultralytics/utils/loss.py +18 -9
  32. ultralytics/utils/patches.py +42 -0
  33. ultralytics/utils/tal.py +15 -5
  34. ultralytics/utils/torch_utils.py +1 -1
  35. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/entry_points.txt +0 -0
  36. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
  37. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/top_level.txt +0 -0
ultralytics/utils/benchmarks.py CHANGED
@@ -36,6 +36,7 @@ import platform
 import re
 import shutil
 import time
+from copy import deepcopy
 from pathlib import Path
 
 import numpy as np
@@ -101,7 +102,6 @@ def benchmark(
     device = select_device(device, verbose=False)
     if isinstance(model, (str, Path)):
         model = YOLO(model)
-    is_end2end = getattr(model.model.model[-1], "end2end", False)
     data = data or TASK2DATA[model.task]  # task to dataset, i.e. coco8.yaml for task=detect
     key = TASK2METRIC[model.task]  # task to metric, i.e. metrics/mAP50-95(B) for task=detect
 
@@ -135,14 +135,12 @@ def benchmark(
             if format == "paddle":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
                 assert model.task != "obb", "Paddle OBB bug https://github.com/PaddlePaddle/Paddle/issues/72024"
-                assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
                 assert (LINUX and not IS_JETSON) or MACOS, "Windows and Jetson Paddle exports not supported yet"
             if format == "mnn":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet"
             if format == "ncnn":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
             if format == "imx":
-                assert not is_end2end
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
                 assert model.task in {"detect", "classify", "pose"}, (
                     "IMX export is only supported for detection, classification and pose estimation tasks"
@@ -150,25 +148,21 @@ def benchmark(
                 assert "C2f" in model.__str__(), "IMX only supported for YOLOv8n and YOLO11n"
             if format == "rknn":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by RKNN yet"
                 assert LINUX, "RKNN only supported on Linux"
                 assert not is_rockchip(), "RKNN Inference only supported on Rockchip devices"
             if format == "executorch":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 ExecuTorch exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by ExecuTorch yet"
             if "cpu" in device.type:
                 assert cpu, "inference not supported on CPU"
             if "cuda" in device.type:
                 assert gpu, "inference not supported on GPU"
-            if format == "ncnn":
-                assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet"
 
             # Export
             if format == "-":
                 filename = model.pt_path or model.ckpt_path or model.model_name
-                exported_model = model  # PyTorch format
+                exported_model = deepcopy(model)  # PyTorch format
             else:
-                filename = model.export(
+                filename = deepcopy(model).export(
                     imgsz=imgsz, format=format, half=half, int8=int8, data=data, device=device, verbose=False, **kwargs
                 )
                 exported_model = YOLO(filename, task=model.task)
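
The deepcopy changes above matter presumably because export can mutate the model in place (e.g. layer fusion), so reusing one model object across formats could leak state between benchmark runs. A toy sketch of the pattern (the Model class here is a stand-in, not the real YOLO API):

    from copy import deepcopy

    class Model:
        """Stand-in for a model whose export() mutates it in place."""
        def __init__(self):
            self.fused = False

        def export(self):
            self.fused = True  # exporting fuses layers, changing the object
            return "artifact"

    m = Model()
    deepcopy(m).export()     # mutation lands on the throwaway copy
    assert m.fused is False  # original stays pristine for the next format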
ultralytics/utils/callbacks/wb.py CHANGED
@@ -128,10 +128,15 @@ def _log_plots(plots, step):
 def on_pretrain_routine_start(trainer):
     """Initialize and start wandb project if module is present."""
     if not wb.run:
+        from datetime import datetime
+
+        name = str(trainer.args.name).replace("/", "-").replace(" ", "_")
         wb.init(
             project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics",
-            name=str(trainer.args.name).replace("/", "-"),
+            name=name,
             config=vars(trainer.args),
+            id=f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",  # add unique id
+            dir=str(trainer.save_dir),
         )
 
 
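With the timestamped id, repeated runs that share a name create distinct W&B runs instead of resuming one another, and dir= keeps wandb files inside the trainer's save directory. A quick sketch of the generated id (the name is illustrative):

    from datetime import datetime

    name = "yolo11n coco8/exp".replace("/", "-").replace(" ", "_")
    run_id = f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    print(run_id)  # e.g. yolo11n_coco8-exp_20250101_093000
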
ultralytics/utils/loss.py CHANGED
@@ -512,9 +512,19 @@ class v8SegmentationLoss(v8DetectionLoss):
         )
         if pred_semseg is not None:
             sem_masks = batch["sem_masks"].to(self.device)  # NxHxW
-            mask_zero = sem_masks == 0  # NxHxW
             sem_masks = F.one_hot(sem_masks.long(), num_classes=self.nc).permute(0, 3, 1, 2).float()  # NxCxHxW
-            sem_masks[mask_zero.unsqueeze(1).expand_as(sem_masks)] = 0
+
+            if self.overlap:
+                mask_zero = masks == 0  # NxHxW
+                sem_masks[mask_zero.unsqueeze(1).expand_as(sem_masks)] = 0
+            else:
+                batch_idx = batch["batch_idx"].view(-1)  # [total_instances]
+                for i in range(batch_size):
+                    instance_mask_i = masks[batch_idx == i]  # [num_instances_i, H, W]
+                    if len(instance_mask_i) == 0:
+                        continue
+                    sem_masks[i, :, instance_mask_i.sum(dim=0) == 0] = 0
+
             loss[4] = self.bcedice_loss(pred_semseg, sem_masks)
             loss[4] *= self.hyp.box  # seg gain
 
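The split handles the two mask encodings: with overlap=True all instances share one index map, so masks == 0 already marks background, while with overlap=False each instance is a separate binary mask and background is wherever no instance mask for that image is set. A toy non-overlap illustration with dummy tensors (shapes are illustrative):

    import torch

    # Two binary instance masks for one image (overlap=False), 4x4 pixels
    masks = torch.tensor([[[1, 1, 0, 0],
                           [1, 1, 0, 0],
                           [0, 0, 0, 0],
                           [0, 0, 0, 0]],
                          [[0, 0, 0, 0],
                           [0, 0, 0, 0],
                           [0, 0, 1, 1],
                           [0, 0, 1, 1]]], dtype=torch.float32)

    background = masks.sum(dim=0) == 0  # pixel is background iff no instance covers it
    sem_masks = torch.ones(3, 4, 4)     # dummy one-hot semantic masks, 3 classes
    sem_masks[:, background] = 0        # zero every class channel on background pixels
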
@@ -798,7 +808,7 @@ class PoseLoss26(v8PoseLoss):
         loss[0], loss[3], loss[4] = det_loss[0], det_loss[1], det_loss[2]
 
         batch_size = pred_kpts.shape[0]
-        imgsz = torch.tensor(batch["resized_shape"][0], device=self.device, dtype=pred_kpts.dtype)  # image size (h,w)
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=pred_kpts.dtype) * self.stride[0]
 
         pred_kpts = pred_kpts.view(batch_size, -1, *self.kpt_shape)  # (b, h*w, 17, 3)
 
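Both this and the v8OBBLoss change below derive the training image size from the first feature map and its stride instead of batch["resized_shape"], which stays correct when the dataloader's recorded shape and the actual padded network input differ. A worked example with assumed shapes:

    import torch

    feats0 = torch.zeros(2, 64, 80, 80)  # (b, c, h, w): P3 map at stride 8
    imgsz = torch.tensor(feats0.shape[2:], dtype=torch.float32) * 8.0
    print(imgsz)  # tensor([640., 640.]) -- the (h, w) used to scale targets
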
@@ -992,7 +1002,7 @@ class v8OBBLoss(v8DetectionLoss):
         batch_size = pred_angle.shape[0]  # batch size, number of masks, mask height, mask width
 
         dtype = pred_scores.dtype
-        imgsz = torch.tensor(batch["resized_shape"][0], device=self.device, dtype=dtype)  # image size (h,w)
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]
 
         # targets
         try:
@@ -1095,7 +1105,7 @@ class v8OBBLoss(v8DetectionLoss):
         pred_theta = pred_bboxes[..., 4]
         target_theta = target_bboxes[..., 4]
 
-        log_ar = torch.log(w_gt / h_gt)
+        log_ar = torch.log((w_gt + 1e-9) / (h_gt + 1e-9))
         scale_weight = torch.exp(-(log_ar**2) / (lambda_val**2))
 
         delta_theta = pred_theta - target_theta
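
The 1e-9 terms guard against degenerate ground-truth boxes: a zero height divides by zero and a zero width sends torch.log to -inf, either of which would poison scale_weight with inf/NaN. A small numeric check:

    import torch

    w_gt = torch.tensor([0.0, 10.0])
    h_gt = torch.tensor([5.0, 0.0])
    print(torch.log(w_gt / h_gt))                    # tensor([-inf, inf])
    print(torch.log((w_gt + 1e-9) / (h_gt + 1e-9)))  # finite, approx [-22.3, 23.0]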
@@ -1164,9 +1174,9 @@ class E2ELoss:
 class TVPDetectLoss:
     """Criterion class for computing training losses for text-visual prompt detection."""
 
-    def __init__(self, model, tal_topk=10):
+    def __init__(self, model, tal_topk=10, tal_topk2: int | None = None):
         """Initialize TVPDetectLoss with task-prompt and visual-prompt criteria using the provided model."""
-        self.vp_criterion = v8DetectionLoss(model, tal_topk)
+        self.vp_criterion = v8DetectionLoss(model, tal_topk, tal_topk2)
         # NOTE: store following info as it's changeable in __call__
         self.hyp = self.vp_criterion.hyp
         self.ori_nc = self.vp_criterion.nc
@@ -1196,8 +1206,7 @@ class TVPDetectLoss:
 
     def _get_vp_features(self, preds: dict[str, torch.Tensor]) -> list[torch.Tensor]:
         """Extract visual-prompt features from the model output."""
-        # NOTE: remove empty placeholder
-        scores = preds["scores"][:, self.ori_nc :, :]
+        scores = preds["scores"]
         vnc = scores.shape[1]
 
         self.vp_criterion.nc = vnc
ultralytics/utils/patches.py CHANGED
@@ -40,9 +40,51 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
         return None
     else:
         im = cv2.imdecode(file_bytes, flags)
+        # Fallback for formats OpenCV imdecode may not support (AVIF, HEIC)
+        if im is None and filename.lower().endswith((".avif", ".heic")):
+            im = _imread_pil(filename, flags)
     return im[..., None] if im is not None and im.ndim == 2 else im  # Always ensure 3 dimensions
 
 
+_pil_plugins_registered = False
+
+
+def _imread_pil(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
+    """Read an image using PIL as fallback for formats not supported by OpenCV.
+
+    Args:
+        filename (str): Path to the file to read.
+        flags (int, optional): OpenCV imread flags (used to determine grayscale conversion).
+
+    Returns:
+        (np.ndarray | None): The read image array in BGR format, or None if reading fails.
+    """
+    global _pil_plugins_registered
+    try:
+        from PIL import Image
+
+        # Register HEIF/AVIF plugins once
+        if not _pil_plugins_registered:
+            try:
+                import pillow_heif
+
+                pillow_heif.register_heif_opener()
+            except ImportError:
+                pass
+            try:
+                import pillow_avif  # noqa: F401
+            except ImportError:
+                pass
+            _pil_plugins_registered = True
+
+        with Image.open(filename) as img:
+            if flags == cv2.IMREAD_GRAYSCALE:
+                return np.asarray(img.convert("L"))
+            return cv2.cvtColor(np.asarray(img.convert("RGB")), cv2.COLOR_RGB2BGR)
+    except Exception:
+        return None
+
+
 def imwrite(filename: str, img: np.ndarray, params: list[int] | None = None) -> bool:
     """Write an image to a file with multilanguage filename support.
 
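Callers don't change: imread simply gains a PIL fallback for AVIF/HEIC, active only when the optional pillow-heif (or pillow-avif-plugin) package is installed. A usage sketch (the file path is hypothetical):

    import cv2
    from ultralytics.utils.patches import imread

    im = imread("photo.heic")  # cv2.imdecode returns None -> PIL fallback kicks in
    if im is not None:
        print(im.shape)        # BGR array, e.g. (1080, 1920, 3)

    gray = imread("photo.heic", cv2.IMREAD_GRAYSCALE)  # PIL path converts via "L" mode
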
ultralytics/utils/tal.py CHANGED
@@ -24,6 +24,7 @@ class TaskAlignedAssigner(nn.Module):
         alpha (float): The alpha parameter for the classification component of the task-aligned metric.
         beta (float): The beta parameter for the localization component of the task-aligned metric.
         stride (list): List of stride values for different feature levels.
+        stride_val (int): The stride value used for select_candidates_in_gts.
         eps (float): A small value to prevent division by zero.
     """
@@ -55,6 +56,7 @@ class TaskAlignedAssigner(nn.Module):
         self.alpha = alpha
         self.beta = beta
         self.stride = stride
+        self.stride_val = self.stride[1] if len(self.stride) > 1 else self.stride[0]
         self.eps = eps
 
     @torch.no_grad()
@@ -302,8 +304,11 @@ class TaskAlignedAssigner(nn.Module):
         """
         gt_bboxes_xywh = xyxy2xywh(gt_bboxes)
         wh_mask = gt_bboxes_xywh[..., 2:] < self.stride[0]  # the smallest stride
-        stride_val = torch.tensor(self.stride[1], dtype=gt_bboxes_xywh.dtype, device=gt_bboxes_xywh.device)
-        gt_bboxes_xywh[..., 2:] = torch.where((wh_mask * mask_gt).bool(), stride_val, gt_bboxes_xywh[..., 2:])
+        gt_bboxes_xywh[..., 2:] = torch.where(
+            (wh_mask * mask_gt).bool(),
+            torch.tensor(self.stride_val, dtype=gt_bboxes_xywh.dtype, device=gt_bboxes_xywh.device),
+            gt_bboxes_xywh[..., 2:],
+        )
         gt_bboxes = xywh2xyxy(gt_bboxes_xywh)
 
         n_anchors = xy_centers.shape[0]
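
The refactor hoists the fallback stride into self.stride_val (the second stride level if present, else the only one) so the rotated assigner below can reuse it: ground-truth sides smaller than the finest stride are widened so tiny boxes still enclose at least one anchor center. Minimal sketch with dummy values:

    import torch

    stride = [8, 16]
    stride_val = stride[1] if len(stride) > 1 else stride[0]  # mirrors __init__

    wh = torch.tensor([[3.0, 40.0], [50.0, 60.0]])  # (n_boxes, 2) box sides
    mask_gt = torch.tensor([[1.0], [1.0]])          # valid-box mask
    wh_mask = wh < stride[0]                        # sides below the finest stride
    wh = torch.where((wh_mask * mask_gt).bool(), torch.tensor(float(stride_val)), wh)
    print(wh)  # tensor([[16., 40.], [50., 60.]]) -- the 3px side was widened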
@@ -357,19 +362,24 @@ class RotatedTaskAlignedAssigner(TaskAlignedAssigner):
         """Calculate IoU for rotated bounding boxes."""
         return probiou(gt_bboxes, pd_bboxes).squeeze(-1).clamp_(0)
 
-    @staticmethod
-    def select_candidates_in_gts(xy_centers, gt_bboxes, mask_gt):
+    def select_candidates_in_gts(self, xy_centers, gt_bboxes, mask_gt):
         """Select the positive anchor center in gt for rotated bounding boxes.
 
         Args:
             xy_centers (torch.Tensor): Anchor center coordinates with shape (h*w, 2).
             gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (b, n_boxes, 5).
             mask_gt (torch.Tensor): Mask for valid ground truth boxes with shape (b, n_boxes, 1).
-            stride (list[int]): List of stride values for each feature map level.
 
         Returns:
             (torch.Tensor): Boolean mask of positive anchors with shape (b, n_boxes, h*w).
         """
+        wh_mask = gt_bboxes[..., 2:4] < self.stride[0]
+        gt_bboxes[..., 2:4] = torch.where(
+            (wh_mask * mask_gt).bool(),
+            torch.tensor(self.stride_val, dtype=gt_bboxes.dtype, device=gt_bboxes.device),
+            gt_bboxes[..., 2:4],
+        )
+
         # (b, n_boxes, 5) --> (b, n_boxes, 4, 2)
         corners = xywhr2xyxyxyxy(gt_bboxes)
         # (b, n_boxes, 1, 2)
ultralytics/utils/torch_utils.py CHANGED
@@ -78,7 +78,7 @@ def smart_inference_mode():
         if TORCH_1_9 and torch.is_inference_mode_enabled():
             return fn  # already in inference_mode, act as a pass-through
         else:
-            return (torch.inference_mode if TORCH_1_9 else torch.no_grad)()(fn)
+            return (torch.inference_mode if TORCH_1_10 else torch.no_grad)()(fn)
 
     return decorate
 
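For reference, the decorator now gates on TORCH_1_10 when choosing torch.inference_mode over torch.no_grad. A condensed sketch of the logic, assuming a recent torch (TORCH_1_10 here is a local stand-in for the version flag in ultralytics.utils.torch_utils):

    import torch

    TORCH_1_10 = True  # stand-in for the real version check

    def smart_inference_mode():
        """Sketch: pass through if already in inference mode, else wrap fn."""
        def decorate(fn):
            if torch.is_inference_mode_enabled():
                return fn  # already inside inference_mode
            return (torch.inference_mode if TORCH_1_10 else torch.no_grad)()(fn)
        return decorate

    @smart_inference_mode()
    def predict(x):
        return x * 2  # runs with gradient tracking disabled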