dgenerate_ultralytics_headless-8.4.6-py3-none-any.whl → dgenerate_ultralytics_headless-8.4.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/METADATA +3 -3
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/RECORD +37 -36
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/WHEEL +1 -1
- tests/test_cli.py +10 -3
- tests/test_exports.py +64 -43
- tests/test_python.py +40 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +5 -4
- ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
- ultralytics/cfg/default.yaml +2 -1
- ultralytics/data/augment.py +8 -0
- ultralytics/data/converter.py +32 -9
- ultralytics/data/utils.py +2 -2
- ultralytics/engine/exporter.py +10 -6
- ultralytics/engine/predictor.py +5 -0
- ultralytics/engine/trainer.py +6 -4
- ultralytics/engine/tuner.py +2 -2
- ultralytics/engine/validator.py +5 -0
- ultralytics/models/sam/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +14 -1
- ultralytics/models/yolo/detect/train.py +8 -4
- ultralytics/models/yolo/pose/train.py +2 -1
- ultralytics/models/yolo/world/train_world.py +21 -1
- ultralytics/models/yolo/yoloe/train.py +1 -2
- ultralytics/nn/autobackend.py +1 -1
- ultralytics/nn/modules/head.py +13 -2
- ultralytics/nn/tasks.py +18 -0
- ultralytics/solutions/security_alarm.py +1 -1
- ultralytics/utils/benchmarks.py +3 -9
- ultralytics/utils/callbacks/wb.py +6 -1
- ultralytics/utils/loss.py +18 -9
- ultralytics/utils/patches.py +42 -0
- ultralytics/utils/tal.py +15 -5
- ultralytics/utils/torch_utils.py +1 -1
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/top_level.txt +0 -0
ultralytics/utils/benchmarks.py
CHANGED

@@ -36,6 +36,7 @@ import platform
 import re
 import shutil
 import time
+from copy import deepcopy
 from pathlib import Path
 
 import numpy as np

@@ -101,7 +102,6 @@ def benchmark(
     device = select_device(device, verbose=False)
     if isinstance(model, (str, Path)):
         model = YOLO(model)
-    is_end2end = getattr(model.model.model[-1], "end2end", False)
     data = data or TASK2DATA[model.task]  # task to dataset, i.e. coco8.yaml for task=detect
     key = TASK2METRIC[model.task]  # task to metric, i.e. metrics/mAP50-95(B) for task=detect
 

@@ -135,14 +135,12 @@ def benchmark(
             if format == "paddle":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
                 assert model.task != "obb", "Paddle OBB bug https://github.com/PaddlePaddle/Paddle/issues/72024"
-                assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
                 assert (LINUX and not IS_JETSON) or MACOS, "Windows and Jetson Paddle exports not supported yet"
             if format == "mnn":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet"
             if format == "ncnn":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
             if format == "imx":
-                assert not is_end2end
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
                 assert model.task in {"detect", "classify", "pose"}, (
                     "IMX export is only supported for detection, classification and pose estimation tasks"

@@ -150,25 +148,21 @@ def benchmark(
                 assert "C2f" in model.__str__(), "IMX only supported for YOLOv8n and YOLO11n"
             if format == "rknn":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by RKNN yet"
                 assert LINUX, "RKNN only supported on Linux"
                 assert not is_rockchip(), "RKNN Inference only supported on Rockchip devices"
             if format == "executorch":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 ExecuTorch exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by ExecuTorch yet"
             if "cpu" in device.type:
                 assert cpu, "inference not supported on CPU"
             if "cuda" in device.type:
                 assert gpu, "inference not supported on GPU"
-            if format == "ncnn":
-                assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet"
 
             # Export
             if format == "-":
                 filename = model.pt_path or model.ckpt_path or model.model_name
-                exported_model = model  # PyTorch format
+                exported_model = deepcopy(model)  # PyTorch format
             else:
-                filename = model.export(
+                filename = deepcopy(model).export(
                     imgsz=imgsz, format=format, half=half, int8=int8, data=data, device=device, verbose=False, **kwargs
                 )
                 exported_model = YOLO(filename, task=model.task)
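The recurring change above is exporting from a `deepcopy` of the model rather than the model itself, since `export()` can mutate the wrapped network in place (layer fusion, head reconfiguration) and previously let one benchmark iteration leak state into the next. A minimal sketch of the pattern; `export_isolated` and `export_fn` are hypothetical names, not Ultralytics APIs:

```python
from copy import deepcopy

def export_isolated(model, formats, export_fn):
    """Export a fresh copy of the model per format so in-place side effects
    of one export cannot contaminate later benchmark iterations."""
    files = []
    for fmt in formats:
        files.append(export_fn(deepcopy(model), fmt))  # the original model stays pristine
    return files
```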
ultralytics/utils/callbacks/wb.py
CHANGED

@@ -128,10 +128,15 @@ def _log_plots(plots, step):
 def on_pretrain_routine_start(trainer):
     """Initialize and start wandb project if module is present."""
     if not wb.run:
+        from datetime import datetime
+
+        name = str(trainer.args.name).replace("/", "-").replace(" ", "_")
         wb.init(
             project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics",
-            name=str(trainer.args.name).replace("/", "-"),
+            name=name,
             config=vars(trainer.args),
+            id=f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",  # add unique id
+            dir=str(trainer.save_dir),
         )
 
 
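The new `id` argument makes every run unique even when the experiment name repeats, which lets wandb distinguish re-runs of the same experiment. A quick sketch of the id format (values are illustrative):

```python
from datetime import datetime

name = "train/exp 1".replace("/", "-").replace(" ", "_")       # "train-exp_1"
run_id = f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"  # e.g. "train-exp_1_20250101_120000"
print(run_id)
```

Passing `dir=str(trainer.save_dir)` also keeps the wandb files next to the training outputs instead of the current working directory.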
ultralytics/utils/loss.py
CHANGED

@@ -512,9 +512,19 @@ class v8SegmentationLoss(v8DetectionLoss):
         )
         if pred_semseg is not None:
             sem_masks = batch["sem_masks"].to(self.device)  # NxHxW
-            mask_zero = sem_masks == 0  # NxHxW
             sem_masks = F.one_hot(sem_masks.long(), num_classes=self.nc).permute(0, 3, 1, 2).float()  # NxCxHxW
-
+
+            if self.overlap:
+                mask_zero = masks == 0  # NxHxW
+                sem_masks[mask_zero.unsqueeze(1).expand_as(sem_masks)] = 0
+            else:
+                batch_idx = batch["batch_idx"].view(-1)  # [total_instances]
+                for i in range(batch_size):
+                    instance_mask_i = masks[batch_idx == i]  # [num_instances_i, H, W]
+                    if len(instance_mask_i) == 0:
+                        continue
+                    sem_masks[i, :, instance_mask_i.sum(dim=0) == 0] = 0
+
             loss[4] = self.bcedice_loss(pred_semseg, sem_masks)
             loss[4] *= self.hyp.box  # seg gain

@@ -798,7 +808,7 @@ class PoseLoss26(v8PoseLoss):
         loss[0], loss[3], loss[4] = det_loss[0], det_loss[1], det_loss[2]
 
         batch_size = pred_kpts.shape[0]
-        imgsz = torch.tensor(
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=pred_kpts.dtype) * self.stride[0]
 
         pred_kpts = pred_kpts.view(batch_size, -1, *self.kpt_shape)  # (b, h*w, 17, 3)
 
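The semantic-segmentation targets are now zeroed wherever no instance mask covers a pixel, with separate paths for overlap-encoded and per-instance masks. A toy-shaped sketch of the overlap branch (shapes and values invented for illustration):

```python
import torch
import torch.nn.functional as F

nc, N, H, W = 3, 2, 4, 4
sem_masks = torch.randint(0, nc, (N, H, W))        # class-index semantic masks, NxHxW
masks = torch.randint(0, 2, (N, H, W))             # overlap-encoded instance masks, 0 = background
one_hot = F.one_hot(sem_masks.long(), num_classes=nc).permute(0, 3, 1, 2).float()  # NxCxHxW
bg = masks == 0                                    # pixels covered by no instance
one_hot[bg.unsqueeze(1).expand_as(one_hot)] = 0    # suppress semantic targets on background
```

Note the removed line compared `sem_masks` against zero; the new code keys off the instance `masks`, so semantic supervision follows actual instance coverage.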
@@ -992,7 +1002,7 @@ class v8OBBLoss(v8DetectionLoss):
         batch_size = pred_angle.shape[0]  # batch size, number of masks, mask height, mask width
 
         dtype = pred_scores.dtype
-        imgsz = torch.tensor(
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]
 
         # targets
         try:

@@ -1095,7 +1105,7 @@ class v8OBBLoss(v8DetectionLoss):
         pred_theta = pred_bboxes[..., 4]
         target_theta = target_bboxes[..., 4]
 
-        log_ar = torch.log(w_gt / h_gt)
+        log_ar = torch.log((w_gt + 1e-9) / (h_gt + 1e-9))
         scale_weight = torch.exp(-(log_ar**2) / (lambda_val**2))
 
         delta_theta = pred_theta - target_theta

@@ -1164,9 +1174,9 @@ class E2ELoss:
 class TVPDetectLoss:
     """Criterion class for computing training losses for text-visual prompt detection."""
 
-    def __init__(self, model, tal_topk=10):
+    def __init__(self, model, tal_topk=10, tal_topk2: int | None = None):
         """Initialize TVPDetectLoss with task-prompt and visual-prompt criteria using the provided model."""
-        self.vp_criterion = v8DetectionLoss(model, tal_topk)
+        self.vp_criterion = v8DetectionLoss(model, tal_topk, tal_topk2)
         # NOTE: store following info as it's changeable in __call__
         self.hyp = self.vp_criterion.hyp
         self.ori_nc = self.vp_criterion.nc

@@ -1196,8 +1206,7 @@ class TVPDetectLoss:
 
     def _get_vp_features(self, preds: dict[str, torch.Tensor]) -> list[torch.Tensor]:
         """Extract visual-prompt features from the model output."""
-
-        scores = preds["scores"][:, self.ori_nc :, :]
+        scores = preds["scores"]
         vnc = scores.shape[1]
 
         self.vp_criterion.nc = vnc
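The `1e-9` added to the aspect-ratio term is a standard guard against `log(0)` and division by zero for degenerate ground-truth boxes. A quick numeric check with toy values:

```python
import torch

w_gt = torch.tensor([0.0, 12.0])
h_gt = torch.tensor([6.0, 0.0])
print(torch.log(w_gt / h_gt))                    # tensor([-inf, inf]), which poisons the loss
print(torch.log((w_gt + 1e-9) / (h_gt + 1e-9)))  # finite values for both boxes
```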
ultralytics/utils/patches.py
CHANGED

@@ -40,9 +40,51 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
         return None
     else:
         im = cv2.imdecode(file_bytes, flags)
+        # Fallback for formats OpenCV imdecode may not support (AVIF, HEIC)
+        if im is None and filename.lower().endswith((".avif", ".heic")):
+            im = _imread_pil(filename, flags)
         return im[..., None] if im is not None and im.ndim == 2 else im  # Always ensure 3 dimensions
 
 
+_pil_plugins_registered = False
+
+
+def _imread_pil(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
+    """Read an image using PIL as fallback for formats not supported by OpenCV.
+
+    Args:
+        filename (str): Path to the file to read.
+        flags (int, optional): OpenCV imread flags (used to determine grayscale conversion).
+
+    Returns:
+        (np.ndarray | None): The read image array in BGR format, or None if reading fails.
+    """
+    global _pil_plugins_registered
+    try:
+        from PIL import Image
+
+        # Register HEIF/AVIF plugins once
+        if not _pil_plugins_registered:
+            try:
+                import pillow_heif
+
+                pillow_heif.register_heif_opener()
+            except ImportError:
+                pass
+            try:
+                import pillow_avif  # noqa: F401
+            except ImportError:
+                pass
+            _pil_plugins_registered = True
+
+        with Image.open(filename) as img:
+            if flags == cv2.IMREAD_GRAYSCALE:
+                return np.asarray(img.convert("L"))
+            return cv2.cvtColor(np.asarray(img.convert("RGB")), cv2.COLOR_RGB2BGR)
+    except Exception:
+        return None
+
+
 def imwrite(filename: str, img: np.ndarray, params: list[int] | None = None) -> bool:
     """Write an image to a file with multilanguage filename support.
 
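In use, `imread` behaves as before for OpenCV-native formats and only reaches for PIL when `cv2.imdecode` fails on an AVIF/HEIC file. A sketch with a hypothetical file name; decoding these formats assumes `pillow-heif` (and, for AVIF, `pillow-avif-plugin`) is installed:

```python
from ultralytics.utils.patches import imread

im = imread("photo.heic")  # cv2.imdecode returns None here, so the PIL fallback runs
if im is not None:
    print(im.shape)        # BGR ndarray, e.g. (H, W, 3)
```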
ultralytics/utils/tal.py
CHANGED

@@ -24,6 +24,7 @@ class TaskAlignedAssigner(nn.Module):
         alpha (float): The alpha parameter for the classification component of the task-aligned metric.
         beta (float): The beta parameter for the localization component of the task-aligned metric.
         stride (list): List of stride values for different feature levels.
+        stride_val (int): The stride value used for select_candidates_in_gts.
         eps (float): A small value to prevent division by zero.
     """
 

@@ -55,6 +56,7 @@ class TaskAlignedAssigner(nn.Module):
         self.alpha = alpha
         self.beta = beta
         self.stride = stride
+        self.stride_val = self.stride[1] if len(self.stride) > 1 else self.stride[0]
         self.eps = eps
 
     @torch.no_grad()

@@ -302,8 +304,11 @@ class TaskAlignedAssigner(nn.Module):
         """
         gt_bboxes_xywh = xyxy2xywh(gt_bboxes)
         wh_mask = gt_bboxes_xywh[..., 2:] < self.stride[0]  # the smallest stride
-
-
+        gt_bboxes_xywh[..., 2:] = torch.where(
+            (wh_mask * mask_gt).bool(),
+            torch.tensor(self.stride_val, dtype=gt_bboxes_xywh.dtype, device=gt_bboxes_xywh.device),
+            gt_bboxes_xywh[..., 2:],
+        )
         gt_bboxes = xywh2xyxy(gt_bboxes_xywh)
 
         n_anchors = xy_centers.shape[0]

@@ -357,19 +362,24 @@ class RotatedTaskAlignedAssigner(TaskAlignedAssigner):
         """Calculate IoU for rotated bounding boxes."""
         return probiou(gt_bboxes, pd_bboxes).squeeze(-1).clamp_(0)
 
-
-    def select_candidates_in_gts(xy_centers, gt_bboxes, mask_gt):
+    def select_candidates_in_gts(self, xy_centers, gt_bboxes, mask_gt):
         """Select the positive anchor center in gt for rotated bounding boxes.
 
         Args:
             xy_centers (torch.Tensor): Anchor center coordinates with shape (h*w, 2).
             gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (b, n_boxes, 5).
             mask_gt (torch.Tensor): Mask for valid ground truth boxes with shape (b, n_boxes, 1).
-            stride (list[int]): List of stride values for each feature map level.
 
         Returns:
             (torch.Tensor): Boolean mask of positive anchors with shape (b, n_boxes, h*w).
         """
+        wh_mask = gt_bboxes[..., 2:4] < self.stride[0]
+        gt_bboxes[..., 2:4] = torch.where(
+            (wh_mask * mask_gt).bool(),
+            torch.tensor(self.stride_val, dtype=gt_bboxes.dtype, device=gt_bboxes.device),
+            gt_bboxes[..., 2:4],
+        )
+
         # (b, n_boxes, 5) --> (b, n_boxes, 4, 2)
         corners = xywhr2xyxyxyxy(gt_bboxes)
         # (b, n_boxes, 1, 2)
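Both assigners now snap ground-truth boxes with sides smaller than the finest stride up to `stride_val`, so even tiny boxes can enclose at least one anchor center. A toy demonstration of the `torch.where` clamp (values are illustrative):

```python
import torch

stride_min, stride_val = 8, 16
wh = torch.tensor([[3.0, 40.0], [60.0, 5.0]])  # (n_boxes, 2) widths and heights
mask_gt = torch.ones(2, 1)                     # all boxes valid
small = wh < stride_min                        # sides smaller than the finest stride
wh = torch.where((small * mask_gt).bool(), torch.tensor(float(stride_val)), wh)
print(wh)                                      # tensor([[16., 40.], [60., 16.]])
```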
ultralytics/utils/torch_utils.py
CHANGED

@@ -78,7 +78,7 @@ def smart_inference_mode():
         if TORCH_1_9 and torch.is_inference_mode_enabled():
             return fn  # already in inference_mode, act as a pass-through
         else:
-            return (torch.inference_mode if TORCH_1_9 else torch.no_grad)()(fn)
+            return (torch.inference_mode if TORCH_1_10 else torch.no_grad)()(fn)
 
     return decorate
 
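The decorator now requires PyTorch 1.10 before choosing `torch.inference_mode`, falling back to `torch.no_grad` otherwise. A self-contained sketch of the pattern; the crude version parse below stands in for Ultralytics' actual `TORCH_1_10` constant:

```python
import torch

TORCH_1_10 = tuple(int(v) for v in torch.__version__.split("+")[0].split(".")[:2]) >= (1, 10)

def smart_inference_mode():
    """Decorator factory: prefer inference_mode when available, else no_grad."""
    def decorate(fn):
        return (torch.inference_mode if TORCH_1_10 else torch.no_grad)()(fn)
    return decorate

@smart_inference_mode()
def predict(x):
    return x * 2

print(predict(torch.ones(3)))  # runs under inference_mode on modern PyTorch
```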