dgenerate-ultralytics-headless 8.4.6__py3-none-any.whl → 8.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/METADATA +3 -3
  2. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/RECORD +37 -36
  3. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/WHEEL +1 -1
  4. tests/test_cli.py +10 -3
  5. tests/test_exports.py +64 -43
  6. tests/test_python.py +40 -11
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +5 -4
  9. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  10. ultralytics/cfg/default.yaml +2 -1
  11. ultralytics/data/augment.py +8 -0
  12. ultralytics/data/converter.py +32 -9
  13. ultralytics/data/utils.py +2 -2
  14. ultralytics/engine/exporter.py +10 -6
  15. ultralytics/engine/predictor.py +5 -0
  16. ultralytics/engine/trainer.py +6 -4
  17. ultralytics/engine/tuner.py +2 -2
  18. ultralytics/engine/validator.py +5 -0
  19. ultralytics/models/sam/predict.py +2 -2
  20. ultralytics/models/yolo/classify/train.py +14 -1
  21. ultralytics/models/yolo/detect/train.py +8 -4
  22. ultralytics/models/yolo/pose/train.py +2 -1
  23. ultralytics/models/yolo/world/train_world.py +21 -1
  24. ultralytics/models/yolo/yoloe/train.py +1 -2
  25. ultralytics/nn/autobackend.py +1 -1
  26. ultralytics/nn/modules/head.py +13 -2
  27. ultralytics/nn/tasks.py +18 -0
  28. ultralytics/solutions/security_alarm.py +1 -1
  29. ultralytics/utils/benchmarks.py +3 -9
  30. ultralytics/utils/callbacks/wb.py +6 -1
  31. ultralytics/utils/loss.py +18 -9
  32. ultralytics/utils/patches.py +42 -0
  33. ultralytics/utils/tal.py +15 -5
  34. ultralytics/utils/torch_utils.py +1 -1
  35. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/entry_points.txt +0 -0
  36. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
  37. {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/top_level.txt +0 -0
ultralytics/utils/benchmarks.py CHANGED
@@ -36,6 +36,7 @@ import platform
 import re
 import shutil
 import time
+from copy import deepcopy
 from pathlib import Path
 
 import numpy as np
@@ -101,7 +102,6 @@ def benchmark(
     device = select_device(device, verbose=False)
     if isinstance(model, (str, Path)):
         model = YOLO(model)
-    is_end2end = getattr(model.model.model[-1], "end2end", False)
     data = data or TASK2DATA[model.task]  # task to dataset, i.e. coco8.yaml for task=detect
     key = TASK2METRIC[model.task]  # task to metric, i.e. metrics/mAP50-95(B) for task=detect
 
@@ -135,14 +135,12 @@ def benchmark(
             if format == "paddle":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
                 assert model.task != "obb", "Paddle OBB bug https://github.com/PaddlePaddle/Paddle/issues/72024"
-                assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
                 assert (LINUX and not IS_JETSON) or MACOS, "Windows and Jetson Paddle exports not supported yet"
             if format == "mnn":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet"
             if format == "ncnn":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
             if format == "imx":
-                assert not is_end2end
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
                 assert model.task in {"detect", "classify", "pose"}, (
                     "IMX export is only supported for detection, classification and pose estimation tasks"
@@ -150,25 +148,21 @@ def benchmark(
                 assert "C2f" in model.__str__(), "IMX only supported for YOLOv8n and YOLO11n"
             if format == "rknn":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by RKNN yet"
                 assert LINUX, "RKNN only supported on Linux"
                 assert not is_rockchip(), "RKNN Inference only supported on Rockchip devices"
             if format == "executorch":
                 assert not isinstance(model, YOLOWorld), "YOLOWorldv2 ExecuTorch exports not supported yet"
-                assert not is_end2end, "End-to-end models not supported by ExecuTorch yet"
             if "cpu" in device.type:
                 assert cpu, "inference not supported on CPU"
             if "cuda" in device.type:
                 assert gpu, "inference not supported on GPU"
-            if format == "ncnn":
-                assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet"
 
             # Export
             if format == "-":
                 filename = model.pt_path or model.ckpt_path or model.model_name
-                exported_model = model  # PyTorch format
+                exported_model = deepcopy(model)  # PyTorch format
             else:
-                filename = model.export(
+                filename = deepcopy(model).export(
                     imgsz=imgsz, format=format, half=half, int8=int8, data=data, device=device, verbose=False, **kwargs
                 )
                 exported_model = YOLO(filename, task=model.task)
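
The deepcopy changes above matter presumably because export can mutate the model in place (e.g. layer fusion), so reusing one model object across formats could leak state between benchmark runs. A toy sketch of the pattern (the Model class here is a stand-in, not the real YOLO API):

    from copy import deepcopy

    class Model:
        """Stand-in for a model whose export() mutates it in place."""
        def __init__(self):
            self.fused = False

        def export(self):
            self.fused = True  # exporting fuses layers, changing the object
            return "artifact"

    m = Model()
    deepcopy(m).export()     # mutation lands on the throwaway copy
    assert m.fused is False  # original stays pristine for the next format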
ultralytics/utils/callbacks/wb.py CHANGED
@@ -128,10 +128,15 @@ def _log_plots(plots, step):
 def on_pretrain_routine_start(trainer):
     """Initialize and start wandb project if module is present."""
     if not wb.run:
+        from datetime import datetime
+
+        name = str(trainer.args.name).replace("/", "-").replace(" ", "_")
         wb.init(
             project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics",
-            name=str(trainer.args.name).replace("/", "-"),
+            name=name,
             config=vars(trainer.args),
+            id=f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",  # add unique id
+            dir=str(trainer.save_dir),
         )
 
 
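With the timestamped id, repeated runs that share a name create distinct W&B runs instead of resuming one another, and dir= keeps wandb files inside the trainer's save directory. A quick sketch of the generated id (the name is illustrative):

    from datetime import datetime

    name = "yolo11n coco8/exp".replace("/", "-").replace(" ", "_")
    run_id = f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    print(run_id)  # e.g. yolo11n_coco8-exp_20250101_093000
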
ultralytics/utils/loss.py CHANGED
@@ -512,9 +512,19 @@ class v8SegmentationLoss(v8DetectionLoss):
         )
         if pred_semseg is not None:
             sem_masks = batch["sem_masks"].to(self.device)  # NxHxW
-            mask_zero = sem_masks == 0  # NxHxW
             sem_masks = F.one_hot(sem_masks.long(), num_classes=self.nc).permute(0, 3, 1, 2).float()  # NxCxHxW
-            sem_masks[mask_zero.unsqueeze(1).expand_as(sem_masks)] = 0
+
+            if self.overlap:
+                mask_zero = masks == 0  # NxHxW
+                sem_masks[mask_zero.unsqueeze(1).expand_as(sem_masks)] = 0
+            else:
+                batch_idx = batch["batch_idx"].view(-1)  # [total_instances]
+                for i in range(batch_size):
+                    instance_mask_i = masks[batch_idx == i]  # [num_instances_i, H, W]
+                    if len(instance_mask_i) == 0:
+                        continue
+                    sem_masks[i, :, instance_mask_i.sum(dim=0) == 0] = 0
+
             loss[4] = self.bcedice_loss(pred_semseg, sem_masks)
             loss[4] *= self.hyp.box  # seg gain
 
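The split handles the two mask encodings: with overlap=True all instances share one index map, so masks == 0 already marks background, while with overlap=False each instance is a separate binary mask and background is wherever no instance mask for that image is set. A toy non-overlap illustration with dummy tensors (shapes are illustrative):

    import torch

    # Two binary instance masks for one image (overlap=False), 4x4 pixels
    masks = torch.tensor([[[1, 1, 0, 0],
                           [1, 1, 0, 0],
                           [0, 0, 0, 0],
                           [0, 0, 0, 0]],
                          [[0, 0, 0, 0],
                           [0, 0, 0, 0],
                           [0, 0, 1, 1],
                           [0, 0, 1, 1]]], dtype=torch.float32)

    background = masks.sum(dim=0) == 0  # pixel is background iff no instance covers it
    sem_masks = torch.ones(3, 4, 4)     # dummy one-hot semantic masks, 3 classes
    sem_masks[:, background] = 0        # zero every class channel on background pixels
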
@@ -798,7 +808,7 @@ class PoseLoss26(v8PoseLoss):
         loss[0], loss[3], loss[4] = det_loss[0], det_loss[1], det_loss[2]
 
         batch_size = pred_kpts.shape[0]
-        imgsz = torch.tensor(batch["resized_shape"][0], device=self.device, dtype=pred_kpts.dtype)  # image size (h,w)
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=pred_kpts.dtype) * self.stride[0]
 
         pred_kpts = pred_kpts.view(batch_size, -1, *self.kpt_shape)  # (b, h*w, 17, 3)
 
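Both this and the v8OBBLoss change below derive the training image size from the first feature map and its stride instead of batch["resized_shape"], which stays correct when the dataloader's recorded shape and the actual padded network input differ. A worked example with assumed shapes:

    import torch

    feats0 = torch.zeros(2, 64, 80, 80)  # (b, c, h, w): P3 map at stride 8
    imgsz = torch.tensor(feats0.shape[2:], dtype=torch.float32) * 8.0
    print(imgsz)  # tensor([640., 640.]) -- the (h, w) used to scale targets
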
@@ -992,7 +1002,7 @@ class v8OBBLoss(v8DetectionLoss):
         batch_size = pred_angle.shape[0]  # batch size, number of masks, mask height, mask width
 
         dtype = pred_scores.dtype
-        imgsz = torch.tensor(batch["resized_shape"][0], device=self.device, dtype=dtype)  # image size (h,w)
+        imgsz = torch.tensor(preds["feats"][0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]
 
         # targets
         try:
@@ -1095,7 +1105,7 @@ class v8OBBLoss(v8DetectionLoss):
         pred_theta = pred_bboxes[..., 4]
         target_theta = target_bboxes[..., 4]
 
-        log_ar = torch.log(w_gt / h_gt)
+        log_ar = torch.log((w_gt + 1e-9) / (h_gt + 1e-9))
         scale_weight = torch.exp(-(log_ar**2) / (lambda_val**2))
 
         delta_theta = pred_theta - target_theta
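
The 1e-9 terms guard against degenerate ground-truth boxes: a zero height divides by zero and a zero width sends torch.log to -inf, either of which would poison scale_weight with inf/NaN. A small numeric check:

    import torch

    w_gt = torch.tensor([0.0, 10.0])
    h_gt = torch.tensor([5.0, 0.0])
    print(torch.log(w_gt / h_gt))                    # tensor([-inf, inf])
    print(torch.log((w_gt + 1e-9) / (h_gt + 1e-9)))  # finite, approx [-22.3, 23.0]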
@@ -1164,9 +1174,9 @@ class E2ELoss:
 class TVPDetectLoss:
     """Criterion class for computing training losses for text-visual prompt detection."""
 
-    def __init__(self, model, tal_topk=10):
+    def __init__(self, model, tal_topk=10, tal_topk2: int | None = None):
         """Initialize TVPDetectLoss with task-prompt and visual-prompt criteria using the provided model."""
-        self.vp_criterion = v8DetectionLoss(model, tal_topk)
+        self.vp_criterion = v8DetectionLoss(model, tal_topk, tal_topk2)
         # NOTE: store following info as it's changeable in __call__
         self.hyp = self.vp_criterion.hyp
         self.ori_nc = self.vp_criterion.nc
@@ -1196,8 +1206,7 @@ class TVPDetectLoss:
 
     def _get_vp_features(self, preds: dict[str, torch.Tensor]) -> list[torch.Tensor]:
         """Extract visual-prompt features from the model output."""
-        # NOTE: remove empty placeholder
-        scores = preds["scores"][:, self.ori_nc :, :]
+        scores = preds["scores"]
         vnc = scores.shape[1]
 
         self.vp_criterion.nc = vnc
ultralytics/utils/patches.py CHANGED
@@ -40,9 +40,51 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
         return None
     else:
         im = cv2.imdecode(file_bytes, flags)
+        # Fallback for formats OpenCV imdecode may not support (AVIF, HEIC)
+        if im is None and filename.lower().endswith((".avif", ".heic")):
+            im = _imread_pil(filename, flags)
     return im[..., None] if im is not None and im.ndim == 2 else im  # Always ensure 3 dimensions
 
 
+_pil_plugins_registered = False
+
+
+def _imread_pil(filename: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray | None:
+    """Read an image using PIL as fallback for formats not supported by OpenCV.
+
+    Args:
+        filename (str): Path to the file to read.
+        flags (int, optional): OpenCV imread flags (used to determine grayscale conversion).
+
+    Returns:
+        (np.ndarray | None): The read image array in BGR format, or None if reading fails.
+    """
+    global _pil_plugins_registered
+    try:
+        from PIL import Image
+
+        # Register HEIF/AVIF plugins once
+        if not _pil_plugins_registered:
+            try:
+                import pillow_heif
+
+                pillow_heif.register_heif_opener()
+            except ImportError:
+                pass
+            try:
+                import pillow_avif  # noqa: F401
+            except ImportError:
+                pass
+            _pil_plugins_registered = True
+
+        with Image.open(filename) as img:
+            if flags == cv2.IMREAD_GRAYSCALE:
+                return np.asarray(img.convert("L"))
+            return cv2.cvtColor(np.asarray(img.convert("RGB")), cv2.COLOR_RGB2BGR)
+    except Exception:
+        return None
+
+
 def imwrite(filename: str, img: np.ndarray, params: list[int] | None = None) -> bool:
     """Write an image to a file with multilanguage filename support.
 
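Callers don't change: imread simply gains a PIL fallback for AVIF/HEIC, active only when the optional pillow-heif (or pillow-avif-plugin) package is installed. A usage sketch (the file path is hypothetical):

    import cv2
    from ultralytics.utils.patches import imread

    im = imread("photo.heic")  # cv2.imdecode returns None -> PIL fallback kicks in
    if im is not None:
        print(im.shape)        # BGR array, e.g. (1080, 1920, 3)

    gray = imread("photo.heic", cv2.IMREAD_GRAYSCALE)  # PIL path converts via "L" mode
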
ultralytics/utils/tal.py CHANGED
@@ -24,6 +24,7 @@ class TaskAlignedAssigner(nn.Module):
         alpha (float): The alpha parameter for the classification component of the task-aligned metric.
         beta (float): The beta parameter for the localization component of the task-aligned metric.
         stride (list): List of stride values for different feature levels.
+        stride_val (int): The stride value used for select_candidates_in_gts.
         eps (float): A small value to prevent division by zero.
     """
@@ -55,6 +56,7 @@ class TaskAlignedAssigner(nn.Module):
         self.alpha = alpha
         self.beta = beta
         self.stride = stride
+        self.stride_val = self.stride[1] if len(self.stride) > 1 else self.stride[0]
         self.eps = eps
 
     @torch.no_grad()
@@ -302,8 +304,11 @@ class TaskAlignedAssigner(nn.Module):
         """
         gt_bboxes_xywh = xyxy2xywh(gt_bboxes)
         wh_mask = gt_bboxes_xywh[..., 2:] < self.stride[0]  # the smallest stride
-        stride_val = torch.tensor(self.stride[1], dtype=gt_bboxes_xywh.dtype, device=gt_bboxes_xywh.device)
-        gt_bboxes_xywh[..., 2:] = torch.where((wh_mask * mask_gt).bool(), stride_val, gt_bboxes_xywh[..., 2:])
+        gt_bboxes_xywh[..., 2:] = torch.where(
+            (wh_mask * mask_gt).bool(),
+            torch.tensor(self.stride_val, dtype=gt_bboxes_xywh.dtype, device=gt_bboxes_xywh.device),
+            gt_bboxes_xywh[..., 2:],
+        )
         gt_bboxes = xywh2xyxy(gt_bboxes_xywh)
 
         n_anchors = xy_centers.shape[0]
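
The refactor hoists the fallback stride into self.stride_val (the second stride level if present, else the only one) so the rotated assigner below can reuse it: ground-truth sides smaller than the finest stride are widened so tiny boxes still enclose at least one anchor center. Minimal sketch with dummy values:

    import torch

    stride = [8, 16]
    stride_val = stride[1] if len(stride) > 1 else stride[0]  # mirrors __init__

    wh = torch.tensor([[3.0, 40.0], [50.0, 60.0]])  # (n_boxes, 2) box sides
    mask_gt = torch.tensor([[1.0], [1.0]])          # valid-box mask
    wh_mask = wh < stride[0]                        # sides below the finest stride
    wh = torch.where((wh_mask * mask_gt).bool(), torch.tensor(float(stride_val)), wh)
    print(wh)  # tensor([[16., 40.], [50., 60.]]) -- the 3px side was widened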
@@ -357,19 +362,24 @@ class RotatedTaskAlignedAssigner(TaskAlignedAssigner):
         """Calculate IoU for rotated bounding boxes."""
         return probiou(gt_bboxes, pd_bboxes).squeeze(-1).clamp_(0)
 
-    @staticmethod
-    def select_candidates_in_gts(xy_centers, gt_bboxes, mask_gt):
+    def select_candidates_in_gts(self, xy_centers, gt_bboxes, mask_gt):
         """Select the positive anchor center in gt for rotated bounding boxes.
 
         Args:
             xy_centers (torch.Tensor): Anchor center coordinates with shape (h*w, 2).
             gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (b, n_boxes, 5).
             mask_gt (torch.Tensor): Mask for valid ground truth boxes with shape (b, n_boxes, 1).
-            stride (list[int]): List of stride values for each feature map level.
 
         Returns:
             (torch.Tensor): Boolean mask of positive anchors with shape (b, n_boxes, h*w).
         """
+        wh_mask = gt_bboxes[..., 2:4] < self.stride[0]
+        gt_bboxes[..., 2:4] = torch.where(
+            (wh_mask * mask_gt).bool(),
+            torch.tensor(self.stride_val, dtype=gt_bboxes.dtype, device=gt_bboxes.device),
+            gt_bboxes[..., 2:4],
+        )
+
         # (b, n_boxes, 5) --> (b, n_boxes, 4, 2)
         corners = xywhr2xyxyxyxy(gt_bboxes)
         # (b, n_boxes, 1, 2)
ultralytics/utils/torch_utils.py CHANGED
@@ -78,7 +78,7 @@ def smart_inference_mode():
         if TORCH_1_9 and torch.is_inference_mode_enabled():
             return fn  # already in inference_mode, act as a pass-through
         else:
-            return (torch.inference_mode if TORCH_1_9 else torch.no_grad)()(fn)
+            return (torch.inference_mode if TORCH_1_10 else torch.no_grad)()(fn)
 
     return decorate
 
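For reference, the decorator now gates on TORCH_1_10 when choosing torch.inference_mode over torch.no_grad. A condensed sketch of the logic, assuming a recent torch (TORCH_1_10 here is a local stand-in for the version flag in ultralytics.utils.torch_utils):

    import torch

    TORCH_1_10 = True  # stand-in for the real version check

    def smart_inference_mode():
        """Sketch: pass through if already in inference mode, else wrap fn."""
        def decorate(fn):
            if torch.is_inference_mode_enabled():
                return fn  # already inside inference_mode
            return (torch.inference_mode if TORCH_1_10 else torch.no_grad)()(fn)
        return decorate

    @smart_inference_mode()
    def predict(x):
        return x * 2  # runs with gradient tracking disabled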