PyPI - dgenerate-ultralytics-headless - Versions diffs - 8.3.195__py3-none-any.whl → 8.3.196__py3-none-any.whl - Mend

dgenerate-ultralytics-headless 8.3.195__py3-none-any.whl → 8.3.196__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/METADATA +1 -1
{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/RECORD +35 -35
ultralytics/__init__.py +1 -1
ultralytics/cfg/__init__.py +1 -0
ultralytics/cfg/default.yaml +1 -0
ultralytics/data/augment.py +1 -1
ultralytics/data/build.py +5 -1
ultralytics/engine/exporter.py +19 -31
ultralytics/engine/predictor.py +3 -1
ultralytics/engine/trainer.py +15 -4
ultralytics/engine/validator.py +6 -2
ultralytics/models/yolo/classify/train.py +1 -11
ultralytics/models/yolo/detect/train.py +32 -6
ultralytics/models/yolo/detect/val.py +6 -5
ultralytics/models/yolo/obb/train.py +0 -9
ultralytics/models/yolo/pose/train.py +1 -9
ultralytics/models/yolo/pose/val.py +1 -1
ultralytics/models/yolo/segment/train.py +1 -9
ultralytics/models/yolo/segment/val.py +1 -1
ultralytics/models/yolo/world/train.py +4 -4
ultralytics/models/yolo/world/train_world.py +2 -2
ultralytics/models/yolo/yoloe/train.py +3 -12
ultralytics/models/yolo/yoloe/val.py +0 -7
ultralytics/nn/modules/head.py +2 -1
ultralytics/nn/tasks.py +4 -2
ultralytics/utils/__init__.py +30 -19
ultralytics/utils/callbacks/tensorboard.py +2 -2
ultralytics/utils/checks.py +2 -0
ultralytics/utils/loss.py +14 -8
ultralytics/utils/plotting.py +1 -0
ultralytics/utils/torch_utils.py +111 -9
{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/WHEEL +0 -0
{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/entry_points.txt +0 -0
{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/licenses/LICENSE +0 -0
{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/top_level.txt +0 -0

ultralytics/models/yolo/obb/train.py CHANGED Viewed

@@ -37,21 +37,12 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
         """
         Initialize an OBBTrainer object for training Oriented Bounding Box (OBB) models.
-        This trainer extends the DetectionTrainer class to specialize in training models that detect oriented
-        bounding boxes. It automatically sets the task to 'obb' in the configuration.
         Args:
             cfg (dict, optional): Configuration dictionary for the trainer. Contains training parameters and
                 model configuration.
             overrides (dict, optional): Dictionary of parameter overrides for the configuration. Any values here
                 will take precedence over those in cfg.
             _callbacks (list[Any], optional): List of callback functions to be invoked during training.
-        Examples:
-            >>> from ultralytics.models.yolo.obb import OBBTrainer
-            >>> args = dict(model="yolo11n-obb.pt", data="dota8.yaml", epochs=3)
-            >>> trainer = OBBTrainer(overrides=args)
-            >>> trainer.train()
         """
         if overrides is None:
             overrides = {}

ultralytics/models/yolo/pose/train.py CHANGED Viewed

@@ -44,9 +44,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
         """
         Initialize a PoseTrainer object for training YOLO pose estimation models.
-        This initializes a trainer specialized for pose estimation tasks, setting the task to 'pose' and
-        handling specific configurations needed for keypoint detection models.
         Args:
             cfg (dict, optional): Default configuration dictionary containing training parameters.
             overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
@@ -55,17 +52,12 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
         Notes:
             This trainer will automatically set the task to 'pose' regardless of what is provided in overrides.
             A warning is issued when using Apple MPS device due to known bugs with pose models.
-        Examples:
-            >>> from ultralytics.models.yolo.pose import PoseTrainer
-            >>> args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml", epochs=3)
-            >>> trainer = PoseTrainer(overrides=args)
-            >>> trainer.train()
         """
         if overrides is None:
             overrides = {}
         overrides["task"] = "pose"
         super().__init__(cfg, overrides, _callbacks)
+        self.dynamic_tensors = ["batch_idx", "cls", "bboxes", "keypoints"]
         if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
             LOGGER.warning(

ultralytics/models/yolo/pose/val.py CHANGED Viewed

@@ -86,7 +86,7 @@ class PoseValidator(DetectionValidator):
     def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
         """Preprocess batch by converting keypoints data to float and moving it to the device."""
         batch = super().preprocess(batch)
-        batch["keypoints"] = batch["keypoints"].to(self.device, non_blocking=True).float()
+        batch["keypoints"] = batch["keypoints"].float()
         return batch
     def get_desc(self) -> str:

ultralytics/models/yolo/segment/train.py CHANGED Viewed

@@ -32,24 +32,16 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
         """
         Initialize a SegmentationTrainer object.
-        This initializes a trainer for segmentation tasks, extending the detection trainer with segmentation-specific
-        functionality. It sets the task to 'segment' and prepares the trainer for training segmentation models.
         Args:
             cfg (dict): Configuration dictionary with default training settings.
             overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
             _callbacks (list, optional): List of callback functions to be executed during training.
-        Examples:
-            >>> from ultralytics.models.yolo.segment import SegmentationTrainer
-            >>> args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml", epochs=3)
-            >>> trainer = SegmentationTrainer(overrides=args)
-            >>> trainer.train()
         """
         if overrides is None:
             overrides = {}
         overrides["task"] = "segment"
         super().__init__(cfg, overrides, _callbacks)
+        self.dynamic_tensors = ["batch_idx", "cls", "bboxes", "masks"]
     def get_model(self, cfg: dict | str | None = None, weights: str | Path | None = None, verbose: bool = True):
         """

ultralytics/models/yolo/segment/val.py CHANGED Viewed

@@ -63,7 +63,7 @@ class SegmentationValidator(DetectionValidator):
             (dict[str, Any]): Preprocessed batch.
         """
         batch = super().preprocess(batch)
-        batch["masks"] = batch["masks"].to(self.device, non_blocking=True).float()
+        batch["masks"] = batch["masks"].float()
         return batch
     def init_metrics(self, model: torch.nn.Module) -> None:

ultralytics/models/yolo/world/train.py CHANGED Viewed

@@ -12,7 +12,7 @@ from ultralytics.data import build_yolo_dataset
 from ultralytics.models.yolo.detect import DetectionTrainer
 from ultralytics.nn.tasks import WorldModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
-from ultralytics.utils.torch_utils import de_parallel
+from ultralytics.utils.torch_utils import unwrap_model
 def on_pretrain_routine_end(trainer) -> None:
@@ -20,7 +20,7 @@ def on_pretrain_routine_end(trainer) -> None:
     if RANK in {-1, 0}:
         # Set class names for evaluation
         names = [name.split("/", 1)[0] for name in list(trainer.test_loader.dataset.data["names"].values())]
-        de_parallel(trainer.ema.ema).set_classes(names, cache_clip_model=False)
+        unwrap_model(trainer.ema.ema).set_classes(names, cache_clip_model=False)
 class WorldTrainer(DetectionTrainer):
@@ -105,7 +105,7 @@ class WorldTrainer(DetectionTrainer):
         Returns:
             (Any): YOLO dataset configured for training or validation.
         """
-        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
         dataset = build_yolo_dataset(
             self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
         )
@@ -160,7 +160,7 @@ class WorldTrainer(DetectionTrainer):
                 return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, cache_clip_model=False)
+        txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map

ultralytics/models/yolo/world/train_world.py CHANGED Viewed

@@ -6,7 +6,7 @@ from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_data
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
 from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
-from ultralytics.utils.torch_utils import de_parallel
+from ultralytics.utils.torch_utils import unwrap_model
 class WorldTrainerFromScratch(WorldTrainer):
@@ -101,7 +101,7 @@ class WorldTrainerFromScratch(WorldTrainer):
         Returns:
             (YOLOConcatDataset | Dataset): The constructed dataset for training or validation.
         """
-        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
         if mode != "train":
             return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=False, stride=gs)
         datasets = [

ultralytics/models/yolo/yoloe/train.py CHANGED Viewed

@@ -13,7 +13,7 @@ from ultralytics.data.augment import LoadVisualPrompt
 from ultralytics.models.yolo.detect import DetectionTrainer, DetectionValidator
 from ultralytics.nn.tasks import YOLOEModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
-from ultralytics.utils.torch_utils import de_parallel
+from ultralytics.utils.torch_utils import unwrap_model
 from ..world.train_world import WorldTrainerFromScratch
 from .val import YOLOEDetectValidator
@@ -39,9 +39,6 @@ class YOLOETrainer(DetectionTrainer):
         """
         Initialize the YOLOE Trainer with specified configurations.
-        This method sets up the YOLOE trainer with the provided configuration and overrides, initializing
-        the training environment, model, and callbacks for YOLOE object detection training.
         Args:
             cfg (dict): Configuration dictionary with default training settings from DEFAULT_CFG.
             overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
@@ -102,7 +99,7 @@ class YOLOETrainer(DetectionTrainer):
         Returns:
             (Dataset): YOLO dataset configured for training or validation.
         """
-        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
         return build_yolo_dataset(
             self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
         )
@@ -223,7 +220,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
                 return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats = de_parallel(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
+        txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
@@ -313,9 +310,3 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
                 d.transforms.append(LoadVisualPrompt())
         else:
             self.train_loader.dataset.transforms.append(LoadVisualPrompt())
-    def preprocess_batch(self, batch):
-        """Preprocess a batch of images for YOLOE training, moving visual prompts to the appropriate device."""
-        batch = super().preprocess_batch(batch)
-        batch["visuals"] = batch["visuals"].to(self.device, non_blocking=True)
-        return batch

ultralytics/models/yolo/yoloe/val.py CHANGED Viewed

@@ -98,13 +98,6 @@ class YOLOEDetectValidator(DetectionValidator):
         visual_pe[cls_visual_num == 0] = 0
         return visual_pe.unsqueeze(0)
-    def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
-        """Preprocess batch data, ensuring visuals are on the same device as images."""
-        batch = super().preprocess(batch)
-        if "visuals" in batch:
-            batch["visuals"] = batch["visuals"].to(batch["img"].device, non_blocking=True)
-        return batch
     def get_vpe_dataloader(self, data: dict[str, Any]) -> torch.utils.data.DataLoader:
         """
         Create a dataloader for LVIS training visual prompt samples.

ultralytics/nn/modules/head.py CHANGED Viewed

@@ -13,7 +13,7 @@ from torch.nn.init import constant_, xavier_uniform_
 from ultralytics.utils import NOT_MACOS14
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
-from ultralytics.utils.torch_utils import fuse_conv_and_bn, smart_inference_mode
+from ultralytics.utils.torch_utils import disable_dynamo, fuse_conv_and_bn, smart_inference_mode
 from .block import DFL, SAVPE, BNContrastiveHead, ContrastiveHead, Proto, Residual, SwiGLUFFN
 from .conv import Conv, DWConv
@@ -149,6 +149,7 @@ class Detect(nn.Module):
         y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})
+    @disable_dynamo
     def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
         """
         Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.

ultralytics/nn/tasks.py CHANGED Viewed

@@ -334,7 +334,8 @@ class BaseModel(torch.nn.Module):
         if getattr(self, "criterion", None) is None:
             self.criterion = self.init_criterion()
-        preds = self.forward(batch["img"]) if preds is None else preds
+        if preds is None:
+            preds = self.forward(batch["img"])
         return self.criterion(preds, batch)
     def init_criterion(self):
@@ -775,7 +776,8 @@ class RTDETRDetectionModel(DetectionModel):
             "gt_groups": gt_groups,
         }
-        preds = self.predict(img, batch=targets) if preds is None else preds
+        if preds is None:
+            preds = self.predict(img, batch=targets)
         dec_bboxes, dec_scores, enc_bboxes, enc_scores, dn_meta = preds if self.training else preds[1]
         if dn_meta is None:
             dn_bboxes, dn_scores = None, None

ultralytics/utils/__init__.py CHANGED Viewed

@@ -857,7 +857,7 @@ def get_ubuntu_version():
 def get_user_config_dir(sub_dir="Ultralytics"):
     """
-    Return the appropriate config directory based on the environment operating system.
+    Return a writable config dir, preferring YOLO_CONFIG_DIR and being OS-aware.
     Args:
         sub_dir (str): The name of the subdirectory to create.
@@ -865,27 +865,38 @@ def get_user_config_dir(sub_dir="Ultralytics"):
     Returns:
         (Path): The path to the user config directory.
     """
-    if WINDOWS:
-        path = Path.home() / "AppData" / "Roaming" / sub_dir
-    elif MACOS:  # macOS
-        path = Path.home() / "Library" / "Application Support" / sub_dir
+    if env_dir := os.getenv("YOLO_CONFIG_DIR"):
+        p = Path(env_dir).expanduser() / sub_dir
     elif LINUX:
-        path = Path.home() / ".config" / sub_dir
+        p = Path(os.getenv("XDG_CONFIG_HOME", Path.home() / ".config")) / sub_dir
+    elif WINDOWS:
+        p = Path.home() / "AppData" / "Roaming" / sub_dir
+    elif MACOS:
+        p = Path.home() / "Library" / "Application Support" / sub_dir
     else:
         raise ValueError(f"Unsupported operating system: {platform.system()}")
-    # GCP and AWS lambda fix, only /tmp is writeable
-    if not is_dir_writeable(path.parent):
-        LOGGER.warning(
-            f"user config directory '{path}' is not writeable, defaulting to '/tmp' or CWD. "
-            "Alternatively you can define a YOLO_CONFIG_DIR environment variable for this path."
-        )
-        path = Path("/tmp") / sub_dir if is_dir_writeable("/tmp") else Path().cwd() / sub_dir
-    # Create the subdirectory if it does not exist
-    path.mkdir(parents=True, exist_ok=True)
+    if p.exists():  # already created → trust it
+        return p
+    if is_dir_writeable(p.parent):  # create if possible
+        p.mkdir(parents=True, exist_ok=True)
+        return p
+    # Fallbacks for Docker, GCP/AWS functions where only /tmp is writeable
+    for alt in [Path("/tmp") / sub_dir, Path.cwd() / sub_dir]:
+        if alt.exists():
+            return alt
+        if is_dir_writeable(alt.parent):
+            alt.mkdir(parents=True, exist_ok=True)
+            LOGGER.warning(
+                f"user config directory '{p}' is not writeable, using '{alt}'. Set YOLO_CONFIG_DIR to override."
+            )
+            return alt
-    return path
+    # Last fallback → CWD
+    p = Path.cwd() / sub_dir
+    p.mkdir(parents=True, exist_ok=True)
+    return p
 # Define constants (required below)
@@ -899,7 +910,7 @@ IS_JUPYTER = is_jupyter()
 IS_PIP_PACKAGE = is_pip_package()
 IS_RASPBERRYPI = is_raspberrypi()
 GIT = GitRepo()
-USER_CONFIG_DIR = Path(os.getenv("YOLO_CONFIG_DIR") or get_user_config_dir())  # Ultralytics settings dir
+USER_CONFIG_DIR = get_user_config_dir()  # Ultralytics settings dir
 SETTINGS_FILE = USER_CONFIG_DIR / "settings.json"
@@ -1383,7 +1394,7 @@ class SettingsManager(JSONDict):
 def deprecation_warn(arg, new_arg=None):
     """Issue a deprecation warning when a deprecated argument is used, suggesting an updated argument."""
-    msg = f"'{arg}' is deprecated and will be removed in in the future."
+    msg = f"'{arg}' is deprecated and will be removed in the future."
     if new_arg is not None:
         msg += f" Use '{new_arg}' instead."
     LOGGER.warning(msg)

ultralytics/utils/callbacks/tensorboard.py CHANGED Viewed

@@ -70,14 +70,14 @@ def _log_tensorboard_graph(trainer) -> None:
         # Try simple method first (YOLO)
         try:
             trainer.model.eval()  # place in .eval() mode to avoid BatchNorm statistics changes
-            WRITER.add_graph(torch.jit.trace(torch_utils.de_parallel(trainer.model), im, strict=False), [])
+            WRITER.add_graph(torch.jit.trace(torch_utils.unwrap_model(trainer.model), im, strict=False), [])
             LOGGER.info(f"{PREFIX}model graph visualization added ✅")
             return
         except Exception:
             # Fallback to TorchScript export steps (RTDETR)
             try:
-                model = deepcopy(torch_utils.de_parallel(trainer.model))
+                model = deepcopy(torch_utils.unwrap_model(trainer.model))
                 model.eval()
                 model = model.fuse(verbose=False)
                 for m in model.modules():

ultralytics/utils/checks.py CHANGED Viewed

@@ -452,6 +452,8 @@ def check_torchvision():
     to the compatibility table based on: https://github.com/pytorch/vision#installation.
     """
     compatibility_table = {
+        "2.9": ["0.24"],
+        "2.8": ["0.23"],
         "2.7": ["0.22"],
         "2.6": ["0.21"],
         "2.5": ["0.20"],

ultralytics/utils/loss.py CHANGED Viewed

@@ -11,7 +11,7 @@ import torch.nn.functional as F
 from ultralytics.utils.metrics import OKS_SIGMA
 from ultralytics.utils.ops import crop_mask, xywh2xyxy, xyxy2xywh
 from ultralytics.utils.tal import RotatedTaskAlignedAssigner, TaskAlignedAssigner, dist2bbox, dist2rbox, make_anchors
-from ultralytics.utils.torch_utils import autocast
+from ultralytics.utils.torch_utils import autocast, disable_dynamo
 from .metrics import bbox_iou, probiou
 from .tal import bbox2dist
@@ -215,6 +215,7 @@ class v8DetectionLoss:
         self.assigner = TaskAlignedAssigner(topk=tal_topk, num_classes=self.nc, alpha=0.5, beta=6.0)
         self.bbox_loss = BboxLoss(m.reg_max).to(device)
         self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)
+        disable_dynamo(self.__class__)  # exclude from compile
     def preprocess(self, targets: torch.Tensor, batch_size: int, scale_tensor: torch.Tensor) -> torch.Tensor:
         """Preprocess targets by converting to tensor format and scaling coordinates."""
@@ -260,7 +261,7 @@ class v8DetectionLoss:
         # Targets
         targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
-        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
+        targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
         gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
         mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)
@@ -287,9 +288,14 @@ class v8DetectionLoss:
         # Bbox loss
         if fg_mask.sum():
-            target_bboxes /= stride_tensor
             loss[0], loss[2] = self.bbox_loss(
-                pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
+                pred_distri,
+                pred_bboxes,
+                anchor_points,
+                target_bboxes / stride_tensor,
+                target_scores,
+                target_scores_sum,
+                fg_mask,
             )
         loss[0] *= self.hyp.box  # box gain
@@ -329,7 +335,7 @@ class v8SegmentationLoss(v8DetectionLoss):
         try:
             batch_idx = batch["batch_idx"].view(-1, 1)
             targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"]), 1)
-            targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
+            targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
             gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
             mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)
         except RuntimeError as e:
@@ -388,7 +394,7 @@ class v8SegmentationLoss(v8DetectionLoss):
         loss[2] *= self.hyp.cls  # cls gain
         loss[3] *= self.hyp.dfl  # dfl gain
-        return loss * batch_size, loss.detach()  # loss(box, cls, dfl)
+        return loss * batch_size, loss.detach()  # loss(box, seg, cls, dfl)
     @staticmethod
     def single_mask_loss(
@@ -516,7 +522,7 @@ class v8PoseLoss(v8DetectionLoss):
         batch_size = pred_scores.shape[0]
         batch_idx = batch["batch_idx"].view(-1, 1)
         targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"]), 1)
-        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
+        targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
         gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
         mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)
@@ -704,7 +710,7 @@ class v8OBBLoss(v8DetectionLoss):
             targets = torch.cat((batch_idx, batch["cls"].view(-1, 1), batch["bboxes"].view(-1, 5)), 1)
             rw, rh = targets[:, 4] * imgsz[0].item(), targets[:, 5] * imgsz[1].item()
             targets = targets[(rw >= 2) & (rh >= 2)]  # filter rboxes of tiny size to stabilize training
-            targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
+            targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
             gt_labels, gt_bboxes = targets.split((1, 5), 2)  # cls, xywhr
             mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)
         except RuntimeError as e:

ultralytics/utils/plotting.py CHANGED Viewed

@@ -1004,6 +1004,7 @@ def plot_tune_results(csv_file: str = "tune_results.csv"):
     _save_one_file(csv_file.with_name("tune_fitness.png"))
+@plt_settings()
 def feature_visualization(x, module_type: str, stage: int, n: int = 32, save_dir: Path = Path("runs/detect/exp")):
     """
     Visualize feature maps of a given model module during inference.

ultralytics/utils/torch_utils.py CHANGED Viewed

@@ -429,7 +429,7 @@ def get_flops(model, imgsz=640):
         return 0.0  # if not installed return 0.0 GFLOPs
     try:
-        model = de_parallel(model)
+        model = unwrap_model(model)
         p = next(model.parameters())
         if not isinstance(imgsz, list):
             imgsz = [imgsz, imgsz]  # expand if int/float
@@ -460,7 +460,7 @@ def get_flops_with_torch_profiler(model, imgsz=640):
     """
     if not TORCH_2_0:  # torch profiler implemented in torch>=2.0
         return 0.0
-    model = de_parallel(model)
+    model = unwrap_model(model)
     p = next(model.parameters())
     if not isinstance(imgsz, list):
         imgsz = [imgsz, imgsz]  # expand if int/float
@@ -577,17 +577,24 @@ def is_parallel(model):
     return isinstance(model, (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel))
-def de_parallel(model):
+def unwrap_model(m: nn.Module) -> nn.Module:
     """
-    De-parallelize a model: return single-GPU model if model is of type DP or DDP.
+    Unwrap compiled and parallel models to get the base model.
     Args:
-        model (nn.Module): Model to de-parallelize.
+        m (nn.Module): A model that may be wrapped by torch.compile (._orig_mod) or parallel wrappers such as
+            DataParallel/DistributedDataParallel (.module).
     Returns:
-        (nn.Module): De-parallelized model.
+        m (nn.Module): The unwrapped base model without compile or parallel wrappers.
     """
-    return model.module if is_parallel(model) else model
+    while True:
+        if hasattr(m, "_orig_mod") and isinstance(m._orig_mod, nn.Module):
+            m = m._orig_mod
+        elif hasattr(m, "module") and isinstance(m.module, nn.Module):
+            m = m.module
+        else:
+            return m
 def one_cycle(y1=0.0, y2=1.0, steps=100):
@@ -669,7 +676,7 @@ class ModelEMA:
             tau (int, optional): EMA decay time constant.
             updates (int, optional): Initial number of updates.
         """
-        self.ema = deepcopy(de_parallel(model)).eval()  # FP32 EMA
+        self.ema = deepcopy(unwrap_model(model)).eval()  # FP32 EMA
         self.updates = updates  # number of EMA updates
         self.decay = lambda x: decay * (1 - math.exp(-x / tau))  # decay exponential ramp (to help early epochs)
         for p in self.ema.parameters():
@@ -687,7 +694,7 @@ class ModelEMA:
             self.updates += 1
             d = self.decay(self.updates)
-            msd = de_parallel(model).state_dict()  # model state_dict
+            msd = unwrap_model(model).state_dict()  # model state_dict
             for k, v in self.ema.state_dict().items():
                 if v.dtype.is_floating_point:  # true for FP16 and FP32
                     v *= d
@@ -997,3 +1004,98 @@ class FXModel(nn.Module):
             x = m(x)  # run
             y.append(x)  # save output
         return x
+def disable_dynamo(func: Any) -> Any:
+    """
+    Disable torch.compile/dynamo for a callable when available.
+    Args:
+        func (Any): Callable object to wrap. Could be a function, method, or class.
+    Returns:
+        func (Any): Same callable, wrapped by torch._dynamo.disable when available, otherwise unchanged.
+    Examples:
+        >>> @disable_dynamo
+        ... def fn(x):
+        ...     return x + 1
+        >>> # Works even if torch._dynamo is not available
+        >>> _ = fn(1)
+    """
+    if hasattr(torch, "_dynamo"):
+        return torch._dynamo.disable(func)
+    return func
+def attempt_compile(
+    model: torch.nn.Module,
+    device: torch.device,
+    imgsz: int = 640,
+    use_autocast: bool = False,
+    warmup: bool = False,
+    prefix: str = colorstr("compile:"),
+) -> torch.nn.Module:
+    """
+    Compile a model with torch.compile and optionally warm up the graph to reduce first-iteration latency.
+    This utility attempts to compile the provided model using the inductor backend with dynamic shapes enabled and an
+    autotuning mode. If compilation is unavailable or fails, the original model is returned unchanged. An optional
+    warmup performs a single forward pass on a dummy input to prime the compiled graph and measure compile/warmup time.
+    Args:
+        model (torch.nn.Module): Model to compile.
+        device (torch.device): Inference device used for warmup and autocast decisions.
+        imgsz (int, optional): Square input size to create a dummy tensor with shape (1, 3, imgsz, imgsz) for warmup.
+        use_autocast (bool, optional): Whether to run warmup under autocast on CUDA or MPS devices.
+        warmup (bool, optional): Whether to execute a single dummy forward pass to warm up the compiled model.
+        prefix (str, optional): Message prefix for logger output.
+    Returns:
+        model (torch.nn.Module): Compiled model if compilation succeeds, otherwise the original unmodified model.
+    Notes:
+        - If the current PyTorch build does not provide torch.compile, the function returns the input model immediately.
+        - Warmup runs under torch.inference_mode and may use torch.autocast for CUDA/MPS to align compute precision.
+        - CUDA devices are synchronized after warmup to account for asynchronous kernel execution.
+    Examples:
+        >>> device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        >>> # Try to compile and warm up a model with a 640x640 input
+        >>> model = attempt_compile(model, device=device, imgsz=640, use_autocast=True, warmup=True)
+    """
+    if not hasattr(torch, "compile"):
+        return model
+    LOGGER.info(f"{prefix} starting torch.compile...")
+    t0 = time.perf_counter()
+    try:
+        model = torch.compile(model, mode="max-autotune", backend="inductor")
+    except Exception as e:
+        LOGGER.warning(f"{prefix} torch.compile failed, continuing uncompiled: {e}")
+        return model
+    t_compile = time.perf_counter() - t0
+    t_warm = 0.0
+    if warmup:
+        # Use a single dummy tensor to build the graph shape state and reduce first-iteration latency
+        dummy = torch.zeros(1, 3, imgsz, imgsz, device=device)
+        if use_autocast and device.type == "cuda":
+            dummy = dummy.half()
+        t1 = time.perf_counter()
+        with torch.inference_mode():
+            if use_autocast and device.type in {"cuda", "mps"}:
+                with torch.autocast(device.type):
+                    _ = model(dummy)
+            else:
+                _ = model(dummy)
+        if device.type == "cuda":
+            torch.cuda.synchronize(device)
+        t_warm = time.perf_counter() - t1
+    total = t_compile + t_warm
+    if warmup:
+        LOGGER.info(f"{prefix} complete in {total:.1f}s (compile {t_compile:.1f}s + warmup {t_warm:.1f}s)")
+    else:
+        LOGGER.info(f"{prefix} compile complete in {t_compile:.1f}s (no warmup)")
+    return model

{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/WHEEL RENAMED Viewed

File without changes

{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dgenerate_ultralytics_headless-8.3.195.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/top_level.txt RENAMED Viewed

File without changes

dgenerate-ultralytics-headless 8.3.195__py3-none-any.whl → 8.3.196__py3-none-any.whl

dgenerate-ultralytics-headless 8.3.195__py3-none-any.whl → 8.3.196__py3-none-any.whl