dgenerate-ultralytics-headless 8.3.214__py3-none-any.whl → 8.3.248__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +13 -14
- dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
- tests/__init__.py +5 -7
- tests/conftest.py +8 -15
- tests/test_cli.py +1 -1
- tests/test_cuda.py +5 -8
- tests/test_engine.py +1 -1
- tests/test_exports.py +57 -12
- tests/test_integrations.py +4 -4
- tests/test_python.py +84 -53
- tests/test_solutions.py +160 -151
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +56 -62
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +15 -16
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +21 -0
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
- ultralytics/cfg/datasets/dog-pose.yaml +28 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +5 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/default.yaml +1 -1
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
- ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
- ultralytics/cfg/models/v6/yolov6.yaml +1 -1
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +3 -4
- ultralytics/data/augment.py +285 -475
- ultralytics/data/base.py +18 -26
- ultralytics/data/build.py +147 -25
- ultralytics/data/converter.py +36 -46
- ultralytics/data/dataset.py +46 -74
- ultralytics/data/loaders.py +42 -49
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +34 -43
- ultralytics/engine/exporter.py +319 -237
- ultralytics/engine/model.py +148 -188
- ultralytics/engine/predictor.py +29 -38
- ultralytics/engine/results.py +177 -311
- ultralytics/engine/trainer.py +83 -59
- ultralytics/engine/tuner.py +23 -34
- ultralytics/engine/validator.py +39 -22
- ultralytics/hub/__init__.py +16 -19
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +5 -8
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +8 -10
- ultralytics/models/fastsam/predict.py +17 -29
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +5 -7
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +5 -8
- ultralytics/models/rtdetr/predict.py +15 -19
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +21 -23
- ultralytics/models/sam/__init__.py +15 -2
- ultralytics/models/sam/amg.py +14 -20
- ultralytics/models/sam/build.py +26 -19
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +29 -32
- ultralytics/models/sam/modules/blocks.py +83 -144
- ultralytics/models/sam/modules/decoders.py +19 -37
- ultralytics/models/sam/modules/encoders.py +44 -101
- ultralytics/models/sam/modules/memory_attention.py +16 -30
- ultralytics/models/sam/modules/sam.py +200 -73
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +18 -28
- ultralytics/models/sam/modules/utils.py +174 -50
- ultralytics/models/sam/predict.py +2248 -350
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +529 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +199 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +547 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +9 -12
- ultralytics/models/yolo/classify/train.py +11 -32
- ultralytics/models/yolo/classify/val.py +29 -28
- ultralytics/models/yolo/detect/predict.py +7 -10
- ultralytics/models/yolo/detect/train.py +11 -20
- ultralytics/models/yolo/detect/val.py +70 -58
- ultralytics/models/yolo/model.py +36 -53
- ultralytics/models/yolo/obb/predict.py +5 -14
- ultralytics/models/yolo/obb/train.py +11 -14
- ultralytics/models/yolo/obb/val.py +39 -36
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +6 -21
- ultralytics/models/yolo/pose/train.py +10 -15
- ultralytics/models/yolo/pose/val.py +38 -57
- ultralytics/models/yolo/segment/predict.py +14 -18
- ultralytics/models/yolo/segment/train.py +3 -6
- ultralytics/models/yolo/segment/val.py +93 -45
- ultralytics/models/yolo/world/train.py +8 -14
- ultralytics/models/yolo/world/train_world.py +11 -34
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +30 -43
- ultralytics/models/yolo/yoloe/train_seg.py +5 -10
- ultralytics/models/yolo/yoloe/val.py +15 -20
- ultralytics/nn/__init__.py +7 -7
- ultralytics/nn/autobackend.py +145 -77
- ultralytics/nn/modules/__init__.py +60 -60
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +132 -216
- ultralytics/nn/modules/conv.py +52 -97
- ultralytics/nn/modules/head.py +50 -103
- ultralytics/nn/modules/transformer.py +76 -88
- ultralytics/nn/modules/utils.py +16 -21
- ultralytics/nn/tasks.py +94 -154
- ultralytics/nn/text_model.py +40 -67
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +11 -17
- ultralytics/solutions/analytics.py +15 -16
- ultralytics/solutions/config.py +5 -6
- ultralytics/solutions/distance_calculation.py +10 -13
- ultralytics/solutions/heatmap.py +7 -13
- ultralytics/solutions/instance_segmentation.py +5 -8
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +12 -19
- ultralytics/solutions/object_cropper.py +8 -14
- ultralytics/solutions/parking_management.py +33 -31
- ultralytics/solutions/queue_management.py +10 -12
- ultralytics/solutions/region_counter.py +9 -12
- ultralytics/solutions/security_alarm.py +15 -20
- ultralytics/solutions/similarity_search.py +10 -15
- ultralytics/solutions/solutions.py +75 -74
- ultralytics/solutions/speed_estimation.py +7 -10
- ultralytics/solutions/streamlit_inference.py +2 -4
- ultralytics/solutions/templates/similarity-search.html +7 -18
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +3 -5
- ultralytics/trackers/bot_sort.py +10 -27
- ultralytics/trackers/byte_tracker.py +14 -30
- ultralytics/trackers/track.py +3 -6
- ultralytics/trackers/utils/gmc.py +11 -22
- ultralytics/trackers/utils/kalman_filter.py +37 -48
- ultralytics/trackers/utils/matching.py +12 -15
- ultralytics/utils/__init__.py +116 -116
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +17 -18
- ultralytics/utils/benchmarks.py +32 -46
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +5 -13
- ultralytics/utils/callbacks/comet.py +32 -46
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +7 -15
- ultralytics/utils/callbacks/platform.py +314 -38
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +23 -31
- ultralytics/utils/callbacks/wb.py +10 -13
- ultralytics/utils/checks.py +99 -76
- ultralytics/utils/cpu.py +3 -8
- ultralytics/utils/dist.py +8 -12
- ultralytics/utils/downloads.py +20 -30
- ultralytics/utils/errors.py +6 -14
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +4 -236
- ultralytics/utils/export/engine.py +237 -0
- ultralytics/utils/export/imx.py +91 -55
- ultralytics/utils/export/tensorflow.py +231 -0
- ultralytics/utils/files.py +24 -28
- ultralytics/utils/git.py +9 -11
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +212 -114
- ultralytics/utils/loss.py +14 -22
- ultralytics/utils/metrics.py +126 -155
- ultralytics/utils/nms.py +13 -16
- ultralytics/utils/ops.py +107 -165
- ultralytics/utils/patches.py +33 -21
- ultralytics/utils/plotting.py +72 -80
- ultralytics/utils/tal.py +25 -39
- ultralytics/utils/torch_utils.py +52 -78
- ultralytics/utils/tqdm.py +20 -20
- ultralytics/utils/triton.py +13 -19
- ultralytics/utils/tuner.py +17 -5
- dgenerate_ultralytics_headless-8.3.214.dist-info/RECORD +0 -283
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
ultralytics/engine/trainer.py
CHANGED
```diff
@@ -6,6 +6,8 @@ Usage:
     $ yolo mode=train model=yolo11n.pt data=coco8.yaml imgsz=640 epochs=100 batch=16
 """
 
+from __future__ import annotations
+
 import gc
 import math
 import os
@@ -61,8 +63,7 @@ from ultralytics.utils.torch_utils import (
 
 
 class BaseTrainer:
-    """
-    A base class for creating trainers.
+    """A base class for creating trainers.
 
     This class provides the foundation for training YOLO models, handling the training loop, validation, checkpointing,
     and various training utilities. It supports both single-GPU and multi-GPU distributed training.
@@ -112,8 +113,7 @@ class BaseTrainer:
     """
 
     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """
-        Initialize the BaseTrainer class.
+        """Initialize the BaseTrainer class.
 
         Args:
             cfg (str, optional): Path to a configuration file.
@@ -138,7 +138,12 @@ class BaseTrainer:
         if RANK in {-1, 0}:
             self.wdir.mkdir(parents=True, exist_ok=True)  # make dir
             self.args.save_dir = str(self.save_dir)
+            # Save run args, serializing augmentations as reprs for resume compatibility
+            args_dict = vars(self.args).copy()
+            if args_dict.get("augmentations") is not None:
+                # Serialize Albumentations transforms as their repr strings for checkpoint compatibility
+                args_dict["augmentations"] = [repr(t) for t in args_dict["augmentations"]]
+            YAML.save(self.save_dir / "args.yaml", args_dict)  # save run args
         self.last, self.best = self.wdir / "last.pt", self.wdir / "best.pt"  # checkpoint paths
         self.save_period = self.args.save_period
 
@@ -318,18 +323,18 @@ class BaseTrainer:
         self.train_loader = self.get_dataloader(
             self.data["train"], batch_size=batch_size, rank=LOCAL_RANK, mode="train"
         )
+        # Note: When training DOTA dataset, double batch size could get OOM on images with >2000 objects.
+        self.test_loader = self.get_dataloader(
+            self.data.get("val") or self.data.get("test"),
+            batch_size=batch_size if self.args.task == "obb" else batch_size * 2,
+            rank=LOCAL_RANK,
+            mode="val",
+        )
+        self.validator = self.get_validator()
+        self.ema = ModelEMA(self.model)
         if RANK in {-1, 0}:
-            # Note: When training DOTA dataset, double batch size could get OOM on images with >2000 objects.
-            self.test_loader = self.get_dataloader(
-                self.data.get("val") or self.data.get("test"),
-                batch_size=batch_size if self.args.task == "obb" else batch_size * 2,
-                rank=-1,
-                mode="val",
-            )
-            self.validator = self.get_validator()
             metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix="val")
             self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
-            self.ema = ModelEMA(self.model)
             if self.args.plots:
                 self.plot_training_labels()
 
@@ -464,13 +469,13 @@ class BaseTrainer:
 
             self.run_callbacks("on_train_epoch_end")
             if RANK in {-1, 0}:
-                final_epoch = epoch + 1 >= self.epochs
                 self.ema.update_attr(self.model, include=["yaml", "nc", "args", "names", "stride", "class_weights"])
 
+                # Validation
+                final_epoch = epoch + 1 >= self.epochs
+                if self.args.val or final_epoch or self.stopper.possible_stop or self.stop:
+                    self._clear_memory(threshold=0.5)  # prevent VRAM spike
+                    self.metrics, self.fitness = self.validate()
 
                 # NaN recovery
                 if self._handle_nan_recovery(epoch):
@@ -510,11 +515,11 @@ class BaseTrainer:
                 break  # must break all DDP ranks
             epoch += 1
 
+        seconds = time.time() - self.train_time_start
+        LOGGER.info(f"\n{epoch - self.start_epoch + 1} epochs completed in {seconds / 3600:.3f} hours.")
+        # Do final val with best.pt
+        self.final_eval()
         if RANK in {-1, 0}:
-            # Do final val with best.pt
-            seconds = time.time() - self.train_time_start
-            LOGGER.info(f"\n{epoch - self.start_epoch + 1} epochs completed in {seconds / 3600:.3f} hours.")
-            self.final_eval()
             if self.args.plots:
                 self.plot_metrics()
             self.run_callbacks("on_train_end")
@@ -545,7 +550,7 @@ class BaseTrainer:
             total = torch.cuda.get_device_properties(self.device).total_memory
         return ((memory / total) if total > 0 else 0) if fraction else (memory / 2**30)
 
-    def _clear_memory(self, threshold: float = None):
+    def _clear_memory(self, threshold: float | None = None):
         """Clear accelerator memory by calling garbage collector and emptying cache."""
         if threshold:
             assert 0 <= threshold <= 1, "Threshold must be between 0 and 1."
@@ -618,8 +623,7 @@ class BaseTrainer:
             (self.wdir / f"epoch{self.epoch}.pt").write_bytes(serialized_ckpt)  # save epoch, i.e. 'epoch3.pt'
 
     def get_dataset(self):
-        """
-        Get train and validation datasets from data dictionary.
+        """Get train and validation datasets from data dictionary.
 
         Returns:
             (dict): A dictionary containing the training/validation/test dataset and category names.
@@ -627,7 +631,7 @@ class BaseTrainer:
         try:
             if self.args.task == "classify":
                 data = check_cls_dataset(self.args.data)
-            elif self.args.data.rsplit(".", 1)[-1] == "ndjson":
+            elif str(self.args.data).rsplit(".", 1)[-1] == "ndjson":
                 # Convert NDJSON to YOLO format
                 import asyncio
 
@@ -636,7 +640,7 @@ class BaseTrainer:
                 yaml_path = asyncio.run(convert_ndjson_to_yolo(self.args.data))
                 self.args.data = str(yaml_path)
                 data = check_det_dataset(self.args.data)
-            elif self.args.data.rsplit(".", 1)[-1] in {"yaml", "yml"} or self.args.task in {
+            elif str(self.args.data).rsplit(".", 1)[-1] in {"yaml", "yml"} or self.args.task in {
                 "detect",
                 "segment",
                 "pose",
@@ -654,8 +658,7 @@ class BaseTrainer:
         return data
 
     def setup_model(self):
-        """
-        Load, create, or download model for any task.
+        """Load, create, or download model for any task.
 
         Returns:
             (dict): Optional checkpoint to resume training from.
@@ -688,14 +691,19 @@ class BaseTrainer:
         return batch
 
     def validate(self):
-        """
-        Run validation on val set using self.validator.
+        """Run validation on val set using self.validator.
 
         Returns:
             metrics (dict): Dictionary of validation metrics.
             fitness (float): Fitness score for the validation.
         """
+        if self.ema and self.world_size > 1:
+            # Sync EMA buffers from rank 0 to all ranks
+            for buffer in self.ema.ema.buffers():
+                dist.broadcast(buffer, src=0)
         metrics = self.validator(self)
+        if metrics is None:
+            return None, None
         fitness = metrics.pop("fitness", -self.loss.detach().cpu().numpy())  # use loss as fitness measure if not found
         if not self.best_fitness or self.best_fitness < fitness:
             self.best_fitness = fitness
@@ -706,11 +714,11 @@ class BaseTrainer:
             raise NotImplementedError("This task trainer doesn't support loading cfg files")
 
     def get_validator(self):
-        """
+        """Raise NotImplementedError (must be implemented by subclasses)."""
         raise NotImplementedError("get_validator function not implemented in trainer")
 
     def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
-        """
+        """Raise NotImplementedError (must return a `torch.utils.data.DataLoader` in subclasses)."""
         raise NotImplementedError("get_dataloader function not implemented in trainer")
 
     def build_dataset(self, img_path, mode="train", batch=None):
@@ -718,10 +726,9 @@ class BaseTrainer:
         raise NotImplementedError("build_dataset function not implemented in trainer")
 
     def label_loss_items(self, loss_items=None, prefix="train"):
-        """
-        Return a loss dict with labelled training loss items tensor.
+        """Return a loss dict with labeled training loss items tensor.
 
-
+        Notes:
             This is not needed for classification but necessary for segmentation & detection
         """
         return {"loss": loss_items} if loss_items is not None else ["loss"]
@@ -753,9 +760,9 @@ class BaseTrainer:
         n = len(metrics) + 2  # number of cols
         t = time.time() - self.train_time_start
         self.csv.parent.mkdir(parents=True, exist_ok=True)  # ensure parent directory exists
-        s = "" if self.csv.exists() else (
+        s = "" if self.csv.exists() else ("%s," * n % ("epoch", "time", *keys)).rstrip(",") + "\n"
         with open(self.csv, "a", encoding="utf-8") as f:
-            f.write(s + ("%.6g," * n %
+            f.write(s + ("%.6g," * n % (self.epoch + 1, t, *vals)).rstrip(",") + "\n")
 
     def plot_metrics(self):
         """Plot metrics from a CSV file."""
@@ -768,20 +775,20 @@ class BaseTrainer:
 
     def final_eval(self):
         """Perform final evaluation and validation for object detection YOLO model."""
+        model = self.best if self.best.exists() else None
+        with torch_distributed_zero_first(LOCAL_RANK):  # strip only on GPU 0; other GPUs should wait
+            if RANK in {-1, 0}:
+                ckpt = strip_optimizer(self.last) if self.last.exists() else {}
+                if model:
+                    # update best.pt train_metrics from last.pt
+                    strip_optimizer(self.best, updates={"train_results": ckpt.get("train_results")})
+        if model:
+            LOGGER.info(f"\nValidating {model}...")
+            self.validator.args.plots = self.args.plots
+            self.validator.args.compile = False  # disable final val compile as too slow
+            self.metrics = self.validator(model=model)
+            self.metrics.pop("fitness", None)
+            self.run_callbacks("on_fit_epoch_end")
 
     def check_resume(self, overrides):
         """Check if resume checkpoint exists and update arguments accordingly."""
@@ -804,10 +811,29 @@ class BaseTrainer:
                 "batch",
                 "device",
                 "close_mosaic",
+                "augmentations",
+                "save_period",
+                "workers",
+                "cache",
+                "patience",
+                "time",
+                "freeze",
+                "val",
+                "plots",
             ):  # allow arg updates to reduce memory or update device on resume
                 if k in overrides:
                     setattr(self.args, k, overrides[k])
 
+            # Handle augmentations parameter for resume: check if user provided custom augmentations
+            if ckpt_args.get("augmentations") is not None:
+                # Augmentations were saved in checkpoint as reprs but can't be restored automatically
+                LOGGER.warning(
+                    "Custom Albumentations transforms were used in the original training run but are not "
+                    "being restored. To preserve custom augmentations when resuming, you need to pass the "
+                    "'augmentations' parameter again to get expected results. Example: \n"
+                    f"model.train(resume=True, augmentations={ckpt_args['augmentations']})"
+                )
+
         except Exception as e:
             raise FileNotFoundError(
                 "Resume checkpoint not found. Please pass a valid checkpoint to resume from, "
@@ -887,18 +913,16 @@ class BaseTrainer:
             self.train_loader.dataset.close_mosaic(hyp=copy(self.args))
 
     def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
-        """
-        Construct an optimizer for the given model.
+        """Construct an optimizer for the given model.
 
         Args:
             model (torch.nn.Module): The model for which to build an optimizer.
-            name (str, optional): The name of the optimizer to use. If 'auto', the optimizer is selected
+            name (str, optional): The name of the optimizer to use. If 'auto', the optimizer is selected based on the
+                number of iterations.
             lr (float, optional): The learning rate for the optimizer.
             momentum (float, optional): The momentum factor for the optimizer.
            decay (float, optional): The weight decay for the optimizer.
-            iterations (float, optional): The number of iterations, which determines the optimizer if
-                name is 'auto'.
+            iterations (float, optional): The number of iterations, which determines the optimizer if name is 'auto'.
 
         Returns:
             (torch.optim.Optimizer): The constructed optimizer.
```
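The resume warning added in `check_resume` above implies a specific user-side workflow: custom Albumentations transforms are written to `args.yaml` only as `repr()` strings, so they must be passed again when resuming. A minimal usage sketch, assuming albumentations is installed and a prior run saved checkpoints under `runs/detect/train/weights/` (the specific transforms and paths are illustrative, not taken from this diff):

```python
import albumentations as A  # illustrative transforms, not values from this diff
from ultralytics import YOLO

model = YOLO("runs/detect/train/weights/last.pt")
# Custom augmentations are not restored from args.yaml automatically, so pass
# them again when resuming to keep the run consistent with the original.
model.train(resume=True, augmentations=[A.Blur(p=0.01), A.MedianBlur(p=0.01)])
```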
ultralytics/engine/tuner.py
CHANGED
```diff
@@ -8,7 +8,7 @@ that yield the best model performance. This is particularly crucial in deep lear
 where small changes in hyperparameters can lead to significant differences in model accuracy and efficiency.
 
 Examples:
-    Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=
+    Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=10 for 300 tuning iterations.
     >>> from ultralytics import YOLO
     >>> model = YOLO("yolo11n.pt")
     >>> model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
@@ -34,12 +34,11 @@ from ultralytics.utils.plotting import plot_tune_results
 
 
 class Tuner:
-    """
-    A class for hyperparameter tuning of YOLO models.
+    """A class for hyperparameter tuning of YOLO models.
 
     The class evolves YOLO model hyperparameters over a given number of iterations by mutating them according to the
-    search space and retraining the model to evaluate their performance. Supports both local CSV storage and
+    search space and retraining the model to evaluate their performance. Supports both local CSV storage and distributed
+    MongoDB Atlas coordination for multi-machine hyperparameter optimization.
 
     Attributes:
         space (dict[str, tuple]): Hyperparameter search space containing bounds and scaling factors for mutation.
@@ -56,7 +55,7 @@ class Tuner:
         __call__: Execute the hyperparameter evolution across multiple iterations.
 
     Examples:
-        Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=
+        Tune hyperparameters for YOLO11n on COCO8 at imgsz=640 and epochs=10 for 300 tuning iterations.
         >>> from ultralytics import YOLO
         >>> model = YOLO("yolo11n.pt")
         >>> model.tune(
@@ -83,8 +82,7 @@ class Tuner:
     """
 
     def __init__(self, args=DEFAULT_CFG, _callbacks: list | None = None):
-        """
-        Initialize the Tuner with configurations.
+        """Initialize the Tuner with configurations.
 
         Args:
             args (dict): Configuration for hyperparameter evolution.
@@ -142,8 +140,7 @@ class Tuner:
         )
 
     def _connect(self, uri: str = "mongodb+srv://username:password@cluster.mongodb.net/", max_retries: int = 3):
-        """
-        Create MongoDB client with exponential backoff retry on connection failures.
+        """Create MongoDB client with exponential backoff retry on connection failures.
 
         Args:
             uri (str): MongoDB connection string with credentials and cluster information.
@@ -183,12 +180,10 @@ class Tuner:
             time.sleep(wait_time)
 
     def _init_mongodb(self, mongodb_uri="", mongodb_db="", mongodb_collection=""):
-        """
-        Initialize MongoDB connection for distributed tuning.
+        """Initialize MongoDB connection for distributed tuning.
 
-        Connects to MongoDB Atlas for distributed hyperparameter optimization across multiple machines.
-        from all workers for evolution.
+        Connects to MongoDB Atlas for distributed hyperparameter optimization across multiple machines. Each worker
+        saves results to a shared collection and reads the latest best hyperparameters from all workers for evolution.
 
         Args:
             mongodb_uri (str): MongoDB connection string, e.g. 'mongodb+srv://username:password@cluster.mongodb.net/'.
@@ -206,8 +201,7 @@ class Tuner:
         LOGGER.info(f"{self.prefix}Using MongoDB Atlas for distributed tuning")
 
     def _get_mongodb_results(self, n: int = 5) -> list:
-        """
-        Get top N results from MongoDB sorted by fitness.
+        """Get top N results from MongoDB sorted by fitness.
 
         Args:
             n (int): Number of top results to retrieve.
@@ -221,8 +215,7 @@ class Tuner:
             return []
 
     def _save_to_mongodb(self, fitness: float, hyperparameters: dict[str, float], metrics: dict, iteration: int):
-        """
-        Save results to MongoDB with proper type conversion.
+        """Save results to MongoDB with proper type conversion.
 
         Args:
             fitness (float): Fitness score achieved with these hyperparameters.
@@ -233,7 +226,7 @@ class Tuner:
         try:
             self.collection.insert_one(
                 {
-                    "fitness":
+                    "fitness": fitness,
                     "hyperparameters": {k: (v.item() if hasattr(v, "item") else v) for k, v in hyperparameters.items()},
                     "metrics": metrics,
                     "timestamp": datetime.now(),
@@ -244,8 +237,7 @@ class Tuner:
             LOGGER.warning(f"{self.prefix}MongoDB save failed: {e}")
 
     def _sync_mongodb_to_csv(self):
-        """
-        Sync MongoDB results to CSV for plotting compatibility.
+        """Sync MongoDB results to CSV for plotting compatibility.
 
         Downloads all results from MongoDB and writes them to the local CSV file in chronological order. This enables
         the existing plotting functions to work seamlessly with distributed MongoDB data.
@@ -257,19 +249,20 @@ class Tuner:
                 return
 
             # Write to CSV
-            headers = ",".join(["fitness"
+            headers = ",".join(["fitness", *list(self.space.keys())]) + "\n"
             with open(self.tune_csv, "w", encoding="utf-8") as f:
                 f.write(headers)
                 for result in all_results:
                     fitness = result["fitness"]
                     hyp_values = [result["hyperparameters"][k] for k in self.space.keys()]
-                    log_row = [round(fitness, 5)
+                    log_row = [round(fitness, 5), *hyp_values]
                     f.write(",".join(map(str, log_row)) + "\n")
 
         except Exception as e:
             LOGGER.warning(f"{self.prefix}MongoDB to CSV sync failed: {e}")
 
+    @staticmethod
+    def _crossover(x: np.ndarray, alpha: float = 0.2, k: int = 9) -> np.ndarray:
         """BLX-α crossover from up to top-k parents (x[:,0]=fitness, rest=genes)."""
         k = min(k, len(x))
         # fitness weights (shifted to >0); fallback to uniform if degenerate
@@ -288,11 +281,9 @@ class Tuner:
         mutation: float = 0.5,
         sigma: float = 0.2,
     ) -> dict[str, float]:
-        """
-        Mutate hyperparameters based on bounds and scaling factors specified in `self.space`.
+        """Mutate hyperparameters based on bounds and scaling factors specified in `self.space`.
 
         Args:
-            parent (str): Parent selection method (kept for API compatibility, unused in BLX mode).
             n (int): Number of top parents to consider.
             mutation (float): Probability of a parameter mutation in any given iteration.
             sigma (float): Standard deviation for Gaussian random number generator.
@@ -304,8 +295,7 @@ class Tuner:
 
         # Try MongoDB first if available
         if self.mongodb:
-            results
-            if results:
+            if results := self._get_mongodb_results(n):
                 # MongoDB already sorted by fitness DESC, so results[0] is best
                 x = np.array([[r["fitness"]] + [r["hyperparameters"][k] for k in self.space.keys()] for r in results])
             elif self.collection.name in self.collection.database.list_collection_names():  # Tuner started elsewhere
@@ -344,13 +334,12 @@ class Tuner:
 
         # Update types
         if "close_mosaic" in hyp:
-            hyp["close_mosaic"] =
+            hyp["close_mosaic"] = round(hyp["close_mosaic"])
 
         return hyp
 
     def __call__(self, model=None, iterations: int = 10, cleanup: bool = True):
-        """
-        Execute the hyperparameter evolution process when the Tuner instance is called.
+        """Execute the hyperparameter evolution process when the Tuner instance is called.
 
         This method iterates through the specified number of iterations, performing the following steps:
         1. Sync MongoDB results to CSV (if using distributed mode)
@@ -421,7 +410,7 @@ class Tuner:
             else:
                 # Save to CSV only if no MongoDB
                 log_row = [round(fitness, 5)] + [mutated_hyp[k] for k in self.space.keys()]
-                headers = "" if self.tune_csv.exists() else (",".join(["fitness"
+                headers = "" if self.tune_csv.exists() else (",".join(["fitness", *list(self.space.keys())]) + "\n")
                 with open(self.tune_csv, "a", encoding="utf-8") as f:
                     f.write(headers + ",".join(map(str, log_row)) + "\n")
 
```
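The new `Tuner._crossover` docstring above names BLX-α crossover over up to the top-k fitness-weighted parents. As a rough illustration of the operator itself (not the library's exact implementation), a standard two-parent BLX-α step samples each child gene uniformly from the parents' interval widened by α on both sides:

```python
import numpy as np

def blx_alpha(p1: np.ndarray, p2: np.ndarray, alpha: float = 0.2, rng=None) -> np.ndarray:
    """Sample a child uniformly from each gene's parent interval widened by alpha on both sides."""
    rng = rng or np.random.default_rng()
    lo, hi = np.minimum(p1, p2), np.maximum(p1, p2)
    span = hi - lo
    return rng.uniform(lo - alpha * span, hi + alpha * span)

# Example: two parent hyperparameter vectors (lr0, momentum, weight_decay)
child = blx_alpha(np.array([0.01, 0.90, 0.0005]), np.array([0.02, 0.95, 0.0010]))
```

The variant in the diff additionally weights up to k parents by fitness (shifted to be positive, with a uniform fallback when fitnesses are degenerate) before sampling, per the comment in `_crossover`.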
ultralytics/engine/validator.py
CHANGED
```diff
@@ -29,26 +29,26 @@ from pathlib import Path
 
 import numpy as np
 import torch
+import torch.distributed as dist
 
 from ultralytics.cfg import get_cfg, get_save_dir
 from ultralytics.data.utils import check_cls_dataset, check_det_dataset
 from ultralytics.nn.autobackend import AutoBackend
-from ultralytics.utils import LOGGER, TQDM, callbacks, colorstr, emojis
+from ultralytics.utils import LOGGER, RANK, TQDM, callbacks, colorstr, emojis
 from ultralytics.utils.checks import check_imgsz
 from ultralytics.utils.ops import Profile
 from ultralytics.utils.torch_utils import attempt_compile, select_device, smart_inference_mode, unwrap_model
 
 
 class BaseValidator:
-    """
-    A base class for creating validators.
+    """A base class for creating validators.
 
     This class provides the foundation for validation processes, including model evaluation, metric computation, and
     result visualization.
 
     Attributes:
         args (SimpleNamespace): Configuration for the validator.
-        dataloader (DataLoader):
+        dataloader (DataLoader): DataLoader to use for validation.
         model (nn.Module): Model to validate.
         data (dict): Data dictionary containing dataset information.
         device (torch.device): Device to use for validation.
@@ -61,8 +61,8 @@ class BaseValidator:
         nc (int): Number of classes.
         iouv (torch.Tensor): IoU thresholds from 0.50 to 0.95 in spaces of 0.05.
         jdict (list): List to store JSON validation results.
-        speed (dict): Dictionary with keys 'preprocess', 'inference', 'loss', 'postprocess' and their respective
+        speed (dict): Dictionary with keys 'preprocess', 'inference', 'loss', 'postprocess' and their respective batch
+            processing times in milliseconds.
         save_dir (Path): Directory to save results.
         plots (dict): Dictionary to store plots for visualization.
         callbacks (dict): Dictionary to store various callback functions.
@@ -92,11 +92,10 @@ class BaseValidator:
     """
 
     def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None):
-        """
-        Initialize a BaseValidator instance.
+        """Initialize a BaseValidator instance.
 
         Args:
-            dataloader (torch.utils.data.DataLoader, optional):
+            dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
             save_dir (Path, optional): Directory to save results.
             args (SimpleNamespace, optional): Configuration for the validator.
             _callbacks (dict, optional): Dictionary to store various callback functions.
@@ -130,8 +129,7 @@ class BaseValidator:
 
     @smart_inference_mode()
     def __call__(self, trainer=None, model=None):
-        """
-        Execute validation process, running inference on dataloader and computing performance metrics.
+        """Execute validation process, running inference on dataloader and computing performance metrics.
 
         Args:
             trainer (object, optional): Trainer object that contains the model to validate.
@@ -160,7 +158,7 @@ class BaseValidator:
             callbacks.add_integration_callbacks(self)
             model = AutoBackend(
                 model=model or self.args.model,
-                device=select_device(self.args.device),
+                device=select_device(self.args.device) if RANK == -1 else torch.device("cuda", RANK),
                 dnn=self.args.dnn,
                 data=self.args.data,
                 fp16=self.args.half,
@@ -223,21 +221,34 @@ class BaseValidator:
             preds = self.postprocess(preds)
 
             self.update_metrics(preds, batch)
-            if self.args.plots and batch_i < 3:
+            if self.args.plots and batch_i < 3 and RANK in {-1, 0}:
                 self.plot_val_samples(batch, batch_i)
                 self.plot_predictions(batch, preds, batch_i)
 
             self.run_callbacks("on_val_batch_end")
+
+        stats = {}
+        self.gather_stats()
+        if RANK in {-1, 0}:
+            stats = self.get_stats()
+            self.speed = dict(zip(self.speed.keys(), (x.t / len(self.dataloader.dataset) * 1e3 for x in dt)))
+            self.finalize_metrics()
+            self.print_results()
+            self.run_callbacks("on_val_end")
+
         if self.training:
             model.float()
+            # Reduce loss across all GPUs
+            loss = self.loss.clone().detach()
+            if trainer.world_size > 1:
+                dist.reduce(loss, dst=0, op=dist.ReduceOp.AVG)
+            if RANK > 0:
+                return
+            results = {**stats, **trainer.label_loss_items(loss.cpu() / len(self.dataloader), prefix="val")}
             return {k: round(float(v), 5) for k, v in results.items()}  # return results as 5 decimal place floats
         else:
+            if RANK > 0:
+                return stats
             LOGGER.info(
                 "Speed: {:.1f}ms preprocess, {:.1f}ms inference, {:.1f}ms loss, {:.1f}ms postprocess per image".format(
                     *tuple(self.speed.values())
@@ -255,8 +266,7 @@ class BaseValidator:
     def match_predictions(
         self, pred_classes: torch.Tensor, true_classes: torch.Tensor, iou: torch.Tensor, use_scipy: bool = False
     ) -> torch.Tensor:
-        """
-        Match predictions to ground truth objects using IoU.
+        """Match predictions to ground truth objects using IoU.
 
         Args:
             pred_classes (torch.Tensor): Predicted class indices of shape (N,).
@@ -336,6 +346,10 @@ class BaseValidator:
         """Return statistics about the model's performance."""
         return {}
 
+    def gather_stats(self):
+        """Gather statistics from all the GPUs during DDP training to GPU 0."""
+        pass
+
     def print_results(self):
         """Print the results of the model's predictions."""
         pass
@@ -350,7 +364,10 @@ class BaseValidator:
         return []
 
     def on_plot(self, name, data=None):
-        """Register plots for visualization."""
+        """Register plots for visualization, deduplicating by type."""
+        plot_type = data.get("type") if data else None
+        if plot_type and any((v.get("data") or {}).get("type") == plot_type for v in self.plots.values()):
+            return  # Skip duplicate plot types
         self.plots[Path(name)] = {"data": data, "timestamp": time.time()}
 
     def plot_val_samples(self, batch, ni):
```