dgenerate_ultralytics_headless-8.3.137-py3-none-any.whl → dgenerate_ultralytics_headless-8.3.224-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (215)
  1. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
  2. dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
  3. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +7 -6
  5. tests/conftest.py +15 -39
  6. tests/test_cli.py +17 -17
  7. tests/test_cuda.py +17 -8
  8. tests/test_engine.py +36 -10
  9. tests/test_exports.py +98 -37
  10. tests/test_integrations.py +12 -15
  11. tests/test_python.py +126 -82
  12. tests/test_solutions.py +319 -135
  13. ultralytics/__init__.py +27 -9
  14. ultralytics/cfg/__init__.py +83 -87
  15. ultralytics/cfg/datasets/Argoverse.yaml +4 -4
  16. ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
  17. ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
  18. ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
  19. ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
  20. ultralytics/cfg/datasets/ImageNet.yaml +3 -3
  21. ultralytics/cfg/datasets/Objects365.yaml +24 -20
  22. ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
  23. ultralytics/cfg/datasets/VOC.yaml +10 -13
  24. ultralytics/cfg/datasets/VisDrone.yaml +43 -33
  25. ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
  26. ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
  27. ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
  28. ultralytics/cfg/datasets/coco-pose.yaml +26 -4
  29. ultralytics/cfg/datasets/coco.yaml +4 -4
  30. ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
  31. ultralytics/cfg/datasets/coco128.yaml +2 -2
  32. ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
  33. ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
  34. ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
  35. ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
  36. ultralytics/cfg/datasets/coco8.yaml +2 -2
  37. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  38. ultralytics/cfg/datasets/crack-seg.yaml +5 -5
  39. ultralytics/cfg/datasets/dog-pose.yaml +32 -4
  40. ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
  41. ultralytics/cfg/datasets/dota8.yaml +2 -2
  42. ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
  43. ultralytics/cfg/datasets/lvis.yaml +9 -9
  44. ultralytics/cfg/datasets/medical-pills.yaml +4 -5
  45. ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
  46. ultralytics/cfg/datasets/package-seg.yaml +5 -5
  47. ultralytics/cfg/datasets/signature.yaml +4 -4
  48. ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
  49. ultralytics/cfg/datasets/xView.yaml +5 -5
  50. ultralytics/cfg/default.yaml +96 -93
  51. ultralytics/cfg/trackers/botsort.yaml +16 -17
  52. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  53. ultralytics/data/__init__.py +4 -4
  54. ultralytics/data/annotator.py +12 -12
  55. ultralytics/data/augment.py +531 -564
  56. ultralytics/data/base.py +76 -81
  57. ultralytics/data/build.py +206 -42
  58. ultralytics/data/converter.py +179 -78
  59. ultralytics/data/dataset.py +121 -121
  60. ultralytics/data/loaders.py +114 -91
  61. ultralytics/data/split.py +28 -15
  62. ultralytics/data/split_dota.py +67 -48
  63. ultralytics/data/utils.py +110 -89
  64. ultralytics/engine/exporter.py +422 -460
  65. ultralytics/engine/model.py +224 -252
  66. ultralytics/engine/predictor.py +94 -89
  67. ultralytics/engine/results.py +345 -595
  68. ultralytics/engine/trainer.py +231 -134
  69. ultralytics/engine/tuner.py +279 -73
  70. ultralytics/engine/validator.py +53 -46
  71. ultralytics/hub/__init__.py +26 -28
  72. ultralytics/hub/auth.py +30 -16
  73. ultralytics/hub/google/__init__.py +34 -36
  74. ultralytics/hub/session.py +53 -77
  75. ultralytics/hub/utils.py +23 -109
  76. ultralytics/models/__init__.py +1 -1
  77. ultralytics/models/fastsam/__init__.py +1 -1
  78. ultralytics/models/fastsam/model.py +36 -18
  79. ultralytics/models/fastsam/predict.py +33 -44
  80. ultralytics/models/fastsam/utils.py +4 -5
  81. ultralytics/models/fastsam/val.py +12 -14
  82. ultralytics/models/nas/__init__.py +1 -1
  83. ultralytics/models/nas/model.py +16 -20
  84. ultralytics/models/nas/predict.py +12 -14
  85. ultralytics/models/nas/val.py +4 -5
  86. ultralytics/models/rtdetr/__init__.py +1 -1
  87. ultralytics/models/rtdetr/model.py +9 -9
  88. ultralytics/models/rtdetr/predict.py +22 -17
  89. ultralytics/models/rtdetr/train.py +20 -16
  90. ultralytics/models/rtdetr/val.py +79 -59
  91. ultralytics/models/sam/__init__.py +8 -2
  92. ultralytics/models/sam/amg.py +53 -38
  93. ultralytics/models/sam/build.py +29 -31
  94. ultralytics/models/sam/model.py +33 -38
  95. ultralytics/models/sam/modules/blocks.py +159 -182
  96. ultralytics/models/sam/modules/decoders.py +38 -47
  97. ultralytics/models/sam/modules/encoders.py +114 -133
  98. ultralytics/models/sam/modules/memory_attention.py +38 -31
  99. ultralytics/models/sam/modules/sam.py +114 -93
  100. ultralytics/models/sam/modules/tiny_encoder.py +268 -291
  101. ultralytics/models/sam/modules/transformer.py +59 -66
  102. ultralytics/models/sam/modules/utils.py +55 -72
  103. ultralytics/models/sam/predict.py +745 -341
  104. ultralytics/models/utils/loss.py +118 -107
  105. ultralytics/models/utils/ops.py +118 -71
  106. ultralytics/models/yolo/__init__.py +1 -1
  107. ultralytics/models/yolo/classify/predict.py +28 -26
  108. ultralytics/models/yolo/classify/train.py +50 -81
  109. ultralytics/models/yolo/classify/val.py +68 -61
  110. ultralytics/models/yolo/detect/predict.py +12 -15
  111. ultralytics/models/yolo/detect/train.py +56 -46
  112. ultralytics/models/yolo/detect/val.py +279 -223
  113. ultralytics/models/yolo/model.py +167 -86
  114. ultralytics/models/yolo/obb/predict.py +7 -11
  115. ultralytics/models/yolo/obb/train.py +23 -25
  116. ultralytics/models/yolo/obb/val.py +107 -99
  117. ultralytics/models/yolo/pose/__init__.py +1 -1
  118. ultralytics/models/yolo/pose/predict.py +12 -14
  119. ultralytics/models/yolo/pose/train.py +31 -69
  120. ultralytics/models/yolo/pose/val.py +119 -254
  121. ultralytics/models/yolo/segment/predict.py +21 -25
  122. ultralytics/models/yolo/segment/train.py +12 -66
  123. ultralytics/models/yolo/segment/val.py +126 -305
  124. ultralytics/models/yolo/world/train.py +53 -45
  125. ultralytics/models/yolo/world/train_world.py +51 -32
  126. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  127. ultralytics/models/yolo/yoloe/predict.py +30 -37
  128. ultralytics/models/yolo/yoloe/train.py +89 -71
  129. ultralytics/models/yolo/yoloe/train_seg.py +15 -17
  130. ultralytics/models/yolo/yoloe/val.py +56 -41
  131. ultralytics/nn/__init__.py +9 -11
  132. ultralytics/nn/autobackend.py +179 -107
  133. ultralytics/nn/modules/__init__.py +67 -67
  134. ultralytics/nn/modules/activation.py +8 -7
  135. ultralytics/nn/modules/block.py +302 -323
  136. ultralytics/nn/modules/conv.py +61 -104
  137. ultralytics/nn/modules/head.py +488 -186
  138. ultralytics/nn/modules/transformer.py +183 -123
  139. ultralytics/nn/modules/utils.py +15 -20
  140. ultralytics/nn/tasks.py +327 -203
  141. ultralytics/nn/text_model.py +81 -65
  142. ultralytics/py.typed +1 -0
  143. ultralytics/solutions/__init__.py +12 -12
  144. ultralytics/solutions/ai_gym.py +19 -27
  145. ultralytics/solutions/analytics.py +36 -26
  146. ultralytics/solutions/config.py +29 -28
  147. ultralytics/solutions/distance_calculation.py +23 -24
  148. ultralytics/solutions/heatmap.py +17 -19
  149. ultralytics/solutions/instance_segmentation.py +21 -19
  150. ultralytics/solutions/object_blurrer.py +16 -17
  151. ultralytics/solutions/object_counter.py +48 -53
  152. ultralytics/solutions/object_cropper.py +22 -16
  153. ultralytics/solutions/parking_management.py +61 -58
  154. ultralytics/solutions/queue_management.py +19 -19
  155. ultralytics/solutions/region_counter.py +63 -50
  156. ultralytics/solutions/security_alarm.py +22 -25
  157. ultralytics/solutions/similarity_search.py +107 -60
  158. ultralytics/solutions/solutions.py +343 -262
  159. ultralytics/solutions/speed_estimation.py +35 -31
  160. ultralytics/solutions/streamlit_inference.py +104 -40
  161. ultralytics/solutions/templates/similarity-search.html +31 -24
  162. ultralytics/solutions/trackzone.py +24 -24
  163. ultralytics/solutions/vision_eye.py +11 -12
  164. ultralytics/trackers/__init__.py +1 -1
  165. ultralytics/trackers/basetrack.py +18 -27
  166. ultralytics/trackers/bot_sort.py +48 -39
  167. ultralytics/trackers/byte_tracker.py +94 -94
  168. ultralytics/trackers/track.py +7 -16
  169. ultralytics/trackers/utils/gmc.py +37 -69
  170. ultralytics/trackers/utils/kalman_filter.py +68 -76
  171. ultralytics/trackers/utils/matching.py +13 -17
  172. ultralytics/utils/__init__.py +251 -275
  173. ultralytics/utils/autobatch.py +19 -7
  174. ultralytics/utils/autodevice.py +68 -38
  175. ultralytics/utils/benchmarks.py +169 -130
  176. ultralytics/utils/callbacks/base.py +12 -13
  177. ultralytics/utils/callbacks/clearml.py +14 -15
  178. ultralytics/utils/callbacks/comet.py +139 -66
  179. ultralytics/utils/callbacks/dvc.py +19 -27
  180. ultralytics/utils/callbacks/hub.py +8 -6
  181. ultralytics/utils/callbacks/mlflow.py +6 -10
  182. ultralytics/utils/callbacks/neptune.py +11 -19
  183. ultralytics/utils/callbacks/platform.py +73 -0
  184. ultralytics/utils/callbacks/raytune.py +3 -4
  185. ultralytics/utils/callbacks/tensorboard.py +9 -12
  186. ultralytics/utils/callbacks/wb.py +33 -30
  187. ultralytics/utils/checks.py +163 -114
  188. ultralytics/utils/cpu.py +89 -0
  189. ultralytics/utils/dist.py +24 -20
  190. ultralytics/utils/downloads.py +176 -146
  191. ultralytics/utils/errors.py +11 -13
  192. ultralytics/utils/events.py +113 -0
  193. ultralytics/utils/export/__init__.py +7 -0
  194. ultralytics/utils/{export.py → export/engine.py} +81 -63
  195. ultralytics/utils/export/imx.py +294 -0
  196. ultralytics/utils/export/tensorflow.py +217 -0
  197. ultralytics/utils/files.py +33 -36
  198. ultralytics/utils/git.py +137 -0
  199. ultralytics/utils/instance.py +105 -120
  200. ultralytics/utils/logger.py +404 -0
  201. ultralytics/utils/loss.py +99 -61
  202. ultralytics/utils/metrics.py +649 -478
  203. ultralytics/utils/nms.py +337 -0
  204. ultralytics/utils/ops.py +263 -451
  205. ultralytics/utils/patches.py +70 -31
  206. ultralytics/utils/plotting.py +253 -223
  207. ultralytics/utils/tal.py +48 -61
  208. ultralytics/utils/torch_utils.py +244 -251
  209. ultralytics/utils/tqdm.py +438 -0
  210. ultralytics/utils/triton.py +22 -23
  211. ultralytics/utils/tuner.py +11 -10
  212. dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
  213. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
  214. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
  215. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
--- a/ultralytics/models/yolo/yoloe/train.py
+++ b/ultralytics/models/yolo/yoloe/train.py
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-import itertools
+from __future__ import annotations
+
 from copy import copy, deepcopy
+from pathlib import Path
 
 import torch
 
@@ -10,21 +12,29 @@ from ultralytics.data.augment import LoadVisualPrompt
 from ultralytics.models.yolo.detect import DetectionTrainer, DetectionValidator
 from ultralytics.nn.tasks import YOLOEModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
-from ultralytics.utils.torch_utils import de_parallel
+from ultralytics.utils.torch_utils import unwrap_model
 
 from ..world.train_world import WorldTrainerFromScratch
 from .val import YOLOEDetectValidator
 
 
 class YOLOETrainer(DetectionTrainer):
-    """A base trainer for YOLOE training."""
+    """A trainer class for YOLOE object detection models.
 
-    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """
-        Initialize the YOLOE Trainer with specified configurations.
+    This class extends DetectionTrainer to provide specialized training functionality for YOLOE models, including custom
+    model initialization, validation, and dataset building with multi-modal support.
+
+    Attributes:
+        loss_names (tuple): Names of loss components used during training.
 
-        This method sets up the YOLOE trainer with the provided configuration and overrides, initializing
-        the training environment, model, and callbacks for YOLOE object detection training.
+    Methods:
+        get_model: Initialize and return a YOLOEModel with specified configuration.
+        get_validator: Return a YOLOEDetectValidator for model validation.
+        build_dataset: Build YOLO dataset with multi-modal support for training.
+    """
+
+    def __init__(self, cfg=DEFAULT_CFG, overrides: dict | None = None, _callbacks=None):
+        """Initialize the YOLOE Trainer with specified configurations.
 
         Args:
             cfg (dict): Configuration dictionary with default training settings from DEFAULT_CFG.
@@ -33,17 +43,17 @@ class YOLOETrainer(DetectionTrainer):
        """
        if overrides is None:
            overrides = {}
+        assert not overrides.get("compile"), f"Training with 'model={overrides['model']}' requires 'compile=False'"
        overrides["overlap_mask"] = False
        super().__init__(cfg, overrides, _callbacks)
 
-    def get_model(self, cfg=None, weights=None, verbose=True):
-        """
-        Return a YOLOEModel initialized with the specified configuration and weights.
+    def get_model(self, cfg=None, weights=None, verbose: bool = True):
+        """Return a YOLOEModel initialized with the specified configuration and weights.
 
        Args:
-            cfg (dict | str | None): Model configuration. Can be a dictionary containing a 'yaml_file' key,
-                a direct path to a YAML file, or None to use default configuration.
-            weights (str | Path | None): Path to pretrained weights file to load into the model.
+            cfg (dict | str, optional): Model configuration. Can be a dictionary containing a 'yaml_file' key, a direct
+                path to a YAML file, or None to use default configuration.
+            weights (str | Path, optional): Path to pretrained weights file to load into the model.
            verbose (bool): Whether to display model information during initialization.
 
        Returns:
@@ -68,36 +78,41 @@ class YOLOETrainer(DetectionTrainer):
        return model
 
    def get_validator(self):
-        """Returns a DetectionValidator for YOLO model validation."""
+        """Return a YOLOEDetectValidator for YOLOE model validation."""
        self.loss_names = "box", "cls", "dfl"
        return YOLOEDetectValidator(
            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
        )
 
-    def build_dataset(self, img_path, mode="train", batch=None):
-        """
-        Build YOLO Dataset.
+    def build_dataset(self, img_path: str, mode: str = "train", batch: int | None = None):
+        """Build YOLO Dataset.
 
        Args:
            img_path (str): Path to the folder containing images.
-            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
-            batch (int, optional): Size of batches, this is for `rect`.
+            mode (str): 'train' mode or 'val' mode, users are able to customize different augmentations for each mode.
+            batch (int, optional): Size of batches, this is for rectangular training.
 
        Returns:
            (Dataset): YOLO dataset configured for training or validation.
        """
-        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
        return build_yolo_dataset(
            self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
        )
 
 
 class YOLOEPETrainer(DetectionTrainer):
-    """Fine-tune YOLOE model in linear probing way."""
+    """Fine-tune YOLOE model using linear probing approach.
 
-    def get_model(self, cfg=None, weights=None, verbose=True):
-        """
-        Return YOLOEModel initialized with specified config and weights.
+    This trainer freezes most model layers and only trains specific projection layers for efficient fine-tuning on new
+    datasets while preserving pretrained features.
+
+    Methods:
+        get_model: Initialize YOLOEModel with frozen layers except projection layers.
+    """
+
+    def get_model(self, cfg=None, weights=None, verbose: bool = True):
+        """Return YOLOEModel initialized with specified config and weights.
 
        Args:
            cfg (dict | str, optional): Model configuration.
@@ -139,17 +154,24 @@ class YOLOEPETrainer(DetectionTrainer):
 
 
 class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
-    """Train YOLOE models from scratch."""
+    """Train YOLOE models from scratch with text embedding support.
 
-    def build_dataset(self, img_path, mode="train", batch=None):
-        """
-        Build YOLO Dataset for training or validation.
+    This trainer combines YOLOE training capabilities with world training features, enabling training from scratch with
+    text embeddings and grounding datasets.
+
+    Methods:
+        build_dataset: Build datasets for training with grounding support.
+        generate_text_embeddings: Generate and cache text embeddings for training.
+    """
+
+    def build_dataset(self, img_path: list[str] | str, mode: str = "train", batch: int | None = None):
+        """Build YOLO Dataset for training or validation.
 
-        This method constructs appropriate datasets based on the mode and input paths, handling both
-        standard YOLO datasets and grounding datasets with different formats.
+        This method constructs appropriate datasets based on the mode and input paths, handling both standard YOLO
+        datasets and grounding datasets with different formats.
 
        Args:
-            img_path (List[str] | str): Path to the folder containing images or list of paths.
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
            mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
            batch (int, optional): Size of batches, used for rectangular training/validation.
 
@@ -158,22 +180,11 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
        """
        return WorldTrainerFromScratch.build_dataset(self, img_path, mode, batch)
 
-    def preprocess_batch(self, batch):
-        """Process batch for training, moving text features to the appropriate device."""
-        batch = DetectionTrainer.preprocess_batch(self, batch)
-
-        texts = list(itertools.chain(*batch["texts"]))
-        txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(self.device)
-        txt_feats = txt_feats.reshape(len(batch["texts"]), -1, txt_feats.shape[-1])
-        batch["txt_feats"] = txt_feats
-        return batch
-
-    def generate_text_embeddings(self, texts, batch, cache_dir):
-        """
-        Generate text embeddings for a list of text samples.
+    def generate_text_embeddings(self, texts: list[str], batch: int, cache_dir: Path):
+        """Generate text embeddings for a list of text samples.
 
        Args:
-            texts (List[str]): List of text samples to encode.
+            texts (list[str]): List of text samples to encode.
            batch (int): Batch size for processing.
            cache_dir (Path): Directory to save/load cached embeddings.
 
@@ -184,42 +195,49 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
        cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
        if cache_path.exists():
            LOGGER.info(f"Reading existed cache from '{cache_path}'")
-            txt_map = torch.load(cache_path)
+            txt_map = torch.load(cache_path, map_location=self.device)
            if sorted(txt_map.keys()) == sorted(texts):
                return txt_map
        LOGGER.info(f"Caching text embeddings to '{cache_path}'")
        assert self.model is not None
-        txt_feats = self.model.get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
+        txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
        txt_map = dict(zip(texts, txt_feats.squeeze(0)))
        torch.save(txt_map, cache_path)
        return txt_map
 
 
 class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
-    """Train prompt-free YOLOE model."""
+    """Train prompt-free YOLOE model.
+
+    This trainer combines linear probing capabilities with from-scratch training for prompt-free YOLOE models that don't
+    require text prompts during inference.
+
+    Methods:
+        get_validator: Return standard DetectionValidator for validation.
+        preprocess_batch: Preprocess batches without text features.
+        set_text_embeddings: Set text embeddings for datasets (no-op for prompt-free).
+    """
 
    def get_validator(self):
-        """Returns a DetectionValidator for YOLO model validation."""
+        """Return a DetectionValidator for YOLO model validation."""
        self.loss_names = "box", "cls", "dfl"
        return DetectionValidator(
            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
        )
 
    def preprocess_batch(self, batch):
-        """Preprocesses a batch of images for YOLOE training, adjusting formatting and dimensions as needed."""
-        batch = DetectionTrainer.preprocess_batch(self, batch)
-        return batch
+        """Preprocess a batch of images for YOLOE training, adjusting formatting and dimensions as needed."""
+        return DetectionTrainer.preprocess_batch(self, batch)
 
-    def set_text_embeddings(self, datasets, batch):
-        """
-        Set text embeddings for datasets to accelerate training by caching category names.
+    def set_text_embeddings(self, datasets, batch: int):
+        """Set text embeddings for datasets to accelerate training by caching category names.
 
-        This method collects unique category names from all datasets, generates text embeddings for them,
-        and caches these embeddings to improve training efficiency. The embeddings are stored in a file
-        in the parent directory of the first dataset's image path.
+        This method collects unique category names from all datasets, generates text embeddings for them, and caches
+        these embeddings to improve training efficiency. The embeddings are stored in a file in the parent directory of
+        the first dataset's image path.
 
        Args:
-            datasets (List[Dataset]): List of datasets containing category names to process.
+            datasets (list[Dataset]): List of datasets containing category names to process.
            batch (int): Batch size for processing text embeddings.
 
        Notes:
@@ -231,14 +249,20 @@ class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
 
 
 class YOLOEVPTrainer(YOLOETrainerFromScratch):
-    """Train YOLOE model with visual prompts."""
+    """Train YOLOE model with visual prompts.
 
-    def build_dataset(self, img_path, mode="train", batch=None):
-        """
-        Build YOLO Dataset for training or validation with visual prompts.
+    This trainer extends YOLOETrainerFromScratch to support visual prompt-based training, where visual cues are provided
+    alongside images to guide the detection process.
+
+    Methods:
+        build_dataset: Build dataset with visual prompt loading transforms.
+    """
+
+    def build_dataset(self, img_path: list[str] | str, mode: str = "train", batch: int | None = None):
+        """Build YOLO Dataset for training or validation with visual prompts.
 
        Args:
-            img_path (List[str] | str): Path to the folder containing images or list of paths.
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
            mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
            batch (int, optional): Size of batches, used for rectangular training/validation.
 
@@ -261,9 +285,3 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
                d.transforms.append(LoadVisualPrompt())
        else:
            self.train_loader.dataset.transforms.append(LoadVisualPrompt())
-
-    def preprocess_batch(self, batch):
-        """Preprocesses a batch of images for YOLOE training, moving visual prompts to the appropriate device."""
-        batch = super().preprocess_batch(batch)
-        batch["visuals"] = batch["visuals"].to(self.device)
-        return batch
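
Note on the de_parallel → unwrap_model rename above: both the stride lookup in build_dataset and the get_text_pe call now go through unwrap_model, so downstream code that imported de_parallel needs the new name on 8.3.224. A minimal compatibility sketch, assuming unwrap_model keeps de_parallel's behavior of returning the module beneath a DP/DDP wrapper:

try:
    from ultralytics.utils.torch_utils import unwrap_model  # name in 8.3.224
except ImportError:  # fall back to the 8.3.137 name
    from ultralytics.utils.torch_utils import de_parallel as unwrap_model


def grid_stride(model) -> int:
    # Mirrors the build_dataset hunk above: max model stride, floored at 32.
    return max(int(unwrap_model(model).stride.max()), 32)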
--- a/ultralytics/models/yolo/yoloe/train_seg.py
+++ b/ultralytics/models/yolo/yoloe/train_seg.py
@@ -11,11 +11,10 @@ from .val import YOLOESegValidator
 
 
 class YOLOESegTrainer(YOLOETrainer, SegmentationTrainer):
-    """
-    Trainer class for YOLOE segmentation models.
+    """Trainer class for YOLOE segmentation models.
 
-    This class combines YOLOETrainer and SegmentationTrainer to provide training functionality
-    specifically for YOLOE segmentation models.
+    This class combines YOLOETrainer and SegmentationTrainer to provide training functionality specifically for YOLOE
+    segmentation models, enabling both object detection and instance segmentation capabilities.
 
    Attributes:
        cfg (dict): Configuration dictionary with training parameters.
@@ -24,11 +23,10 @@ class YOLOESegTrainer(YOLOETrainer, SegmentationTrainer):
    """
 
    def get_model(self, cfg=None, weights=None, verbose=True):
-        """
-        Return YOLOESegModel initialized with specified config and weights.
+        """Return YOLOESegModel initialized with specified config and weights.
 
        Args:
-            cfg (dict | str): Model configuration dictionary or YAML file path.
+            cfg (dict | str, optional): Model configuration dictionary or YAML file path.
            weights (str, optional): Path to pretrained weights file.
            verbose (bool): Whether to display model information.
 
@@ -49,8 +47,7 @@ class YOLOESegTrainer(YOLOETrainer, SegmentationTrainer):
        return model
 
    def get_validator(self):
-        """
-        Create and return a validator for YOLOE segmentation model evaluation.
+        """Create and return a validator for YOLOE segmentation model evaluation.
 
        Returns:
            (YOLOESegValidator): Validator for YOLOE segmentation models.
@@ -62,19 +59,20 @@ class YOLOESegTrainer(YOLOETrainer, SegmentationTrainer):
 
 
 class YOLOEPESegTrainer(SegmentationTrainer):
-    """
-    Fine-tune YOLOESeg model in linear probing way.
+    """Fine-tune YOLOESeg model in linear probing way.
 
    This trainer specializes in fine-tuning YOLOESeg models using a linear probing approach, which involves freezing
-    most of the model and only training specific layers.
+    most of the model and only training specific layers for efficient adaptation to new tasks.
+
+    Attributes:
+        data (dict): Dataset configuration containing channels, class names, and number of classes.
    """
 
    def get_model(self, cfg=None, weights=None, verbose=True):
-        """
-        Return YOLOESegModel initialized with specified config and weights for linear probing.
+        """Return YOLOESegModel initialized with specified config and weights for linear probing.
 
        Args:
-            cfg (dict | str): Model configuration dictionary or YAML file path.
+            cfg (dict | str, optional): Model configuration dictionary or YAML file path.
            weights (str, optional): Path to pretrained weights file.
            verbose (bool): Whether to display model information.
 
@@ -113,12 +111,12 @@ class YOLOEPESegTrainer(SegmentationTrainer):
 
 
 class YOLOESegTrainerFromScratch(YOLOETrainerFromScratch, YOLOESegTrainer):
-    """Trainer for YOLOE segmentation from scratch."""
+    """Trainer for YOLOE segmentation models trained from scratch without pretrained weights."""
 
    pass
 
 
 class YOLOESegVPTrainer(YOLOEVPTrainer, YOLOESegTrainerFromScratch):
-    """Trainer for YOLOE segmentation with VP."""
+    """Trainer for YOLOE segmentation models with Vision Prompt (VP) capabilities."""
 
    pass
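
The linear-probing trainers above (YOLOEPETrainer, YOLOEPESegTrainer) describe freezing most of the model and training only specific projection layers. As a rough illustration of that setup, not the exact layers upstream unfreezes, a sketch of the freeze/unfreeze pattern:

import torch.nn as nn


def freeze_for_linear_probe(model: nn.Module, trainable_head: nn.Module) -> None:
    # Freeze every parameter, then re-enable gradients on the probed head only.
    for p in model.parameters():
        p.requires_grad = False
    for p in trainable_head.parameters():
        p.requires_grad = True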
--- a/ultralytics/models/yolo/yoloe/val.py
+++ b/ultralytics/models/yolo/yoloe/val.py
@@ -1,6 +1,10 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from copy import deepcopy
+from pathlib import Path
+from typing import Any
 
 import torch
 from torch.nn import functional as F
@@ -17,27 +21,39 @@ from ultralytics.utils.torch_utils import select_device, smart_inference_mode
 
 
 class YOLOEDetectValidator(DetectionValidator):
-    """
-    A mixin class for YOLOE model validation that handles both text and visual prompt embeddings.
+    """A validator class for YOLOE detection models that handles both text and visual prompt embeddings.
 
-    This mixin provides functionality to validate YOLOE models using either text or visual prompt embeddings.
-    It includes methods for extracting visual prompt embeddings from samples, preprocessing batches, and
-    running validation with different prompt types.
+    This class extends DetectionValidator to provide specialized validation functionality for YOLOE models. It supports
+    validation using either text prompts or visual prompt embeddings extracted from training samples, enabling flexible
+    evaluation strategies for prompt-based object detection.
 
    Attributes:
        device (torch.device): The device on which validation is performed.
        args (namespace): Configuration arguments for validation.
        dataloader (DataLoader): DataLoader for validation data.
+
+    Methods:
+        get_visual_pe: Extract visual prompt embeddings from training samples.
+        preprocess: Preprocess batch data ensuring visuals are on the same device as images.
+        get_vpe_dataloader: Create a dataloader for LVIS training visual prompt samples.
+        __call__: Run validation using either text or visual prompt embeddings.
+
+    Examples:
+        Validate with text prompts
+        >>> validator = YOLOEDetectValidator()
+        >>> stats = validator(model=model, load_vp=False)
+
+        Validate with visual prompts
+        >>> stats = validator(model=model, refer_data="path/to/data.yaml", load_vp=True)
    """
 
    @smart_inference_mode()
-    def get_visual_pe(self, dataloader, model):
-        """
-        Extract visual prompt embeddings from training samples.
+    def get_visual_pe(self, dataloader: torch.utils.data.DataLoader, model: YOLOEModel) -> torch.Tensor:
+        """Extract visual prompt embeddings from training samples.
 
-        This function processes a dataloader to compute visual prompt embeddings for each class
-        using a YOLOE model. It normalizes the embeddings and handles cases where no samples
-        exist for a class.
+        This method processes a dataloader to compute visual prompt embeddings for each class using a YOLOE model. It
+        normalizes the embeddings and handles cases where no samples exist for a class by setting their embeddings to
+        zero.
 
        Args:
            dataloader (torch.utils.data.DataLoader): The dataloader providing training samples.
@@ -47,12 +63,13 @@ class YOLOEDetectValidator(DetectionValidator):
            (torch.Tensor): Visual prompt embeddings with shape (1, num_classes, embed_dim).
        """
        assert isinstance(model, YOLOEModel)
-        names = [name.split("/")[0] for name in list(dataloader.dataset.data["names"].values())]
+        names = [name.split("/", 1)[0] for name in list(dataloader.dataset.data["names"].values())]
        visual_pe = torch.zeros(len(names), model.model[-1].embed, device=self.device)
        cls_visual_num = torch.zeros(len(names))
 
        desc = "Get visual prompt embeddings from samples"
 
+        # Count samples per class
        for batch in dataloader:
            cls = batch["cls"].squeeze(-1).to(torch.int).unique()
            count = torch.bincount(cls, minlength=len(names))
@@ -60,6 +77,7 @@ class YOLOEDetectValidator(DetectionValidator):
            cls_visual_num += count
        cls_visual_num = cls_visual_num.to(self.device)
 
+        # Extract visual prompt embeddings
        pbar = TQDM(dataloader, total=len(dataloader), desc=desc)
        for batch in pbar:
            batch = self.preprocess(batch)
@@ -69,34 +87,26 @@ class YOLOEDetectValidator(DetectionValidator):
            for i in range(preds.shape[0]):
                cls = batch["cls"][batch_idx == i].squeeze(-1).to(torch.int).unique(sorted=True)
                pad_cls = torch.ones(preds.shape[1], device=self.device) * -1
-                pad_cls[: len(cls)] = cls
+                pad_cls[: cls.shape[0]] = cls
                for c in cls:
                    visual_pe[c] += preds[i][pad_cls == c].sum(0) / cls_visual_num[c]
 
+        # Normalize embeddings for classes with samples, set others to zero
        visual_pe[cls_visual_num != 0] = F.normalize(visual_pe[cls_visual_num != 0], dim=-1, p=2)
        visual_pe[cls_visual_num == 0] = 0
        return visual_pe.unsqueeze(0)
 
-    def preprocess(self, batch):
-        """Preprocess batch data, ensuring visuals are on the same device as images."""
-        batch = super().preprocess(batch)
-        if "visuals" in batch:
-            batch["visuals"] = batch["visuals"].to(batch["img"].device)
-        return batch
-
-    def get_vpe_dataloader(self, data):
-        """
-        Create a dataloader for LVIS training visual prompt samples.
+    def get_vpe_dataloader(self, data: dict[str, Any]) -> torch.utils.data.DataLoader:
+        """Create a dataloader for LVIS training visual prompt samples.
 
-        This function prepares a dataloader for visual prompt embeddings (VPE) using the LVIS dataset.
-        It applies necessary transformations and configurations to the dataset and returns a dataloader
-        for validation purposes.
+        This method prepares a dataloader for visual prompt embeddings (VPE) using the specified dataset. It applies
+        necessary transformations including LoadVisualPrompt and configurations to the dataset for validation purposes.
 
        Args:
            data (dict): Dataset configuration dictionary containing paths and settings.
 
        Returns:
-            (torch.utils.data.DataLoader): The dataLoader for visual prompt samples.
+            (torch.utils.data.DataLoader): The dataloader for visual prompt samples.
        """
        dataset = build_yolo_dataset(
            self.args,
@@ -120,17 +130,22 @@ class YOLOEDetectValidator(DetectionValidator):
        )
 
    @smart_inference_mode()
-    def __call__(self, trainer=None, model=None, refer_data=None, load_vp=False):
-        """
-        Run validation on the model using either text or visual prompt embeddings.
-
-        This method validates the model using either text prompts or visual prompts, depending
-        on the `load_vp` flag. It supports validation during training (using a trainer object)
-        or standalone validation with a provided model.
+    def __call__(
+        self,
+        trainer: Any | None = None,
+        model: YOLOEModel | str | None = None,
+        refer_data: str | None = None,
+        load_vp: bool = False,
+    ) -> dict[str, Any]:
+        """Run validation on the model using either text or visual prompt embeddings.
+
+        This method validates the model using either text prompts or visual prompts, depending on the load_vp flag. It
+        supports validation during training (using a trainer object) or standalone validation with a provided model. For
+        visual prompts, reference data can be specified to extract embeddings from a different dataset.
 
        Args:
            trainer (object, optional): Trainer object containing the model and device.
-            model (YOLOEModel, optional): Model to validate. Required if `trainer` is not provided.
+            model (YOLOEModel | str, optional): Model to validate. Required if trainer is not provided.
            refer_data (str, optional): Path to reference data for visual prompts.
            load_vp (bool): Whether to load visual prompts. If False, text prompts are used.
 
@@ -140,7 +155,7 @@ class YOLOEDetectValidator(DetectionValidator):
        if trainer is not None:
            self.device = trainer.device
            model = trainer.ema.ema
-            names = [name.split("/")[0] for name in list(self.dataloader.dataset.data["names"].values())]
+            names = [name.split("/", 1)[0] for name in list(self.dataloader.dataset.data["names"].values())]
 
            if load_vp:
                LOGGER.info("Validate using the visual prompt.")
@@ -156,15 +171,15 @@ class YOLOEDetectValidator(DetectionValidator):
        else:
            if refer_data is not None:
                assert load_vp, "Refer data is only used for visual prompt validation."
-            self.device = select_device(self.args.device)
+            self.device = select_device(self.args.device, verbose=False)
 
-            if isinstance(model, str):
-                from ultralytics.nn.tasks import attempt_load_weights
+            if isinstance(model, (str, Path)):
+                from ultralytics.nn.tasks import load_checkpoint
 
-                model = attempt_load_weights(model, device=self.device, inplace=True)
+                model, _ = load_checkpoint(model, device=self.device)  # model, ckpt
            model.eval().to(self.device)
            data = check_det_dataset(refer_data or self.args.data)
-            names = [name.split("/")[0] for name in list(data["names"].values())]
+            names = [name.split("/", 1)[0] for name in list(data["names"].values())]
 
            if load_vp:
                LOGGER.info("Validate using the visual prompt.")
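
A small note on the repeated name.split("/")[0] → name.split("/", 1)[0] change in val.py: both pick the first segment of class names that pack alternatives into one "a/b/c" string; maxsplit=1 merely stops after the first separator instead of splitting the whole string. A quick check with a made-up class name:

>>> "sofa/couch/settee".split("/")
['sofa', 'couch', 'settee']
>>> "sofa/couch/settee".split("/", 1)  # at most one split is performed
['sofa', 'couch/settee']
>>> "sofa/couch/settee".split("/", 1)[0]
'sofa'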
--- a/ultralytics/nn/__init__.py
+++ b/ultralytics/nn/__init__.py
@@ -5,25 +5,23 @@ from .tasks import (
    ClassificationModel,
    DetectionModel,
    SegmentationModel,
-    attempt_load_one_weight,
-    attempt_load_weights,
    guess_model_scale,
    guess_model_task,
+    load_checkpoint,
    parse_model,
    torch_safe_load,
    yaml_model_load,
 )
 
 __all__ = (
-    "attempt_load_one_weight",
-    "attempt_load_weights",
-    "parse_model",
-    "yaml_model_load",
-    "guess_model_task",
-    "guess_model_scale",
-    "torch_safe_load",
+    "BaseModel",
+    "ClassificationModel",
    "DetectionModel",
    "SegmentationModel",
-    "ClassificationModel",
-    "BaseModel",
+    "guess_model_scale",
+    "guess_model_task",
+    "load_checkpoint",
+    "parse_model",
+    "torch_safe_load",
+    "yaml_model_load",
 )
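
This last hunk completes the loader rename seen in val.py: attempt_load_one_weight and attempt_load_weights leave the public ultralytics.nn surface and load_checkpoint replaces them. Per the val.py hunk above, load_checkpoint returns a (model, ckpt) tuple rather than a bare model. A hedged migration sketch, using a placeholder weights path and only the device keyword shown in this diff:

from ultralytics.nn.tasks import load_checkpoint

model, ckpt = load_checkpoint("path/to/weights.pt", device="cpu")  # placeholder path
model.eval()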