dgenerate-ultralytics-headless 8.3.196__py3-none-any.whl → 8.3.248__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (243)
  1. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +33 -34
  2. dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
  3. tests/__init__.py +5 -7
  4. tests/conftest.py +8 -15
  5. tests/test_cli.py +8 -10
  6. tests/test_cuda.py +9 -10
  7. tests/test_engine.py +29 -2
  8. tests/test_exports.py +69 -21
  9. tests/test_integrations.py +8 -11
  10. tests/test_python.py +109 -71
  11. tests/test_solutions.py +170 -159
  12. ultralytics/__init__.py +27 -9
  13. ultralytics/cfg/__init__.py +57 -64
  14. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  15. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  16. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  17. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  18. ultralytics/cfg/datasets/Objects365.yaml +19 -15
  19. ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
  20. ultralytics/cfg/datasets/VOC.yaml +19 -21
  21. ultralytics/cfg/datasets/VisDrone.yaml +5 -5
  22. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  23. ultralytics/cfg/datasets/coco-pose.yaml +24 -2
  24. ultralytics/cfg/datasets/coco.yaml +2 -2
  25. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  26. ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
  27. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  28. ultralytics/cfg/datasets/dog-pose.yaml +28 -0
  29. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  30. ultralytics/cfg/datasets/dota8.yaml +2 -2
  31. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
  32. ultralytics/cfg/datasets/kitti.yaml +27 -0
  33. ultralytics/cfg/datasets/lvis.yaml +7 -7
  34. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  35. ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
  36. ultralytics/cfg/datasets/xView.yaml +16 -16
  37. ultralytics/cfg/default.yaml +96 -94
  38. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  39. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  40. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  41. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
  42. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
  43. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
  44. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
  45. ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
  46. ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
  47. ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
  48. ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
  49. ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
  50. ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
  51. ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
  52. ultralytics/cfg/models/v6/yolov6.yaml +1 -1
  53. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  54. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  55. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  56. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  57. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  58. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  59. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  60. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  61. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  62. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  63. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  64. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
  65. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  66. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  67. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  68. ultralytics/cfg/trackers/botsort.yaml +16 -17
  69. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  70. ultralytics/data/__init__.py +4 -4
  71. ultralytics/data/annotator.py +3 -4
  72. ultralytics/data/augment.py +286 -476
  73. ultralytics/data/base.py +18 -26
  74. ultralytics/data/build.py +151 -26
  75. ultralytics/data/converter.py +38 -50
  76. ultralytics/data/dataset.py +47 -75
  77. ultralytics/data/loaders.py +42 -49
  78. ultralytics/data/split.py +5 -6
  79. ultralytics/data/split_dota.py +8 -15
  80. ultralytics/data/utils.py +41 -45
  81. ultralytics/engine/exporter.py +462 -462
  82. ultralytics/engine/model.py +150 -191
  83. ultralytics/engine/predictor.py +30 -40
  84. ultralytics/engine/results.py +177 -311
  85. ultralytics/engine/trainer.py +193 -120
  86. ultralytics/engine/tuner.py +77 -63
  87. ultralytics/engine/validator.py +39 -22
  88. ultralytics/hub/__init__.py +16 -19
  89. ultralytics/hub/auth.py +6 -12
  90. ultralytics/hub/google/__init__.py +7 -10
  91. ultralytics/hub/session.py +15 -25
  92. ultralytics/hub/utils.py +5 -8
  93. ultralytics/models/__init__.py +1 -1
  94. ultralytics/models/fastsam/__init__.py +1 -1
  95. ultralytics/models/fastsam/model.py +8 -10
  96. ultralytics/models/fastsam/predict.py +19 -30
  97. ultralytics/models/fastsam/utils.py +1 -2
  98. ultralytics/models/fastsam/val.py +5 -7
  99. ultralytics/models/nas/__init__.py +1 -1
  100. ultralytics/models/nas/model.py +5 -8
  101. ultralytics/models/nas/predict.py +7 -9
  102. ultralytics/models/nas/val.py +1 -2
  103. ultralytics/models/rtdetr/__init__.py +1 -1
  104. ultralytics/models/rtdetr/model.py +7 -8
  105. ultralytics/models/rtdetr/predict.py +15 -19
  106. ultralytics/models/rtdetr/train.py +10 -13
  107. ultralytics/models/rtdetr/val.py +21 -23
  108. ultralytics/models/sam/__init__.py +15 -2
  109. ultralytics/models/sam/amg.py +14 -20
  110. ultralytics/models/sam/build.py +26 -19
  111. ultralytics/models/sam/build_sam3.py +377 -0
  112. ultralytics/models/sam/model.py +29 -32
  113. ultralytics/models/sam/modules/blocks.py +83 -144
  114. ultralytics/models/sam/modules/decoders.py +22 -40
  115. ultralytics/models/sam/modules/encoders.py +44 -101
  116. ultralytics/models/sam/modules/memory_attention.py +16 -30
  117. ultralytics/models/sam/modules/sam.py +206 -79
  118. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  119. ultralytics/models/sam/modules/transformer.py +18 -28
  120. ultralytics/models/sam/modules/utils.py +174 -50
  121. ultralytics/models/sam/predict.py +2268 -366
  122. ultralytics/models/sam/sam3/__init__.py +3 -0
  123. ultralytics/models/sam/sam3/decoder.py +546 -0
  124. ultralytics/models/sam/sam3/encoder.py +529 -0
  125. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  126. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  127. ultralytics/models/sam/sam3/model_misc.py +199 -0
  128. ultralytics/models/sam/sam3/necks.py +129 -0
  129. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  130. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  131. ultralytics/models/sam/sam3/vitdet.py +547 -0
  132. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  133. ultralytics/models/utils/loss.py +14 -26
  134. ultralytics/models/utils/ops.py +13 -17
  135. ultralytics/models/yolo/__init__.py +1 -1
  136. ultralytics/models/yolo/classify/predict.py +9 -12
  137. ultralytics/models/yolo/classify/train.py +15 -41
  138. ultralytics/models/yolo/classify/val.py +34 -32
  139. ultralytics/models/yolo/detect/predict.py +8 -11
  140. ultralytics/models/yolo/detect/train.py +13 -32
  141. ultralytics/models/yolo/detect/val.py +75 -63
  142. ultralytics/models/yolo/model.py +37 -53
  143. ultralytics/models/yolo/obb/predict.py +5 -14
  144. ultralytics/models/yolo/obb/train.py +11 -14
  145. ultralytics/models/yolo/obb/val.py +42 -39
  146. ultralytics/models/yolo/pose/__init__.py +1 -1
  147. ultralytics/models/yolo/pose/predict.py +7 -22
  148. ultralytics/models/yolo/pose/train.py +10 -22
  149. ultralytics/models/yolo/pose/val.py +40 -59
  150. ultralytics/models/yolo/segment/predict.py +16 -20
  151. ultralytics/models/yolo/segment/train.py +3 -12
  152. ultralytics/models/yolo/segment/val.py +106 -56
  153. ultralytics/models/yolo/world/train.py +12 -16
  154. ultralytics/models/yolo/world/train_world.py +11 -34
  155. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  156. ultralytics/models/yolo/yoloe/predict.py +16 -23
  157. ultralytics/models/yolo/yoloe/train.py +31 -56
  158. ultralytics/models/yolo/yoloe/train_seg.py +5 -10
  159. ultralytics/models/yolo/yoloe/val.py +16 -21
  160. ultralytics/nn/__init__.py +7 -7
  161. ultralytics/nn/autobackend.py +152 -80
  162. ultralytics/nn/modules/__init__.py +60 -60
  163. ultralytics/nn/modules/activation.py +4 -6
  164. ultralytics/nn/modules/block.py +133 -217
  165. ultralytics/nn/modules/conv.py +52 -97
  166. ultralytics/nn/modules/head.py +64 -116
  167. ultralytics/nn/modules/transformer.py +79 -89
  168. ultralytics/nn/modules/utils.py +16 -21
  169. ultralytics/nn/tasks.py +111 -156
  170. ultralytics/nn/text_model.py +40 -67
  171. ultralytics/solutions/__init__.py +12 -12
  172. ultralytics/solutions/ai_gym.py +11 -17
  173. ultralytics/solutions/analytics.py +15 -16
  174. ultralytics/solutions/config.py +5 -6
  175. ultralytics/solutions/distance_calculation.py +10 -13
  176. ultralytics/solutions/heatmap.py +7 -13
  177. ultralytics/solutions/instance_segmentation.py +5 -8
  178. ultralytics/solutions/object_blurrer.py +7 -10
  179. ultralytics/solutions/object_counter.py +12 -19
  180. ultralytics/solutions/object_cropper.py +8 -14
  181. ultralytics/solutions/parking_management.py +33 -31
  182. ultralytics/solutions/queue_management.py +10 -12
  183. ultralytics/solutions/region_counter.py +9 -12
  184. ultralytics/solutions/security_alarm.py +15 -20
  185. ultralytics/solutions/similarity_search.py +13 -17
  186. ultralytics/solutions/solutions.py +75 -74
  187. ultralytics/solutions/speed_estimation.py +7 -10
  188. ultralytics/solutions/streamlit_inference.py +4 -7
  189. ultralytics/solutions/templates/similarity-search.html +7 -18
  190. ultralytics/solutions/trackzone.py +7 -10
  191. ultralytics/solutions/vision_eye.py +5 -8
  192. ultralytics/trackers/__init__.py +1 -1
  193. ultralytics/trackers/basetrack.py +3 -5
  194. ultralytics/trackers/bot_sort.py +10 -27
  195. ultralytics/trackers/byte_tracker.py +14 -30
  196. ultralytics/trackers/track.py +3 -6
  197. ultralytics/trackers/utils/gmc.py +11 -22
  198. ultralytics/trackers/utils/kalman_filter.py +37 -48
  199. ultralytics/trackers/utils/matching.py +12 -15
  200. ultralytics/utils/__init__.py +116 -116
  201. ultralytics/utils/autobatch.py +2 -4
  202. ultralytics/utils/autodevice.py +17 -18
  203. ultralytics/utils/benchmarks.py +70 -70
  204. ultralytics/utils/callbacks/base.py +8 -10
  205. ultralytics/utils/callbacks/clearml.py +5 -13
  206. ultralytics/utils/callbacks/comet.py +32 -46
  207. ultralytics/utils/callbacks/dvc.py +13 -18
  208. ultralytics/utils/callbacks/mlflow.py +4 -5
  209. ultralytics/utils/callbacks/neptune.py +7 -15
  210. ultralytics/utils/callbacks/platform.py +314 -38
  211. ultralytics/utils/callbacks/raytune.py +3 -4
  212. ultralytics/utils/callbacks/tensorboard.py +23 -31
  213. ultralytics/utils/callbacks/wb.py +10 -13
  214. ultralytics/utils/checks.py +151 -87
  215. ultralytics/utils/cpu.py +3 -8
  216. ultralytics/utils/dist.py +19 -15
  217. ultralytics/utils/downloads.py +29 -41
  218. ultralytics/utils/errors.py +6 -14
  219. ultralytics/utils/events.py +2 -4
  220. ultralytics/utils/export/__init__.py +7 -0
  221. ultralytics/utils/{export.py → export/engine.py} +16 -16
  222. ultralytics/utils/export/imx.py +325 -0
  223. ultralytics/utils/export/tensorflow.py +231 -0
  224. ultralytics/utils/files.py +24 -28
  225. ultralytics/utils/git.py +9 -11
  226. ultralytics/utils/instance.py +30 -51
  227. ultralytics/utils/logger.py +212 -114
  228. ultralytics/utils/loss.py +15 -24
  229. ultralytics/utils/metrics.py +131 -160
  230. ultralytics/utils/nms.py +21 -30
  231. ultralytics/utils/ops.py +107 -165
  232. ultralytics/utils/patches.py +33 -21
  233. ultralytics/utils/plotting.py +122 -119
  234. ultralytics/utils/tal.py +28 -44
  235. ultralytics/utils/torch_utils.py +70 -187
  236. ultralytics/utils/tqdm.py +20 -20
  237. ultralytics/utils/triton.py +13 -19
  238. ultralytics/utils/tuner.py +17 -5
  239. dgenerate_ultralytics_headless-8.3.196.dist-info/RECORD +0 -281
  240. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
  241. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
  242. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
  243. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0

ultralytics/models/yolo/segment/train.py +3 -12
@@ -8,12 +8,10 @@ from pathlib import Path
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import SegmentationModel
 from ultralytics.utils import DEFAULT_CFG, RANK
-from ultralytics.utils.plotting import plot_results
 
 
 class SegmentationTrainer(yolo.detect.DetectionTrainer):
-    """
-    A class extending the DetectionTrainer class for training based on a segmentation model.
+    """A class extending the DetectionTrainer class for training based on a segmentation model.
 
     This trainer specializes in handling segmentation tasks, extending the detection trainer with segmentation-specific
     functionality including model initialization, validation, and visualization.
@@ -29,8 +27,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
     """
 
     def __init__(self, cfg=DEFAULT_CFG, overrides: dict | None = None, _callbacks=None):
-        """
-        Initialize a SegmentationTrainer object.
+        """Initialize a SegmentationTrainer object.
 
         Args:
             cfg (dict): Configuration dictionary with default training settings.
@@ -41,11 +38,9 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
             overrides = {}
         overrides["task"] = "segment"
         super().__init__(cfg, overrides, _callbacks)
-        self.dynamic_tensors = ["batch_idx", "cls", "bboxes", "masks"]
 
     def get_model(self, cfg: dict | str | None = None, weights: str | Path | None = None, verbose: bool = True):
-        """
-        Initialize and return a SegmentationModel with specified configuration and weights.
+        """Initialize and return a SegmentationModel with specified configuration and weights.
 
         Args:
             cfg (dict | str, optional): Model configuration. Can be a dictionary, a path to a YAML file, or None.
@@ -72,7 +67,3 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
         return yolo.segment.SegmentationValidator(
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
-
-    def plot_metrics(self):
-        """Plot training/validation metrics."""
-        plot_results(file=self.csv, segment=True, on_plot=self.on_plot)  # save results.png
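
With the `plot_metrics` override and its `plot_results` import gone, metric plotting falls through to the parent `DetectionTrainer` implementation. The trainer's entry point is unchanged; a minimal usage sketch (the model and dataset names here are illustrative, not taken from this diff):

    from ultralytics.models.yolo.segment import SegmentationTrainer

    # Overrides mirror the __init__ signature shown above; "task" is forced to "segment" internally.
    args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml", epochs=1)  # illustrative asset names
    trainer = SegmentationTrainer(overrides=args)
    trainer.train()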

ultralytics/models/yolo/segment/val.py +106 -56
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-from multiprocessing.pool import ThreadPool
 from pathlib import Path
 from typing import Any
 
@@ -11,17 +10,16 @@ import torch
 import torch.nn.functional as F
 
 from ultralytics.models.yolo.detect import DetectionValidator
-from ultralytics.utils import LOGGER, NUM_THREADS, ops
+from ultralytics.utils import LOGGER, ops
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.metrics import SegmentMetrics, mask_iou
 
 
 class SegmentationValidator(DetectionValidator):
-    """
-    A class extending the DetectionValidator class for validation based on a segmentation model.
+    """A class extending the DetectionValidator class for validation based on a segmentation model.
 
-    This validator handles the evaluation of segmentation models, processing both bounding box and mask predictions
-    to compute metrics such as mAP for both detection and segmentation tasks.
+    This validator handles the evaluation of segmentation models, processing both bounding box and mask predictions to
+    compute metrics such as mAP for both detection and segmentation tasks.
 
     Attributes:
         plot_masks (list): List to store masks for plotting.
@@ -38,11 +36,10 @@ class SegmentationValidator(DetectionValidator):
     """
 
     def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None) -> None:
-        """
-        Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics.
+        """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics.
 
         Args:
-            dataloader (torch.utils.data.DataLoader, optional): Dataloader to use for validation.
+            dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
             save_dir (Path, optional): Directory to save results.
             args (namespace, optional): Arguments for the validator.
             _callbacks (list, optional): List of callback functions.
@@ -53,8 +50,7 @@ class SegmentationValidator(DetectionValidator):
         self.metrics = SegmentMetrics()
 
     def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
-        """
-        Preprocess batch of images for YOLO segmentation validation.
+        """Preprocess batch of images for YOLO segmentation validation.
 
         Args:
             batch (dict[str, Any]): Batch containing images and annotations.
@@ -67,8 +63,7 @@ class SegmentationValidator(DetectionValidator):
        return batch
 
     def init_metrics(self, model: torch.nn.Module) -> None:
-        """
-        Initialize metrics and select mask processing function based on save_json flag.
+        """Initialize metrics and select mask processing function based on save_json flag.
 
         Args:
             model (torch.nn.Module): Model to validate.
@@ -96,8 +91,7 @@ class SegmentationValidator(DetectionValidator):
         )
 
     def postprocess(self, preds: list[torch.Tensor]) -> list[dict[str, torch.Tensor]]:
-        """
-        Post-process YOLO predictions and return output detections with proto.
+        """Post-process YOLO predictions and return output detections with proto.
 
         Args:
             preds (list[torch.Tensor]): Raw predictions from the model.
@@ -112,7 +106,7 @@ class SegmentationValidator(DetectionValidator):
             coefficient = pred.pop("extra")
             pred["masks"] = (
                 self.process(proto[i], coefficient, pred["bboxes"], shape=imgsz)
-                if len(coefficient)
+                if coefficient.shape[0]
                 else torch.zeros(
                     (0, *(imgsz if self.process is ops.process_mask_native else proto.shape[2:])),
                     dtype=torch.uint8,
@@ -122,8 +116,7 @@ class SegmentationValidator(DetectionValidator):
         return preds
 
     def _prepare_batch(self, si: int, batch: dict[str, Any]) -> dict[str, Any]:
-        """
-        Prepare a batch for training or inference by processing images and targets.
+        """Prepare a batch for training or inference by processing images and targets.
 
         Args:
             si (int): Batch index.
@@ -133,22 +126,23 @@ class SegmentationValidator(DetectionValidator):
             (dict[str, Any]): Prepared batch with processed annotations.
         """
         prepared_batch = super()._prepare_batch(si, batch)
-        nl = len(prepared_batch["cls"])
+        nl = prepared_batch["cls"].shape[0]
         if self.args.overlap_mask:
             masks = batch["masks"][si]
             index = torch.arange(1, nl + 1, device=masks.device).view(nl, 1, 1)
             masks = (masks == index).float()
         else:
             masks = batch["masks"][batch["batch_idx"] == si]
-        if nl and self.process is ops.process_mask_native:
-            masks = F.interpolate(masks[None], prepared_batch["imgsz"], mode="bilinear", align_corners=False)[0]
-            masks = masks.gt_(0.5)
+        if nl:
+            mask_size = [s if self.process is ops.process_mask_native else s // 4 for s in prepared_batch["imgsz"]]
+            if masks.shape[1:] != mask_size:
+                masks = F.interpolate(masks[None], mask_size, mode="bilinear", align_corners=False)[0]
+                masks = masks.gt_(0.5)
         prepared_batch["masks"] = masks
         return prepared_batch
 
     def _process_batch(self, preds: dict[str, torch.Tensor], batch: dict[str, Any]) -> dict[str, np.ndarray]:
-        """
-        Compute correct prediction matrix for a batch based on bounding boxes and optional masks.
+        """Compute correct prediction matrix for a batch based on bounding boxes and optional masks.
 
         Args:
             preds (dict[str, torch.Tensor]): Dictionary containing predictions with keys like 'cls' and 'masks'.
@@ -157,28 +151,27 @@ class SegmentationValidator(DetectionValidator):
         Returns:
             (dict[str, np.ndarray]): A dictionary containing correct prediction matrices including 'tp_m' for mask IoU.
 
-        Notes:
-            - If `masks` is True, the function computes IoU between predicted and ground truth masks.
-            - If `overlap` is True and `masks` is True, overlapping masks are taken into account when computing IoU.
-
         Examples:
             >>> preds = {"cls": torch.tensor([1, 0]), "masks": torch.rand(2, 640, 640), "bboxes": torch.rand(2, 4)}
             >>> batch = {"cls": torch.tensor([1, 0]), "masks": torch.rand(2, 640, 640), "bboxes": torch.rand(2, 4)}
            >>> correct_preds = validator._process_batch(preds, batch)
+
+        Notes:
+            - If `masks` is True, the function computes IoU between predicted and ground truth masks.
+            - If `overlap` is True and `masks` is True, overlapping masks are taken into account when computing IoU.
         """
         tp = super()._process_batch(preds, batch)
         gt_cls = batch["cls"]
-        if len(gt_cls) == 0 or len(preds["cls"]) == 0:
-            tp_m = np.zeros((len(preds["cls"]), self.niou), dtype=bool)
+        if gt_cls.shape[0] == 0 or preds["cls"].shape[0] == 0:
+            tp_m = np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)
         else:
-            iou = mask_iou(batch["masks"].flatten(1), preds["masks"].flatten(1))
+            iou = mask_iou(batch["masks"].flatten(1), preds["masks"].flatten(1).float())  # float, uint8
             tp_m = self.match_predictions(preds["cls"], gt_cls, iou).cpu().numpy()
         tp.update({"tp_m": tp_m})  # update tp with mask IoU
         return tp
 
     def plot_predictions(self, batch: dict[str, Any], preds: list[dict[str, torch.Tensor]], ni: int) -> None:
-        """
-        Plot batch predictions with masks and bounding boxes.
+        """Plot batch predictions with masks and bounding boxes.
 
         Args:
             batch (dict[str, Any]): Batch containing images and annotations.
@@ -187,14 +180,13 @@ class SegmentationValidator(DetectionValidator):
         """
         for p in preds:
             masks = p["masks"]
-            if masks.shape[0] > 50:
-                LOGGER.warning("Limiting validation plots to first 50 items per image for speed...")
-            p["masks"] = torch.as_tensor(masks[:50], dtype=torch.uint8).cpu()
-        super().plot_predictions(batch, preds, ni, max_det=50)  # plot bboxes
+            if masks.shape[0] > self.args.max_det:
+                LOGGER.warning(f"Limiting validation plots to 'max_det={self.args.max_det}' items.")
+            p["masks"] = torch.as_tensor(masks[: self.args.max_det], dtype=torch.uint8).cpu()
+        super().plot_predictions(batch, preds, ni, max_det=self.args.max_det)  # plot bboxes
 
     def save_one_txt(self, predn: torch.Tensor, save_conf: bool, shape: tuple[int, int], file: Path) -> None:
-        """
-        Save YOLO detections to a txt file in normalized coordinates in a specific format.
+        """Save YOLO detections to a txt file in normalized coordinates in a specific format.
 
         Args:
             predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
@@ -213,24 +205,84 @@ class SegmentationValidator(DetectionValidator):
         ).save_txt(file, save_conf=save_conf)
 
     def pred_to_json(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> None:
-        """
-        Save one JSON result for COCO evaluation.
+        """Save one JSON result for COCO evaluation.
 
         Args:
             predn (dict[str, torch.Tensor]): Predictions containing bboxes, masks, confidence scores, and classes.
             pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
         """
-        from faster_coco_eval.core.mask import encode  # noqa
-
-        def single_encode(x):
-            """Encode predicted masks as RLE and append results to jdict."""
-            rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0]
-            rle["counts"] = rle["counts"].decode("utf-8")
-            return rle
 
-        pred_masks = np.transpose(predn["masks"], (2, 0, 1))
-        with ThreadPool(NUM_THREADS) as pool:
-            rles = pool.map(single_encode, pred_masks)
+        def to_string(counts: list[int]) -> str:
+            """Converts the RLE object into a compact string representation. Each count is delta-encoded and
+            variable-length encoded as a string.
+
+            Args:
+                counts (list[int]): List of RLE counts.
+            """
+            result = []
+
+            for i in range(len(counts)):
+                x = int(counts[i])
+
+                # Apply delta encoding for all counts after the second entry
+                if i > 2:
+                    x -= int(counts[i - 2])
+
+                # Variable-length encode the value
+                while True:
+                    c = x & 0x1F  # Take 5 bits
+                    x >>= 5
+
+                    # If the sign bit (0x10) is set, continue if x != -1;
+                    # otherwise, continue if x != 0
+                    more = (x != -1) if (c & 0x10) else (x != 0)
+                    if more:
+                        c |= 0x20  # Set continuation bit
+                    c += 48  # Shift to ASCII
+                    result.append(chr(c))
+                    if not more:
+                        break
+
+            return "".join(result)
+
+        def multi_encode(pixels: torch.Tensor) -> list[int]:
+            """Convert multiple binary masks using Run-Length Encoding (RLE).
+
+            Args:
+                pixels (torch.Tensor): A 2D tensor where each row represents a flattened binary mask with shape [N,
+                    H*W].
+
+            Returns:
+                (list[int]): A list of RLE counts for each mask.
+            """
+            transitions = pixels[:, 1:] != pixels[:, :-1]
+            row_idx, col_idx = torch.where(transitions)
+            col_idx = col_idx + 1
+
+            # Compute run lengths
+            counts = []
+            for i in range(pixels.shape[0]):
+                positions = col_idx[row_idx == i]
+                if len(positions):
+                    count = torch.diff(positions).tolist()
+                    count.insert(0, positions[0].item())
+                    count.append(len(pixels[i]) - positions[-1].item())
+                else:
+                    count = [len(pixels[i])]
+
+                # Ensure starting with background (0) count
+                if pixels[i][0].item() == 1:
+                    count = [0, *count]
+                counts.append(count)
+
+            return counts
+
+        pred_masks = predn["masks"].transpose(2, 1).contiguous().view(len(predn["masks"]), -1)  # N, H*W
+        h, w = predn["masks"].shape[1:3]
+        counts = multi_encode(pred_masks)
+        rles = []
+        for c in counts:
+            rles.append({"size": [h, w], "counts": to_string(c)})
         super().pred_to_json(predn, pbatch)
         for i, r in enumerate(rles):
            self.jdict[-len(rles) + i]["segmentation"] = r  # segmentation
@@ -239,11 +291,9 @@ class SegmentationValidator(DetectionValidator):
         """Scales predictions to the original image size."""
         return {
             **super().scale_preds(predn, pbatch),
-            "masks": ops.scale_image(
-                torch.as_tensor(predn["masks"], dtype=torch.uint8).permute(1, 2, 0).contiguous().cpu().numpy(),
-                pbatch["ori_shape"],
-                ratio_pad=pbatch["ratio_pad"],
-            ),
+            "masks": ops.scale_masks(predn["masks"][None], pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"])[
+                0
+            ].byte(),
         }
 
     def eval_json(self, stats: dict[str, Any]) -> dict[str, Any]:
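
The headline change in this file replaces `faster_coco_eval`'s C-backed mask encoding (previously fanned out over a `ThreadPool`) with a dependency-free, pure-PyTorch COCO RLE encoder; masks also now stay tensors end to end (`ops.scale_masks` instead of a NumPy round-trip through `ops.scale_image`). Below is a minimal standalone sketch of the same encoding scheme on a tiny 4x4 mask. The helper names mirror the diff, but this is an illustration, not the validator's exact code path; note the column-major (Fortran-order) flatten, which is why the diff transposes H and W before viewing each mask as a row.

    import torch

    def rle_counts(flat: torch.Tensor) -> list[int]:
        """Run lengths of one flattened binary mask; COCO RLE always starts with a run of 0s."""
        idx = torch.where(flat[1:] != flat[:-1])[0] + 1  # indices where a new run begins
        if len(idx):
            counts = [idx[0].item(), *torch.diff(idx).tolist(), len(flat) - idx[-1].item()]
        else:
            counts = [len(flat)]  # uniform mask: a single run
        if flat[0].item() == 1:
            counts = [0, *counts]  # prepend an empty background run if the mask starts with 1s
        return counts

    def rle_to_string(counts: list[int]) -> str:
        """COCO compressed RLE: delta-encode the counts, then base-32 varint shifted into ASCII."""
        out = []
        for i, x in enumerate(counts):
            x = int(x)
            if i > 2:
                x -= int(counts[i - 2])  # delta vs. the previous run of the same pixel value
            while True:
                c = x & 0x1F  # low 5 bits
                x >>= 5
                more = (x != -1) if (c & 0x10) else (x != 0)  # sign-aware continuation test
                if more:
                    c |= 0x20  # set continuation bit
                out.append(chr(c + 48))  # shift into printable ASCII
                if not more:
                    break
        return "".join(out)

    mask = torch.tensor(
        [[0, 1, 1, 0],
         [0, 1, 1, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]], dtype=torch.uint8
    )
    flat = mask.t().contiguous().view(-1)  # column-major flatten, as COCO expects
    counts = rle_counts(flat)
    print(counts)  # [4, 2, 2, 2, 6]: alternating runs of 0s and 1s
    print({"size": list(mask.shape), "counts": rle_to_string(counts)})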

ultralytics/models/yolo/world/train.py +12 -16
@@ -24,8 +24,7 @@ def on_pretrain_routine_end(trainer) -> None:
 
 
 class WorldTrainer(DetectionTrainer):
-    """
-    A trainer class for fine-tuning YOLO World models on close-set datasets.
+    """A trainer class for fine-tuning YOLO World models on close-set datasets.
 
     This trainer extends the DetectionTrainer to support training YOLO World models, which combine visual and textual
     features for improved object detection and understanding. It handles text embedding generation and caching to
@@ -54,8 +53,7 @@ class WorldTrainer(DetectionTrainer):
     """
 
     def __init__(self, cfg=DEFAULT_CFG, overrides: dict[str, Any] | None = None, _callbacks=None):
-        """
-        Initialize a WorldTrainer object with given arguments.
+        """Initialize a WorldTrainer object with given arguments.
 
         Args:
             cfg (dict[str, Any]): Configuration for the trainer.
@@ -64,12 +62,12 @@ class WorldTrainer(DetectionTrainer):
         """
         if overrides is None:
             overrides = {}
+        assert not overrides.get("compile"), f"Training with 'model={overrides['model']}' requires 'compile=False'"
         super().__init__(cfg, overrides, _callbacks)
         self.text_embeddings = None
 
     def get_model(self, cfg=None, weights: str | None = None, verbose: bool = True) -> WorldModel:
-        """
-        Return WorldModel initialized with specified config and weights.
+        """Return WorldModel initialized with specified config and weights.
 
         Args:
             cfg (dict[str, Any] | str, optional): Model configuration.
@@ -94,8 +92,7 @@ class WorldTrainer(DetectionTrainer):
         return model
 
     def build_dataset(self, img_path: str, mode: str = "train", batch: int | None = None):
-        """
-        Build YOLO Dataset for training or validation.
+        """Build YOLO Dataset for training or validation.
 
         Args:
             img_path (str): Path to the folder containing images.
@@ -114,11 +111,10 @@ class WorldTrainer(DetectionTrainer):
         return dataset
 
     def set_text_embeddings(self, datasets: list[Any], batch: int | None) -> None:
-        """
-        Set text embeddings for datasets to accelerate training by caching category names.
+        """Set text embeddings for datasets to accelerate training by caching category names.
 
-        This method collects unique category names from all datasets, then generates and caches text embeddings
-        for these categories to improve training efficiency.
+        This method collects unique category names from all datasets, then generates and caches text embeddings for
+        these categories to improve training efficiency.
 
         Args:
             datasets (list[Any]): List of datasets from which to extract category names.
@@ -140,8 +136,7 @@ class WorldTrainer(DetectionTrainer):
         self.text_embeddings = text_embeddings
 
     def generate_text_embeddings(self, texts: list[str], batch: int, cache_dir: Path) -> dict[str, torch.Tensor]:
-        """
-        Generate text embeddings for a list of text samples.
+        """Generate text embeddings for a list of text samples.
 
         Args:
             texts (list[str]): List of text samples to encode.
@@ -171,7 +166,8 @@ class WorldTrainer(DetectionTrainer):
 
         # Add text features
         texts = list(itertools.chain(*batch["texts"]))
-        txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(self.device, non_blocking=True)
-        txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
+        txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(
+            self.device, non_blocking=self.device.type == "cuda"
+        )
         batch["txt_feats"] = txt_feats.reshape(len(batch["texts"]), -1, txt_feats.shape[-1])
         return batch
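
Besides the docstring reflows, two substantive changes land here: `__init__` now rejects `compile=True` up front, and the text-feature transfer drops the explicit L2 re-normalization of cached embeddings while requesting asynchronous copies only for CUDA targets. A small sketch of that transfer pattern, with a toy embedding dict standing in for the trainer's cached state:

    import torch

    def stack_to_device(cache: dict[str, torch.Tensor], texts: list[str], device: torch.device) -> torch.Tensor:
        """Stack cached per-text embeddings and move them to the compute device."""
        feats = torch.stack([cache[t] for t in texts])  # (len(texts), dim)
        # non_blocking copies only pay off on CUDA (async pinned-memory H2D); elsewhere the flag is a no-op
        return feats.to(device, non_blocking=device.type == "cuda")

    cache = {"person": torch.randn(512), "bus": torch.randn(512)}  # toy stand-in for self.text_embeddings
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(stack_to_device(cache, ["person", "bus", "person"], device).shape)  # torch.Size([3, 512])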

ultralytics/models/yolo/world/train_world.py +11 -34
@@ -10,8 +10,7 @@ from ultralytics.utils.torch_utils import unwrap_model
 
 
 class WorldTrainerFromScratch(WorldTrainer):
-    """
-    A class extending the WorldTrainer for training a world model from scratch on open-set datasets.
+    """A class extending the WorldTrainer for training a world model from scratch on open-set datasets.
 
     This trainer specializes in handling mixed datasets including both object detection and grounding datasets,
     supporting training YOLO-World models with combined vision-language capabilities.
@@ -53,45 +52,25 @@ class WorldTrainerFromScratch(WorldTrainer):
     """
 
     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """
-        Initialize a WorldTrainerFromScratch object.
+        """Initialize a WorldTrainerFromScratch object.
 
-        This initializes a trainer for YOLO-World models from scratch, supporting mixed datasets including both
-        object detection and grounding datasets for vision-language capabilities.
+        This initializes a trainer for YOLO-World models from scratch, supporting mixed datasets including both object
+        detection and grounding datasets for vision-language capabilities.
 
         Args:
             cfg (dict): Configuration dictionary with default parameters for model training.
             overrides (dict, optional): Dictionary of parameter overrides to customize the configuration.
             _callbacks (list, optional): List of callback functions to be executed during different stages of training.
-
-        Examples:
-            >>> from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch
-            >>> from ultralytics import YOLOWorld
-            >>> data = dict(
-            ...     train=dict(
-            ...         yolo_data=["Objects365.yaml"],
-            ...         grounding_data=[
-            ...             dict(
-            ...                 img_path="flickr30k/images",
-            ...                 json_file="flickr30k/final_flickr_separateGT_train.json",
-            ...             ),
-            ...         ],
-            ...     ),
-            ...     val=dict(yolo_data=["lvis.yaml"]),
-            ... )
-            >>> model = YOLOWorld("yolov8s-worldv2.yaml")
-            >>> model.train(data=data, trainer=WorldTrainerFromScratch)
         """
         if overrides is None:
             overrides = {}
         super().__init__(cfg, overrides, _callbacks)
 
     def build_dataset(self, img_path, mode="train", batch=None):
-        """
-        Build YOLO Dataset for training or validation.
+        """Build YOLO Dataset for training or validation.
 
-        This method constructs appropriate datasets based on the mode and input paths, handling both
-        standard YOLO datasets and grounding datasets with different formats.
+        This method constructs appropriate datasets based on the mode and input paths, handling both standard YOLO
+        datasets and grounding datasets with different formats.
 
         Args:
             img_path (list[str] | str): Path to the folder containing images or list of paths.
@@ -122,11 +101,10 @@ class WorldTrainerFromScratch(WorldTrainer):
         return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]
 
     def get_dataset(self):
-        """
-        Get train and validation paths from data dictionary.
+        """Get train and validation paths from data dictionary.
 
-        Processes the data configuration to extract paths for training and validation datasets,
-        handling both YOLO detection datasets and grounding datasets.
+        Processes the data configuration to extract paths for training and validation datasets, handling both YOLO
+        detection datasets and grounding datasets.
 
         Returns:
             train_path (str): Train dataset path.
@@ -187,8 +165,7 @@ class WorldTrainerFromScratch(WorldTrainer):
         pass
 
     def final_eval(self):
-        """
-        Perform final evaluation and validation for the YOLO-World model.
+        """Perform final evaluation and validation for the YOLO-World model.
 
         Configures the validator with appropriate dataset and split information before running evaluation.
 
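
The docstring `Examples` block removed above remains the clearest illustration of the mixed-dataset format this trainer expects, so it is reproduced here for reference, exactly as it appeared before removal (minus the `>>>` prompts):

    from ultralytics import YOLOWorld
    from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch

    data = dict(
        train=dict(
            yolo_data=["Objects365.yaml"],  # standard YOLO detection dataset(s)
            grounding_data=[
                dict(
                    img_path="flickr30k/images",
                    json_file="flickr30k/final_flickr_separateGT_train.json",
                ),
            ],
        ),
        val=dict(yolo_data=["lvis.yaml"]),
    )
    model = YOLOWorld("yolov8s-worldv2.yaml")
    model.train(data=data, trainer=WorldTrainerFromScratch)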

ultralytics/models/yolo/yoloe/__init__.py +7 -7
@@ -6,17 +6,17 @@ from .train_seg import YOLOEPESegTrainer, YOLOESegTrainer, YOLOESegTrainerFromSc
 from .val import YOLOEDetectValidator, YOLOESegValidator
 
 __all__ = [
-    "YOLOETrainer",
-    "YOLOEPETrainer",
-    "YOLOESegTrainer",
     "YOLOEDetectValidator",
-    "YOLOESegValidator",
+    "YOLOEPEFreeTrainer",
     "YOLOEPESegTrainer",
+    "YOLOEPETrainer",
+    "YOLOESegTrainer",
     "YOLOESegTrainerFromScratch",
     "YOLOESegVPTrainer",
-    "YOLOEVPTrainer",
-    "YOLOEPEFreeTrainer",
+    "YOLOESegValidator",
+    "YOLOETrainer",
+    "YOLOETrainerFromScratch",
     "YOLOEVPDetectPredictor",
     "YOLOEVPSegPredictor",
-    "YOLOETrainerFromScratch",
+    "YOLOEVPTrainer",
 ]
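
The `__all__` edit is a pure reorder into alphabetical (ASCII) order: the same 13 names before and after, none added or dropped. A two-line check confirms it:

    import ultralytics.models.yolo.yoloe as yoloe

    assert list(yoloe.__all__) == sorted(yoloe.__all__)  # new ordering is alphabetical
    assert len(set(yoloe.__all__)) == 13  # no names added or removed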

ultralytics/models/yolo/yoloe/predict.py +16 -23
@@ -9,11 +9,10 @@ from ultralytics.models.yolo.segment import SegmentationPredictor
 
 
 class YOLOEVPDetectPredictor(DetectionPredictor):
-    """
-    A mixin class for YOLO-EVP (Enhanced Visual Prompting) predictors.
+    """A mixin class for YOLO-EVP (Enhanced Visual Prompting) predictors.
 
-    This mixin provides common functionality for YOLO models that use visual prompting, including
-    model setup, prompt handling, and preprocessing transformations.
+    This mixin provides common functionality for YOLO models that use visual prompting, including model setup, prompt
+    handling, and preprocessing transformations.
 
     Attributes:
         model (torch.nn.Module): The YOLO model for inference.
@@ -29,8 +28,7 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
     """
 
     def setup_model(self, model, verbose: bool = True):
-        """
-        Set up the model for prediction.
+        """Set up the model for prediction.
 
         Args:
             model (torch.nn.Module): Model to load or use.
@@ -40,21 +38,19 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         self.done_warmup = True
 
     def set_prompts(self, prompts):
-        """
-        Set the visual prompts for the model.
+        """Set the visual prompts for the model.
 
         Args:
-            prompts (dict): Dictionary containing class indices and bounding boxes or masks.
-                Must include a 'cls' key with class indices.
+            prompts (dict): Dictionary containing class indices and bounding boxes or masks. Must include a 'cls' key
+                with class indices.
         """
         self.prompts = prompts
 
     def pre_transform(self, im):
-        """
-        Preprocess images and prompts before inference.
+        """Preprocess images and prompts before inference.
 
-        This method applies letterboxing to the input image and transforms the visual prompts
-        (bounding boxes or masks) accordingly.
+        This method applies letterboxing to the input image and transforms the visual prompts (bounding boxes or masks)
+        accordingly.
 
         Args:
             im (list): List containing a single input image.
@@ -94,8 +90,7 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         return img
 
     def _process_single_image(self, dst_shape, src_shape, category, bboxes=None, masks=None):
-        """
-        Process a single image by resizing bounding boxes or masks and generating visuals.
+        """Process a single image by resizing bounding boxes or masks and generating visuals.
 
         Args:
             dst_shape (tuple): The target shape (height, width) of the image.
@@ -131,8 +126,7 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         return LoadVisualPrompt().get_visuals(category, dst_shape, bboxes, masks)
 
     def inference(self, im, *args, **kwargs):
-        """
-        Run inference with visual prompts.
+        """Run inference with visual prompts.
 
         Args:
             im (torch.Tensor): Input image tensor.
@@ -145,13 +139,12 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         return super().inference(im, vpe=self.prompts, *args, **kwargs)
 
     def get_vpe(self, source):
-        """
-        Process the source to get the visual prompt embeddings (VPE).
+        """Process the source to get the visual prompt embeddings (VPE).
 
         Args:
-            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | list | tuple): The source
-                of the image to make predictions on. Accepts various types including file paths, URLs, PIL
-                images, numpy arrays, and torch tensors.
+            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | list | tuple): The source of the image to
+                make predictions on. Accepts various types including file paths, URLs, PIL images, numpy arrays, and
+                torch tensors.
 
         Returns:
             (torch.Tensor): The visual prompt embeddings (VPE) from the model.
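
For context, a hedged sketch of how this visual-prompt predictor is typically driven. The prompts-dict shape (a 'cls' key plus 'bboxes' or 'masks') comes from the `set_prompts` docstring above; the checkpoint name and the `visual_prompts` keyword follow the Ultralytics YOLOE documentation and are not part of this diff:

    import numpy as np
    from ultralytics import YOLOE
    from ultralytics.models.yolo.yoloe import YOLOEVPSegPredictor

    # One box prompt per target object; 'cls' assigns a class index to each prompt.
    visual_prompts = dict(
        bboxes=np.array([[221.5, 405.8, 344.9, 857.5]]),  # xyxy, pixel coordinates
        cls=np.array([0]),
    )
    model = YOLOE("yoloe-11l-seg.pt")  # illustrative checkpoint name
    results = model.predict("bus.jpg", visual_prompts=visual_prompts, predictor=YOLOEVPSegPredictor)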