dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
  2. dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
  3. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +7 -6
  5. tests/conftest.py +15 -39
  6. tests/test_cli.py +17 -17
  7. tests/test_cuda.py +17 -8
  8. tests/test_engine.py +36 -10
  9. tests/test_exports.py +98 -37
  10. tests/test_integrations.py +12 -15
  11. tests/test_python.py +126 -82
  12. tests/test_solutions.py +319 -135
  13. ultralytics/__init__.py +27 -9
  14. ultralytics/cfg/__init__.py +83 -87
  15. ultralytics/cfg/datasets/Argoverse.yaml +4 -4
  16. ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
  17. ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
  18. ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
  19. ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
  20. ultralytics/cfg/datasets/ImageNet.yaml +3 -3
  21. ultralytics/cfg/datasets/Objects365.yaml +24 -20
  22. ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
  23. ultralytics/cfg/datasets/VOC.yaml +10 -13
  24. ultralytics/cfg/datasets/VisDrone.yaml +43 -33
  25. ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
  26. ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
  27. ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
  28. ultralytics/cfg/datasets/coco-pose.yaml +26 -4
  29. ultralytics/cfg/datasets/coco.yaml +4 -4
  30. ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
  31. ultralytics/cfg/datasets/coco128.yaml +2 -2
  32. ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
  33. ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
  34. ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
  35. ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
  36. ultralytics/cfg/datasets/coco8.yaml +2 -2
  37. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  38. ultralytics/cfg/datasets/crack-seg.yaml +5 -5
  39. ultralytics/cfg/datasets/dog-pose.yaml +32 -4
  40. ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
  41. ultralytics/cfg/datasets/dota8.yaml +2 -2
  42. ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
  43. ultralytics/cfg/datasets/lvis.yaml +9 -9
  44. ultralytics/cfg/datasets/medical-pills.yaml +4 -5
  45. ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
  46. ultralytics/cfg/datasets/package-seg.yaml +5 -5
  47. ultralytics/cfg/datasets/signature.yaml +4 -4
  48. ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
  49. ultralytics/cfg/datasets/xView.yaml +5 -5
  50. ultralytics/cfg/default.yaml +96 -93
  51. ultralytics/cfg/trackers/botsort.yaml +16 -17
  52. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  53. ultralytics/data/__init__.py +4 -4
  54. ultralytics/data/annotator.py +12 -12
  55. ultralytics/data/augment.py +531 -564
  56. ultralytics/data/base.py +76 -81
  57. ultralytics/data/build.py +206 -42
  58. ultralytics/data/converter.py +179 -78
  59. ultralytics/data/dataset.py +121 -121
  60. ultralytics/data/loaders.py +114 -91
  61. ultralytics/data/split.py +28 -15
  62. ultralytics/data/split_dota.py +67 -48
  63. ultralytics/data/utils.py +110 -89
  64. ultralytics/engine/exporter.py +422 -460
  65. ultralytics/engine/model.py +224 -252
  66. ultralytics/engine/predictor.py +94 -89
  67. ultralytics/engine/results.py +345 -595
  68. ultralytics/engine/trainer.py +231 -134
  69. ultralytics/engine/tuner.py +279 -73
  70. ultralytics/engine/validator.py +53 -46
  71. ultralytics/hub/__init__.py +26 -28
  72. ultralytics/hub/auth.py +30 -16
  73. ultralytics/hub/google/__init__.py +34 -36
  74. ultralytics/hub/session.py +53 -77
  75. ultralytics/hub/utils.py +23 -109
  76. ultralytics/models/__init__.py +1 -1
  77. ultralytics/models/fastsam/__init__.py +1 -1
  78. ultralytics/models/fastsam/model.py +36 -18
  79. ultralytics/models/fastsam/predict.py +33 -44
  80. ultralytics/models/fastsam/utils.py +4 -5
  81. ultralytics/models/fastsam/val.py +12 -14
  82. ultralytics/models/nas/__init__.py +1 -1
  83. ultralytics/models/nas/model.py +16 -20
  84. ultralytics/models/nas/predict.py +12 -14
  85. ultralytics/models/nas/val.py +4 -5
  86. ultralytics/models/rtdetr/__init__.py +1 -1
  87. ultralytics/models/rtdetr/model.py +9 -9
  88. ultralytics/models/rtdetr/predict.py +22 -17
  89. ultralytics/models/rtdetr/train.py +20 -16
  90. ultralytics/models/rtdetr/val.py +79 -59
  91. ultralytics/models/sam/__init__.py +8 -2
  92. ultralytics/models/sam/amg.py +53 -38
  93. ultralytics/models/sam/build.py +29 -31
  94. ultralytics/models/sam/model.py +33 -38
  95. ultralytics/models/sam/modules/blocks.py +159 -182
  96. ultralytics/models/sam/modules/decoders.py +38 -47
  97. ultralytics/models/sam/modules/encoders.py +114 -133
  98. ultralytics/models/sam/modules/memory_attention.py +38 -31
  99. ultralytics/models/sam/modules/sam.py +114 -93
  100. ultralytics/models/sam/modules/tiny_encoder.py +268 -291
  101. ultralytics/models/sam/modules/transformer.py +59 -66
  102. ultralytics/models/sam/modules/utils.py +55 -72
  103. ultralytics/models/sam/predict.py +745 -341
  104. ultralytics/models/utils/loss.py +118 -107
  105. ultralytics/models/utils/ops.py +118 -71
  106. ultralytics/models/yolo/__init__.py +1 -1
  107. ultralytics/models/yolo/classify/predict.py +28 -26
  108. ultralytics/models/yolo/classify/train.py +50 -81
  109. ultralytics/models/yolo/classify/val.py +68 -61
  110. ultralytics/models/yolo/detect/predict.py +12 -15
  111. ultralytics/models/yolo/detect/train.py +56 -46
  112. ultralytics/models/yolo/detect/val.py +279 -223
  113. ultralytics/models/yolo/model.py +167 -86
  114. ultralytics/models/yolo/obb/predict.py +7 -11
  115. ultralytics/models/yolo/obb/train.py +23 -25
  116. ultralytics/models/yolo/obb/val.py +107 -99
  117. ultralytics/models/yolo/pose/__init__.py +1 -1
  118. ultralytics/models/yolo/pose/predict.py +12 -14
  119. ultralytics/models/yolo/pose/train.py +31 -69
  120. ultralytics/models/yolo/pose/val.py +119 -254
  121. ultralytics/models/yolo/segment/predict.py +21 -25
  122. ultralytics/models/yolo/segment/train.py +12 -66
  123. ultralytics/models/yolo/segment/val.py +126 -305
  124. ultralytics/models/yolo/world/train.py +53 -45
  125. ultralytics/models/yolo/world/train_world.py +51 -32
  126. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  127. ultralytics/models/yolo/yoloe/predict.py +30 -37
  128. ultralytics/models/yolo/yoloe/train.py +89 -71
  129. ultralytics/models/yolo/yoloe/train_seg.py +15 -17
  130. ultralytics/models/yolo/yoloe/val.py +56 -41
  131. ultralytics/nn/__init__.py +9 -11
  132. ultralytics/nn/autobackend.py +179 -107
  133. ultralytics/nn/modules/__init__.py +67 -67
  134. ultralytics/nn/modules/activation.py +8 -7
  135. ultralytics/nn/modules/block.py +302 -323
  136. ultralytics/nn/modules/conv.py +61 -104
  137. ultralytics/nn/modules/head.py +488 -186
  138. ultralytics/nn/modules/transformer.py +183 -123
  139. ultralytics/nn/modules/utils.py +15 -20
  140. ultralytics/nn/tasks.py +327 -203
  141. ultralytics/nn/text_model.py +81 -65
  142. ultralytics/py.typed +1 -0
  143. ultralytics/solutions/__init__.py +12 -12
  144. ultralytics/solutions/ai_gym.py +19 -27
  145. ultralytics/solutions/analytics.py +36 -26
  146. ultralytics/solutions/config.py +29 -28
  147. ultralytics/solutions/distance_calculation.py +23 -24
  148. ultralytics/solutions/heatmap.py +17 -19
  149. ultralytics/solutions/instance_segmentation.py +21 -19
  150. ultralytics/solutions/object_blurrer.py +16 -17
  151. ultralytics/solutions/object_counter.py +48 -53
  152. ultralytics/solutions/object_cropper.py +22 -16
  153. ultralytics/solutions/parking_management.py +61 -58
  154. ultralytics/solutions/queue_management.py +19 -19
  155. ultralytics/solutions/region_counter.py +63 -50
  156. ultralytics/solutions/security_alarm.py +22 -25
  157. ultralytics/solutions/similarity_search.py +107 -60
  158. ultralytics/solutions/solutions.py +343 -262
  159. ultralytics/solutions/speed_estimation.py +35 -31
  160. ultralytics/solutions/streamlit_inference.py +104 -40
  161. ultralytics/solutions/templates/similarity-search.html +31 -24
  162. ultralytics/solutions/trackzone.py +24 -24
  163. ultralytics/solutions/vision_eye.py +11 -12
  164. ultralytics/trackers/__init__.py +1 -1
  165. ultralytics/trackers/basetrack.py +18 -27
  166. ultralytics/trackers/bot_sort.py +48 -39
  167. ultralytics/trackers/byte_tracker.py +94 -94
  168. ultralytics/trackers/track.py +7 -16
  169. ultralytics/trackers/utils/gmc.py +37 -69
  170. ultralytics/trackers/utils/kalman_filter.py +68 -76
  171. ultralytics/trackers/utils/matching.py +13 -17
  172. ultralytics/utils/__init__.py +251 -275
  173. ultralytics/utils/autobatch.py +19 -7
  174. ultralytics/utils/autodevice.py +68 -38
  175. ultralytics/utils/benchmarks.py +169 -130
  176. ultralytics/utils/callbacks/base.py +12 -13
  177. ultralytics/utils/callbacks/clearml.py +14 -15
  178. ultralytics/utils/callbacks/comet.py +139 -66
  179. ultralytics/utils/callbacks/dvc.py +19 -27
  180. ultralytics/utils/callbacks/hub.py +8 -6
  181. ultralytics/utils/callbacks/mlflow.py +6 -10
  182. ultralytics/utils/callbacks/neptune.py +11 -19
  183. ultralytics/utils/callbacks/platform.py +73 -0
  184. ultralytics/utils/callbacks/raytune.py +3 -4
  185. ultralytics/utils/callbacks/tensorboard.py +9 -12
  186. ultralytics/utils/callbacks/wb.py +33 -30
  187. ultralytics/utils/checks.py +163 -114
  188. ultralytics/utils/cpu.py +89 -0
  189. ultralytics/utils/dist.py +24 -20
  190. ultralytics/utils/downloads.py +176 -146
  191. ultralytics/utils/errors.py +11 -13
  192. ultralytics/utils/events.py +113 -0
  193. ultralytics/utils/export/__init__.py +7 -0
  194. ultralytics/utils/{export.py → export/engine.py} +81 -63
  195. ultralytics/utils/export/imx.py +294 -0
  196. ultralytics/utils/export/tensorflow.py +217 -0
  197. ultralytics/utils/files.py +33 -36
  198. ultralytics/utils/git.py +137 -0
  199. ultralytics/utils/instance.py +105 -120
  200. ultralytics/utils/logger.py +404 -0
  201. ultralytics/utils/loss.py +99 -61
  202. ultralytics/utils/metrics.py +649 -478
  203. ultralytics/utils/nms.py +337 -0
  204. ultralytics/utils/ops.py +263 -451
  205. ultralytics/utils/patches.py +70 -31
  206. ultralytics/utils/plotting.py +253 -223
  207. ultralytics/utils/tal.py +48 -61
  208. ultralytics/utils/torch_utils.py +244 -251
  209. ultralytics/utils/tqdm.py +438 -0
  210. ultralytics/utils/triton.py +22 -23
  211. ultralytics/utils/tuner.py +11 -10
  212. dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
  213. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
  214. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
  215. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
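A recurring pattern in the hunks below is the modernization of type annotations: `typing.List`/`Tuple`/`Dict` imports are replaced by built-in generics (`list[int]`, `tuple[int, ...]`) and `X | None` unions, enabled by adding `from __future__ import annotations` at the top of each module. A minimal sketch of the convention, with names that are illustrative rather than taken from the package:

```python
# Sketch of the annotation style adopted in 8.3.224 (function and argument names are
# illustrative, not from the package). `from __future__ import annotations` defers
# annotation evaluation, so built-in generics and `X | Y` unions work on older Python 3.
from __future__ import annotations


def clip_xyxy(boxes: list[list[float]], shape: tuple[int, int] | None = None) -> list[list[float]]:
    """Clip xyxy boxes to an optional (height, width) image shape."""
    if shape is None:
        return boxes
    h, w = shape
    return [
        [min(max(x1, 0), w), min(max(y1, 0), h), min(max(x2, 0), w), min(max(y2, 0), h)]
        for x1, y1, x2, y2 in boxes
    ]
```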
@@ -1,5 +1,10 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import Any
+
  import torch

  from ultralytics.data import YOLODataset
@@ -11,48 +16,61 @@ __all__ = ("RTDETRValidator",) # tuple or list


  class RTDETRDataset(YOLODataset):
- """
- Real-Time DEtection and TRacking (RT-DETR) dataset class extending the base YOLODataset class.
+ """Real-Time DEtection and TRacking (RT-DETR) dataset class extending the base YOLODataset class.

  This specialized dataset class is designed for use with the RT-DETR object detection model and is optimized for
  real-time detection and tracking tasks.
+
+ Attributes:
+ augment (bool): Whether to apply data augmentation.
+ rect (bool): Whether to use rectangular training.
+ use_segments (bool): Whether to use segmentation masks.
+ use_keypoints (bool): Whether to use keypoint annotations.
+ imgsz (int): Target image size for training.
+
+ Methods:
+ load_image: Load one image from dataset index.
+ build_transforms: Build transformation pipeline for the dataset.
+
+ Examples:
+ Initialize an RT-DETR dataset
+ >>> dataset = RTDETRDataset(img_path="path/to/images", imgsz=640)
+ >>> image, hw = dataset.load_image(0)
  """

  def __init__(self, *args, data=None, **kwargs):
- """
- Initialize the RTDETRDataset class by inheriting from the YOLODataset class.
+ """Initialize the RTDETRDataset class by inheriting from the YOLODataset class.

  This constructor sets up a dataset specifically optimized for the RT-DETR (Real-Time DEtection and TRacking)
  model, building upon the base YOLODataset functionality.

  Args:
  *args (Any): Variable length argument list passed to the parent YOLODataset class.
- data (Dict | None): Dictionary containing dataset information. If None, default values will be used.
+ data (dict | None): Dictionary containing dataset information. If None, default values will be used.
  **kwargs (Any): Additional keyword arguments passed to the parent YOLODataset class.
  """
  super().__init__(*args, data=data, **kwargs)

  def load_image(self, i, rect_mode=False):
- """
- Load one image from dataset index 'i'.
+ """Load one image from dataset index 'i'.

  Args:
  i (int): Index of the image to load.
  rect_mode (bool, optional): Whether to use rectangular mode for batch inference.

  Returns:
- im (numpy.ndarray): The loaded image.
+ im (torch.Tensor): The loaded image.
  resized_hw (tuple): Height and width of the resized image with shape (2,).

  Examples:
- >>> dataset = RTDETRDataset(...)
+ Load an image from the dataset
+ >>> dataset = RTDETRDataset(img_path="path/to/images")
  >>> image, hw = dataset.load_image(0)
  """
  return super().load_image(i=i, rect_mode=rect_mode)

  def build_transforms(self, hyp=None):
- """
- Build transformation pipeline for the dataset.
+ """Build transformation pipeline for the dataset.

  Args:
  hyp (dict, optional): Hyperparameters for transformations.
@@ -67,7 +85,7 @@ class RTDETRDataset(YOLODataset):
  transforms = v8_transforms(self, self.imgsz, hyp, stretch=True)
  else:
  # transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), auto=False, scale_fill=True)])
- transforms = Compose([])
+ transforms = Compose([lambda x: {**x, **{"ratio_pad": [x["ratio_pad"], [0, 0]]}}])
  transforms.append(
  Format(
  bbox_format="xywh",
@@ -83,30 +101,38 @@ class RTDETRDataset(YOLODataset):


  class RTDETRValidator(DetectionValidator):
- """
- RTDETRValidator extends the DetectionValidator class to provide validation capabilities specifically tailored for
+ """RTDETRValidator extends the DetectionValidator class to provide validation capabilities specifically tailored for
  the RT-DETR (Real-Time DETR) object detection model.

  The class allows building of an RTDETR-specific dataset for validation, applies Non-maximum suppression for
  post-processing, and updates evaluation metrics accordingly.

+ Attributes:
+ args (Namespace): Configuration arguments for validation.
+ data (dict): Dataset configuration dictionary.
+
+ Methods:
+ build_dataset: Build an RTDETR Dataset for validation.
+ postprocess: Apply Non-maximum suppression to prediction outputs.
+
  Examples:
+ Initialize and run RT-DETR validation
  >>> from ultralytics.models.rtdetr import RTDETRValidator
  >>> args = dict(model="rtdetr-l.pt", data="coco8.yaml")
  >>> validator = RTDETRValidator(args=args)
  >>> validator()

- Note:
+ Notes:
  For further details on the attributes and methods, refer to the parent DetectionValidator class.
  """

  def build_dataset(self, img_path, mode="val", batch=None):
- """
- Build an RTDETR Dataset.
+ """Build an RTDETR Dataset.

  Args:
  img_path (str): Path to the folder containing images.
- mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
+ mode (str, optional): `train` mode or `val` mode, users are able to customize different augmentations for
+ each mode.
  batch (int, optional): Size of batches, this is for `rect`.

  Returns:
@@ -124,15 +150,21 @@ class RTDETRValidator(DetectionValidator):
  data=self.data,
  )

- def postprocess(self, preds):
- """
- Apply Non-maximum suppression to prediction outputs.
+ def postprocess(
+ self, preds: torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor]
+ ) -> list[dict[str, torch.Tensor]]:
+ """Apply Non-maximum suppression to prediction outputs.

  Args:
- preds (List | Tuple | torch.Tensor): Raw predictions from the model.
+ preds (torch.Tensor | list | tuple): Raw predictions from the model. If tensor, should have shape
+ (batch_size, num_predictions, num_classes + 4) where last dimension contains bbox coords and
+ class scores.

  Returns:
- (List[torch.Tensor]): List of processed predictions for each image in batch.
+ (list[dict[str, torch.Tensor]]): List of dictionaries for each image, each containing:
+ - 'bboxes': Tensor of shape (N, 4) with bounding box coordinates
+ - 'conf': Tensor of shape (N,) with confidence scores
+ - 'cls': Tensor of shape (N,) with class indices
  """
  if not isinstance(preds, (list, tuple)): # list for PyTorch inference but list[0] Tensor for export inference
  preds = [preds, None]
@@ -149,43 +181,31 @@ class RTDETRValidator(DetectionValidator):
  pred = pred[score.argsort(descending=True)]
  outputs[i] = pred[score > self.args.conf]

- return outputs
-
- def _prepare_batch(self, si, batch):
- """
- Prepares a batch for validation by applying necessary transformations.
-
- Args:
- si (int): Batch index.
- batch (dict): Batch data containing images and annotations.
+ return [{"bboxes": x[:, :4], "conf": x[:, 4], "cls": x[:, 5]} for x in outputs]

- Returns:
- (dict): Prepared batch with transformed annotations.
- """
- idx = batch["batch_idx"] == si
- cls = batch["cls"][idx].squeeze(-1)
- bbox = batch["bboxes"][idx]
- ori_shape = batch["ori_shape"][si]
- imgsz = batch["img"].shape[2:]
- ratio_pad = batch["ratio_pad"][si]
- if len(cls):
- bbox = ops.xywh2xyxy(bbox) # target boxes
- bbox[..., [0, 2]] *= ori_shape[1] # native-space pred
- bbox[..., [1, 3]] *= ori_shape[0] # native-space pred
- return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}
-
- def _prepare_pred(self, pred, pbatch):
- """
- Prepares predictions by scaling bounding boxes to original image dimensions.
+ def pred_to_json(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> None:
+ """Serialize YOLO predictions to COCO json format.

  Args:
- pred (torch.Tensor): Raw predictions.
- pbatch (dict): Prepared batch information.
-
- Returns:
- (torch.Tensor): Predictions scaled to original image dimensions.
+ predn (dict[str, torch.Tensor]): Predictions dictionary containing 'bboxes', 'conf', and 'cls' keys with
+ bounding box coordinates, confidence scores, and class predictions.
+ pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
  """
- predn = pred.clone()
- predn[..., [0, 2]] *= pbatch["ori_shape"][1] / self.args.imgsz # native-space pred
- predn[..., [1, 3]] *= pbatch["ori_shape"][0] / self.args.imgsz # native-space pred
- return predn.float()
+ path = Path(pbatch["im_file"])
+ stem = path.stem
+ image_id = int(stem) if stem.isnumeric() else stem
+ box = predn["bboxes"].clone()
+ box[..., [0, 2]] *= pbatch["ori_shape"][1] / self.args.imgsz # native-space pred
+ box[..., [1, 3]] *= pbatch["ori_shape"][0] / self.args.imgsz # native-space pred
+ box = ops.xyxy2xywh(box) # xywh
+ box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
+ for b, s, c in zip(box.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
+ self.jdict.append(
+ {
+ "image_id": image_id,
+ "file_name": path.name,
+ "category_id": self.class_map[int(c)],
+ "bbox": [round(x, 3) for x in b],
+ "score": round(s, 5),
+ }
+ )
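The hunks above (apparently ultralytics/models/rtdetr/val.py in the file list) change RTDETRValidator.postprocess to return one dictionary per image instead of a list of raw tensors. A minimal sketch of consuming that output shape; the tensors below are fabricated stand-ins, not real model predictions:

```python
# Each postprocess result is now a dict with 'bboxes' (N, 4), 'conf' (N,) and 'cls' (N,)
# tensors, mirroring the return statement in the hunk above. Values are illustrative.
import torch

results = [
    {
        "bboxes": torch.tensor([[10.0, 20.0, 110.0, 220.0], [5.0, 5.0, 50.0, 60.0]]),
        "conf": torch.tensor([0.87, 0.31]),
        "cls": torch.tensor([0.0, 2.0]),
    }
]

for i, r in enumerate(results):
    keep = r["conf"] > 0.5  # confidence filter, analogous to self.args.conf in the validator
    print(f"image {i}: kept {int(keep.sum())} of {len(r['conf'])} detections")
    print(r["bboxes"][keep].tolist(), r["cls"][keep].tolist())
```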
@@ -1,6 +1,12 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

  from .model import SAM
- from .predict import Predictor, SAM2Predictor, SAM2VideoPredictor
+ from .predict import Predictor, SAM2DynamicInteractivePredictor, SAM2Predictor, SAM2VideoPredictor

- __all__ = "SAM", "Predictor", "SAM2Predictor", "SAM2VideoPredictor" # tuple or list of exportable items
+ __all__ = (
+ "SAM",
+ "Predictor",
+ "SAM2DynamicInteractivePredictor",
+ "SAM2Predictor",
+ "SAM2VideoPredictor",
+ ) # tuple or list of exportable items
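The expanded export list above adds SAM2DynamicInteractivePredictor to the sam package's public API. A minimal import check against the new `__all__`; no weights are downloaded or loaded here:

```python
# Verify the new predictor is exported from ultralytics.models.sam in 8.3.224, as declared
# in the __all__ tuple in the hunk above.
from ultralytics.models.sam import (
    SAM,
    Predictor,
    SAM2DynamicInteractivePredictor,
    SAM2Predictor,
    SAM2VideoPredictor,
)

print(SAM2DynamicInteractivePredictor.__mro__[:2])  # the new class and its immediate base
```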
@@ -1,17 +1,36 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+ from __future__ import annotations
+
  import math
+ from collections.abc import Generator
  from itertools import product
- from typing import Any, Generator, List, Tuple
+ from typing import Any

  import numpy as np
  import torch


  def is_box_near_crop_edge(
- boxes: torch.Tensor, crop_box: List[int], orig_box: List[int], atol: float = 20.0
+ boxes: torch.Tensor, crop_box: list[int], orig_box: list[int], atol: float = 20.0
  ) -> torch.Tensor:
- """Determines if bounding boxes are near the edge of a cropped image region using a specified tolerance."""
+ """Determine if bounding boxes are near the edge of a cropped image region using a specified tolerance.
+
+ Args:
+ boxes (torch.Tensor): Bounding boxes in XYXY format.
+ crop_box (list[int]): Crop box coordinates in [x0, y0, x1, y1] format.
+ orig_box (list[int]): Original image box coordinates in [x0, y0, x1, y1] format.
+ atol (float, optional): Absolute tolerance for edge proximity detection.
+
+ Returns:
+ (torch.Tensor): Boolean tensor indicating which boxes are near crop edges.
+
+ Examples:
+ >>> boxes = torch.tensor([[10, 10, 50, 50], [100, 100, 150, 150]])
+ >>> crop_box = [0, 0, 200, 200]
+ >>> orig_box = [0, 0, 300, 300]
+ >>> near_edge = is_box_near_crop_edge(boxes, crop_box, orig_box, atol=20.0)
+ """
  crop_box_torch = torch.as_tensor(crop_box, dtype=torch.float, device=boxes.device)
  orig_box_torch = torch.as_tensor(orig_box, dtype=torch.float, device=boxes.device)
  boxes = uncrop_boxes_xyxy(boxes, crop_box).float()
@@ -21,9 +40,8 @@ def is_box_near_crop_edge(
  return torch.any(near_crop_edge, dim=1)


- def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
- """
- Yield batches of data from input arguments with specified batch size for efficient processing.
+ def batch_iterator(batch_size: int, *args) -> Generator[list[Any]]:
+ """Yield batches of data from input arguments with specified batch size for efficient processing.

  This function takes a batch size and any number of iterables, then yields batches of elements from those
  iterables. All input iterables must have the same length.
@@ -33,7 +51,7 @@ def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
  *args (Any): Variable length input iterables to batch. All iterables must have the same length.

  Yields:
- (List[Any]): A list of batched elements from each input iterable.
+ (list[Any]): A list of batched elements from each input iterable.

  Examples:
  >>> data = [1, 2, 3, 4, 5]
@@ -51,11 +69,10 @@


  def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, threshold_offset: float) -> torch.Tensor:
- """
- Computes the stability score for a batch of masks.
+ """Compute the stability score for a batch of masks.

- The stability score is the IoU between binary masks obtained by thresholding the predicted mask logits at
- high and low values.
+ The stability score is the IoU between binary masks obtained by thresholding the predicted mask logits at high and
+ low values.

  Args:
  masks (torch.Tensor): Batch of predicted mask logits.
@@ -65,15 +82,15 @@ def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, thresh
  Returns:
  (torch.Tensor): Stability scores for each mask in the batch.

- Notes:
- - One mask is always contained inside the other.
- - Memory is saved by preventing unnecessary cast to torch.int64.
-
  Examples:
  >>> masks = torch.rand(10, 256, 256) # Batch of 10 masks
  >>> mask_threshold = 0.5
  >>> threshold_offset = 0.1
  >>> stability_scores = calculate_stability_score(masks, mask_threshold, threshold_offset)
+
+ Notes:
+ - One mask is always contained inside the other.
+ - Memory is saved by preventing unnecessary cast to torch.int64.
  """
  intersections = (masks > (mask_threshold + threshold_offset)).sum(-1, dtype=torch.int16).sum(-1, dtype=torch.int32)
  unions = (masks > (mask_threshold - threshold_offset)).sum(-1, dtype=torch.int16).sum(-1, dtype=torch.int32)
@@ -89,25 +106,24 @@ def build_point_grid(n_per_side: int) -> np.ndarray:
  return np.stack([points_x, points_y], axis=-1).reshape(-1, 2)


- def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer: int) -> List[np.ndarray]:
- """Generates point grids for multiple crop layers with varying scales and densities."""
+ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer: int) -> list[np.ndarray]:
+ """Generate point grids for multiple crop layers with varying scales and densities."""
  return [build_point_grid(int(n_per_side / (scale_per_layer**i))) for i in range(n_layers + 1)]


  def generate_crop_boxes(
- im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
- ) -> Tuple[List[List[int]], List[int]]:
- """
- Generates crop boxes of varying sizes for multiscale image processing, with layered overlapping regions.
+ im_size: tuple[int, ...], n_layers: int, overlap_ratio: float
+ ) -> tuple[list[list[int]], list[int]]:
+ """Generate crop boxes of varying sizes for multiscale image processing, with layered overlapping regions.

  Args:
- im_size (Tuple[int, ...]): Height and width of the input image.
+ im_size (tuple[int, ...]): Height and width of the input image.
  n_layers (int): Number of layers to generate crop boxes for.
  overlap_ratio (float): Ratio of overlap between adjacent crop boxes.

  Returns:
- (List[List[int]]): List of crop boxes in [x0, y0, x1, y1] format.
- (List[int]): List of layer indices corresponding to each crop box.
+ crop_boxes (list[list[int]]): List of crop boxes in [x0, y0, x1, y1] format.
+ layer_idxs (list[int]): List of layer indices corresponding to each crop box.

  Examples:
  >>> im_size = (800, 1200) # Height, width
@@ -124,8 +140,8 @@ def generate_crop_boxes(
  layer_idxs.append(0)

  def crop_len(orig_len, n_crops, overlap):
- """Calculates the length of each crop given the original length, number of crops, and overlap."""
- return int(math.ceil((overlap * (n_crops - 1) + orig_len) / n_crops))
+ """Calculate the length of each crop given the original length, number of crops, and overlap."""
+ return math.ceil((overlap * (n_crops - 1) + orig_len) / n_crops)

  for i_layer in range(n_layers):
  n_crops_per_side = 2 ** (i_layer + 1)
@@ -146,7 +162,7 @@ def generate_crop_boxes(
  return crop_boxes, layer_idxs


- def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
+ def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: list[int]) -> torch.Tensor:
  """Uncrop bounding boxes by adding the crop box offset to their coordinates."""
  x0, y0, _, _ = crop_box
  offset = torch.tensor([[x0, y0, x0, y0]], device=boxes.device)
@@ -156,7 +172,7 @@ def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
  return boxes + offset


- def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
+ def uncrop_points(points: torch.Tensor, crop_box: list[int]) -> torch.Tensor:
  """Uncrop points by adding the crop box offset to their coordinates."""
  x0, y0, _, _ = crop_box
  offset = torch.tensor([[x0, y0]], device=points.device)
@@ -166,7 +182,7 @@ def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
  return points + offset


- def uncrop_masks(masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w: int) -> torch.Tensor:
+ def uncrop_masks(masks: torch.Tensor, crop_box: list[int], orig_h: int, orig_w: int) -> torch.Tensor:
  """Uncrop masks by padding them to the original image size, handling coordinate transformations."""
  x0, y0, x1, y1 = crop_box
  if x0 == 0 and y0 == 0 and x1 == orig_w and y1 == orig_h:
@@ -177,18 +193,18 @@ def uncrop_masks(masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w:
  return torch.nn.functional.pad(masks, pad, value=0)


- def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tuple[np.ndarray, bool]:
- """
- Removes small disconnected regions or holes in a mask based on area threshold and mode.
+ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> tuple[np.ndarray, bool]:
+ """Remove small disconnected regions or holes in a mask based on area threshold and mode.

  Args:
  mask (np.ndarray): Binary mask to process.
  area_thresh (float): Area threshold below which regions will be removed.
- mode (str): Processing mode, either 'holes' to fill small holes or 'islands' to remove small disconnected regions.
+ mode (str): Processing mode, either 'holes' to fill small holes or 'islands' to remove small disconnected
+ regions.

  Returns:
- (np.ndarray): Processed binary mask with small regions removed.
- (bool): Whether any regions were modified.
+ processed_mask (np.ndarray): Processed binary mask with small regions removed.
+ modified (bool): Whether any regions were modified.

  Examples:
  >>> mask = np.zeros((100, 100), dtype=np.bool_)
@@ -206,7 +222,7 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tup
  small_regions = [i + 1 for i, s in enumerate(sizes) if s < area_thresh]
  if not small_regions:
  return mask, False
- fill_labels = [0] + small_regions
+ fill_labels = [0, *small_regions]
  if not correct_holes:
  # If every region is below threshold, keep largest
  fill_labels = [i for i in range(n_labels) if i not in fill_labels] or [int(np.argmax(sizes)) + 1]
@@ -215,8 +231,7 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tup


  def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
- """
- Calculates bounding boxes in XYXY format around binary masks.
+ """Calculate bounding boxes in XYXY format around binary masks.

  Args:
  masks (torch.Tensor): Binary masks with shape (B, H, W) or (B, C, H, W).
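The updated `batch_iterator` signature above still yields one list of batch slices per input iterable. A small usage sketch consistent with the docstring's `data = [1, 2, 3, 4, 5]` example; the second iterable is added here purely for illustration:

```python
# batch_iterator yields, per step, a list with one slice per input iterable; all inputs
# must have equal length, as the docstring above states.
from ultralytics.models.sam.amg import batch_iterator

data = [1, 2, 3, 4, 5]
labels = ["a", "b", "c", "d", "e"]  # illustrative second iterable

for batch in batch_iterator(2, data, labels):
    print(batch)  # e.g. first iteration -> [[1, 2], ['a', 'b']]
```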
@@ -11,6 +11,7 @@ from functools import partial
  import torch

  from ultralytics.utils.downloads import attempt_download_asset
+ from ultralytics.utils.torch_utils import TORCH_1_13

  from .modules.decoders import MaskDecoder
  from .modules.encoders import FpnNeck, Hiera, ImageEncoder, ImageEncoderViT, MemoryEncoder, PromptEncoder
@@ -21,7 +22,7 @@ from .modules.transformer import TwoWayTransformer


  def build_sam_vit_h(checkpoint=None):
- """Builds and returns a Segment Anything Model (SAM) h-size model with specified encoder parameters."""
+ """Build and return a Segment Anything Model (SAM) h-size model with specified encoder parameters."""
  return _build_sam(
  encoder_embed_dim=1280,
  encoder_depth=32,
@@ -32,7 +33,7 @@


  def build_sam_vit_l(checkpoint=None):
- """Builds and returns a Segment Anything Model (SAM) l-size model with specified encoder parameters."""
+ """Build and return a Segment Anything Model (SAM) l-size model with specified encoder parameters."""
  return _build_sam(
  encoder_embed_dim=1024,
  encoder_depth=24,
@@ -43,7 +44,7 @@


  def build_sam_vit_b(checkpoint=None):
- """Constructs and returns a Segment Anything Model (SAM) with b-size architecture and optional checkpoint."""
+ """Build and return a Segment Anything Model (SAM) b-size model with specified encoder parameters."""
  return _build_sam(
  encoder_embed_dim=768,
  encoder_depth=12,
@@ -54,7 +55,7 @@


  def build_mobile_sam(checkpoint=None):
- """Builds and returns a Mobile Segment Anything Model (Mobile-SAM) for efficient image segmentation."""
+ """Build and return a Mobile Segment Anything Model (Mobile-SAM) for efficient image segmentation."""
  return _build_sam(
  encoder_embed_dim=[64, 128, 160, 320],
  encoder_depth=[2, 2, 6, 2],
@@ -66,7 +67,7 @@


  def build_sam2_t(checkpoint=None):
- """Builds and returns a Segment Anything Model 2 (SAM2) tiny-size model with specified architecture parameters."""
+ """Build and return a Segment Anything Model 2 (SAM2) tiny-size model with specified architecture parameters."""
  return _build_sam2(
  encoder_embed_dim=96,
  encoder_stages=[1, 2, 7, 2],
@@ -79,7 +80,7 @@


  def build_sam2_s(checkpoint=None):
- """Builds and returns a small-size Segment Anything Model (SAM2) with specified architecture parameters."""
+ """Build and return a small-size Segment Anything Model 2 (SAM2) with specified architecture parameters."""
  return _build_sam2(
  encoder_embed_dim=96,
  encoder_stages=[1, 2, 11, 2],
@@ -92,7 +93,7 @@


  def build_sam2_b(checkpoint=None):
- """Builds and returns a SAM2 base-size model with specified architecture parameters."""
+ """Build and return a Segment Anything Model 2 (SAM2) base-size model with specified architecture parameters."""
  return _build_sam2(
  encoder_embed_dim=112,
  encoder_stages=[2, 3, 16, 3],
@@ -106,7 +107,7 @@


  def build_sam2_l(checkpoint=None):
- """Builds and returns a large-size Segment Anything Model (SAM2) with specified architecture parameters."""
+ """Build and return a large-size Segment Anything Model 2 (SAM2) with specified architecture parameters."""
  return _build_sam2(
  encoder_embed_dim=144,
  encoder_stages=[2, 6, 36, 4],
@@ -126,16 +127,15 @@ def _build_sam(
  checkpoint=None,
  mobile_sam=False,
  ):
- """
- Builds a Segment Anything Model (SAM) with specified encoder parameters.
+ """Build a Segment Anything Model (SAM) with specified encoder parameters.

  Args:
- encoder_embed_dim (int | List[int]): Embedding dimension for the encoder.
- encoder_depth (int | List[int]): Depth of the encoder.
- encoder_num_heads (int | List[int]): Number of attention heads in the encoder.
- encoder_global_attn_indexes (List[int] | None): Indexes for global attention in the encoder.
- checkpoint (str | None): Path to the model checkpoint file.
- mobile_sam (bool): Whether to build a Mobile-SAM model.
+ encoder_embed_dim (int | list[int]): Embedding dimension for the encoder.
+ encoder_depth (int | list[int]): Depth of the encoder.
+ encoder_num_heads (int | list[int]): Number of attention heads in the encoder.
+ encoder_global_attn_indexes (list[int] | None): Indexes for global attention in the encoder.
+ checkpoint (str | None, optional): Path to the model checkpoint file.
+ mobile_sam (bool, optional): Whether to build a Mobile-SAM model.

  Returns:
  (SAMModel): A Segment Anything Model instance with the specified architecture.
@@ -207,7 +207,7 @@ def _build_sam(
  if checkpoint is not None:
  checkpoint = attempt_download_asset(checkpoint)
  with open(checkpoint, "rb") as f:
- state_dict = torch.load(f)
+ state_dict = torch.load(f, weights_only=False) if TORCH_1_13 else torch.load(f)
  sam.load_state_dict(state_dict)
  sam.eval()
  return sam
@@ -223,18 +223,17 @@ def _build_sam2(
  encoder_window_spec=[8, 4, 16, 8],
  checkpoint=None,
  ):
- """
- Builds and returns a Segment Anything Model 2 (SAM2) with specified architecture parameters.
+ """Build and return a Segment Anything Model 2 (SAM2) with specified architecture parameters.

  Args:
- encoder_embed_dim (int): Embedding dimension for the encoder.
- encoder_stages (List[int]): Number of blocks in each stage of the encoder.
- encoder_num_heads (int): Number of attention heads in the encoder.
- encoder_global_att_blocks (List[int]): Indices of global attention blocks in the encoder.
- encoder_backbone_channel_list (List[int]): Channel dimensions for each level of the encoder backbone.
- encoder_window_spatial_size (List[int]): Spatial size of the window for position embeddings.
- encoder_window_spec (List[int]): Window specifications for each stage of the encoder.
- checkpoint (str | None): Path to the checkpoint file for loading pre-trained weights.
+ encoder_embed_dim (int, optional): Embedding dimension for the encoder.
+ encoder_stages (list[int], optional): Number of blocks in each stage of the encoder.
+ encoder_num_heads (int, optional): Number of attention heads in the encoder.
+ encoder_global_att_blocks (list[int], optional): Indices of global attention blocks in the encoder.
+ encoder_backbone_channel_list (list[int], optional): Channel dimensions for each level of the encoder backbone.
+ encoder_window_spatial_size (list[int], optional): Spatial size of the window for position embeddings.
+ encoder_window_spec (list[int], optional): Window specifications for each stage of the encoder.
+ checkpoint (str | None, optional): Path to the checkpoint file for loading pre-trained weights.

  Returns:
  (SAM2Model): A configured and initialized SAM2 model.
@@ -302,7 +301,7 @@ def _build_sam2(
  if checkpoint is not None:
  checkpoint = attempt_download_asset(checkpoint)
  with open(checkpoint, "rb") as f:
- state_dict = torch.load(f)["model"]
+ state_dict = (torch.load(f, weights_only=False) if TORCH_1_13 else torch.load(f))["model"]
  sam2.load_state_dict(state_dict)
  sam2.eval()
  return sam2
@@ -325,11 +324,10 @@ sam_model_map = {


  def build_sam(ckpt="sam_b.pt"):
- """
- Builds and returns a Segment Anything Model (SAM) based on the provided checkpoint.
+ """Build and return a Segment Anything Model (SAM) based on the provided checkpoint.

  Args:
- ckpt (str | Path): Path to the checkpoint file or name of a pre-defined SAM model.
+ ckpt (str | Path, optional): Path to the checkpoint file or name of a pre-defined SAM model.

  Returns:
  (SAMModel | SAM2Model): A configured and initialized SAM or SAM2 model instance.
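Both checkpoint-loading hunks above gate `weights_only=False` on the new TORCH_1_13 flag imported from ultralytics.utils.torch_utils. A sketch of the same pattern in isolation; the version check and checkpoint path below are illustrative rather than copied from torch_utils:

```python
# torch.load gained a weights_only argument in PyTorch 1.13, and later releases flipped its
# default, so full (non-weights-only) checkpoints must request weights_only=False explicitly.
from pathlib import Path

import torch

TORCH_1_13 = tuple(int(v) for v in torch.__version__.split("+")[0].split(".")[:2]) >= (1, 13)

ckpt = Path("sam_b.pt")  # illustrative checkpoint path
if ckpt.exists():
    with open(ckpt, "rb") as f:
        state_dict = torch.load(f, weights_only=False) if TORCH_1_13 else torch.load(f)
```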