dgenerate-ultralytics-headless 8.3.214__py3-none-any.whl → 8.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/METADATA +64 -74
  2. dgenerate_ultralytics_headless-8.4.7.dist-info/RECORD +311 -0
  3. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +7 -9
  5. tests/conftest.py +8 -15
  6. tests/test_cli.py +1 -1
  7. tests/test_cuda.py +13 -10
  8. tests/test_engine.py +9 -9
  9. tests/test_exports.py +65 -13
  10. tests/test_integrations.py +13 -13
  11. tests/test_python.py +125 -69
  12. tests/test_solutions.py +161 -152
  13. ultralytics/__init__.py +1 -1
  14. ultralytics/cfg/__init__.py +86 -92
  15. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  16. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  17. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  18. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  19. ultralytics/cfg/datasets/TT100K.yaml +346 -0
  20. ultralytics/cfg/datasets/VOC.yaml +15 -16
  21. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  22. ultralytics/cfg/datasets/coco-pose.yaml +21 -0
  23. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  24. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  25. ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
  26. ultralytics/cfg/datasets/dog-pose.yaml +28 -0
  27. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  28. ultralytics/cfg/datasets/dota8.yaml +2 -2
  29. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
  30. ultralytics/cfg/datasets/kitti.yaml +27 -0
  31. ultralytics/cfg/datasets/lvis.yaml +5 -5
  32. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  33. ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
  34. ultralytics/cfg/datasets/xView.yaml +16 -16
  35. ultralytics/cfg/default.yaml +4 -2
  36. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  37. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  38. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  39. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  40. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  41. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  42. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  43. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  44. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  45. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  46. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  47. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  48. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
  49. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
  50. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
  51. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
  52. ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
  53. ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
  54. ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
  55. ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
  56. ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
  57. ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
  58. ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
  59. ultralytics/cfg/models/v6/yolov6.yaml +1 -1
  60. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  61. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  62. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  63. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  64. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  65. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  66. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  67. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  68. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  69. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  70. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  71. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
  72. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  73. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  74. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  75. ultralytics/data/__init__.py +4 -4
  76. ultralytics/data/annotator.py +5 -6
  77. ultralytics/data/augment.py +300 -475
  78. ultralytics/data/base.py +18 -26
  79. ultralytics/data/build.py +147 -25
  80. ultralytics/data/converter.py +108 -87
  81. ultralytics/data/dataset.py +47 -75
  82. ultralytics/data/loaders.py +42 -49
  83. ultralytics/data/split.py +5 -6
  84. ultralytics/data/split_dota.py +8 -15
  85. ultralytics/data/utils.py +36 -45
  86. ultralytics/engine/exporter.py +351 -263
  87. ultralytics/engine/model.py +186 -225
  88. ultralytics/engine/predictor.py +45 -54
  89. ultralytics/engine/results.py +198 -325
  90. ultralytics/engine/trainer.py +165 -106
  91. ultralytics/engine/tuner.py +41 -43
  92. ultralytics/engine/validator.py +55 -38
  93. ultralytics/hub/__init__.py +16 -19
  94. ultralytics/hub/auth.py +6 -12
  95. ultralytics/hub/google/__init__.py +7 -10
  96. ultralytics/hub/session.py +15 -25
  97. ultralytics/hub/utils.py +5 -8
  98. ultralytics/models/__init__.py +1 -1
  99. ultralytics/models/fastsam/__init__.py +1 -1
  100. ultralytics/models/fastsam/model.py +8 -10
  101. ultralytics/models/fastsam/predict.py +18 -30
  102. ultralytics/models/fastsam/utils.py +1 -2
  103. ultralytics/models/fastsam/val.py +5 -7
  104. ultralytics/models/nas/__init__.py +1 -1
  105. ultralytics/models/nas/model.py +5 -8
  106. ultralytics/models/nas/predict.py +7 -9
  107. ultralytics/models/nas/val.py +1 -2
  108. ultralytics/models/rtdetr/__init__.py +1 -1
  109. ultralytics/models/rtdetr/model.py +5 -8
  110. ultralytics/models/rtdetr/predict.py +15 -19
  111. ultralytics/models/rtdetr/train.py +10 -13
  112. ultralytics/models/rtdetr/val.py +21 -23
  113. ultralytics/models/sam/__init__.py +15 -2
  114. ultralytics/models/sam/amg.py +14 -20
  115. ultralytics/models/sam/build.py +26 -19
  116. ultralytics/models/sam/build_sam3.py +377 -0
  117. ultralytics/models/sam/model.py +29 -32
  118. ultralytics/models/sam/modules/blocks.py +83 -144
  119. ultralytics/models/sam/modules/decoders.py +19 -37
  120. ultralytics/models/sam/modules/encoders.py +44 -101
  121. ultralytics/models/sam/modules/memory_attention.py +16 -30
  122. ultralytics/models/sam/modules/sam.py +200 -73
  123. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  124. ultralytics/models/sam/modules/transformer.py +18 -28
  125. ultralytics/models/sam/modules/utils.py +174 -50
  126. ultralytics/models/sam/predict.py +2248 -350
  127. ultralytics/models/sam/sam3/__init__.py +3 -0
  128. ultralytics/models/sam/sam3/decoder.py +546 -0
  129. ultralytics/models/sam/sam3/encoder.py +529 -0
  130. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  131. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  132. ultralytics/models/sam/sam3/model_misc.py +199 -0
  133. ultralytics/models/sam/sam3/necks.py +129 -0
  134. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  135. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  136. ultralytics/models/sam/sam3/vitdet.py +547 -0
  137. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  138. ultralytics/models/utils/loss.py +14 -26
  139. ultralytics/models/utils/ops.py +13 -17
  140. ultralytics/models/yolo/__init__.py +1 -1
  141. ultralytics/models/yolo/classify/predict.py +10 -13
  142. ultralytics/models/yolo/classify/train.py +12 -33
  143. ultralytics/models/yolo/classify/val.py +30 -29
  144. ultralytics/models/yolo/detect/predict.py +9 -12
  145. ultralytics/models/yolo/detect/train.py +17 -23
  146. ultralytics/models/yolo/detect/val.py +77 -59
  147. ultralytics/models/yolo/model.py +43 -60
  148. ultralytics/models/yolo/obb/predict.py +7 -16
  149. ultralytics/models/yolo/obb/train.py +14 -17
  150. ultralytics/models/yolo/obb/val.py +40 -37
  151. ultralytics/models/yolo/pose/__init__.py +1 -1
  152. ultralytics/models/yolo/pose/predict.py +7 -22
  153. ultralytics/models/yolo/pose/train.py +13 -16
  154. ultralytics/models/yolo/pose/val.py +39 -58
  155. ultralytics/models/yolo/segment/predict.py +17 -21
  156. ultralytics/models/yolo/segment/train.py +7 -10
  157. ultralytics/models/yolo/segment/val.py +95 -47
  158. ultralytics/models/yolo/world/train.py +8 -14
  159. ultralytics/models/yolo/world/train_world.py +11 -34
  160. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  161. ultralytics/models/yolo/yoloe/predict.py +16 -23
  162. ultralytics/models/yolo/yoloe/train.py +36 -44
  163. ultralytics/models/yolo/yoloe/train_seg.py +11 -11
  164. ultralytics/models/yolo/yoloe/val.py +15 -20
  165. ultralytics/nn/__init__.py +7 -7
  166. ultralytics/nn/autobackend.py +159 -85
  167. ultralytics/nn/modules/__init__.py +68 -60
  168. ultralytics/nn/modules/activation.py +4 -6
  169. ultralytics/nn/modules/block.py +260 -224
  170. ultralytics/nn/modules/conv.py +52 -97
  171. ultralytics/nn/modules/head.py +831 -299
  172. ultralytics/nn/modules/transformer.py +76 -88
  173. ultralytics/nn/modules/utils.py +16 -21
  174. ultralytics/nn/tasks.py +180 -195
  175. ultralytics/nn/text_model.py +45 -69
  176. ultralytics/optim/__init__.py +5 -0
  177. ultralytics/optim/muon.py +338 -0
  178. ultralytics/solutions/__init__.py +12 -12
  179. ultralytics/solutions/ai_gym.py +13 -19
  180. ultralytics/solutions/analytics.py +15 -16
  181. ultralytics/solutions/config.py +6 -7
  182. ultralytics/solutions/distance_calculation.py +10 -13
  183. ultralytics/solutions/heatmap.py +8 -14
  184. ultralytics/solutions/instance_segmentation.py +6 -9
  185. ultralytics/solutions/object_blurrer.py +7 -10
  186. ultralytics/solutions/object_counter.py +12 -19
  187. ultralytics/solutions/object_cropper.py +8 -14
  188. ultralytics/solutions/parking_management.py +34 -32
  189. ultralytics/solutions/queue_management.py +10 -12
  190. ultralytics/solutions/region_counter.py +9 -12
  191. ultralytics/solutions/security_alarm.py +15 -20
  192. ultralytics/solutions/similarity_search.py +10 -15
  193. ultralytics/solutions/solutions.py +77 -76
  194. ultralytics/solutions/speed_estimation.py +7 -10
  195. ultralytics/solutions/streamlit_inference.py +2 -4
  196. ultralytics/solutions/templates/similarity-search.html +7 -18
  197. ultralytics/solutions/trackzone.py +7 -10
  198. ultralytics/solutions/vision_eye.py +5 -8
  199. ultralytics/trackers/__init__.py +1 -1
  200. ultralytics/trackers/basetrack.py +3 -5
  201. ultralytics/trackers/bot_sort.py +10 -27
  202. ultralytics/trackers/byte_tracker.py +21 -37
  203. ultralytics/trackers/track.py +4 -7
  204. ultralytics/trackers/utils/gmc.py +11 -22
  205. ultralytics/trackers/utils/kalman_filter.py +37 -48
  206. ultralytics/trackers/utils/matching.py +12 -15
  207. ultralytics/utils/__init__.py +124 -124
  208. ultralytics/utils/autobatch.py +2 -4
  209. ultralytics/utils/autodevice.py +17 -18
  210. ultralytics/utils/benchmarks.py +57 -71
  211. ultralytics/utils/callbacks/base.py +8 -10
  212. ultralytics/utils/callbacks/clearml.py +5 -13
  213. ultralytics/utils/callbacks/comet.py +32 -46
  214. ultralytics/utils/callbacks/dvc.py +13 -18
  215. ultralytics/utils/callbacks/mlflow.py +4 -5
  216. ultralytics/utils/callbacks/neptune.py +7 -15
  217. ultralytics/utils/callbacks/platform.py +423 -38
  218. ultralytics/utils/callbacks/raytune.py +3 -4
  219. ultralytics/utils/callbacks/tensorboard.py +25 -31
  220. ultralytics/utils/callbacks/wb.py +16 -14
  221. ultralytics/utils/checks.py +127 -85
  222. ultralytics/utils/cpu.py +3 -8
  223. ultralytics/utils/dist.py +9 -12
  224. ultralytics/utils/downloads.py +25 -33
  225. ultralytics/utils/errors.py +6 -14
  226. ultralytics/utils/events.py +2 -4
  227. ultralytics/utils/export/__init__.py +4 -236
  228. ultralytics/utils/export/engine.py +246 -0
  229. ultralytics/utils/export/imx.py +117 -63
  230. ultralytics/utils/export/tensorflow.py +231 -0
  231. ultralytics/utils/files.py +26 -30
  232. ultralytics/utils/git.py +9 -11
  233. ultralytics/utils/instance.py +30 -51
  234. ultralytics/utils/logger.py +212 -114
  235. ultralytics/utils/loss.py +601 -215
  236. ultralytics/utils/metrics.py +128 -156
  237. ultralytics/utils/nms.py +13 -16
  238. ultralytics/utils/ops.py +117 -166
  239. ultralytics/utils/patches.py +75 -21
  240. ultralytics/utils/plotting.py +75 -80
  241. ultralytics/utils/tal.py +125 -59
  242. ultralytics/utils/torch_utils.py +53 -79
  243. ultralytics/utils/tqdm.py +24 -21
  244. ultralytics/utils/triton.py +13 -19
  245. ultralytics/utils/tuner.py +19 -10
  246. dgenerate_ultralytics_headless-8.3.214.dist-info/RECORD +0 -283
  247. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/entry_points.txt +0 -0
  248. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/licenses/LICENSE +0 -0
  249. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/top_level.txt +0 -0
ultralytics/utils/nms.py CHANGED
@@ -27,11 +27,10 @@ def non_max_suppression(
27
27
  end2end: bool = False,
28
28
  return_idxs: bool = False,
29
29
  ):
30
- """
31
- Perform non-maximum suppression (NMS) on prediction results.
30
+ """Perform non-maximum suppression (NMS) on prediction results.
32
31
 
33
- Applies NMS to filter overlapping bounding boxes based on confidence and IoU thresholds. Supports multiple
34
- detection formats including standard boxes, rotated boxes, and masks.
32
+ Applies NMS to filter overlapping bounding boxes based on confidence and IoU thresholds. Supports multiple detection
33
+ formats including standard boxes, rotated boxes, and masks.
35
34
 
36
35
  Args:
37
36
  prediction (torch.Tensor): Predictions with shape (batch_size, num_classes + 4 + num_masks, num_boxes)
@@ -52,8 +51,8 @@ def non_max_suppression(
52
51
  return_idxs (bool): Whether to return the indices of kept detections.
53
52
 
54
53
  Returns:
55
- output (list[torch.Tensor]): List of detections per image with shape (num_boxes, 6 + num_masks)
56
- containing (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
54
+ output (list[torch.Tensor]): List of detections per image with shape (num_boxes, 6 + num_masks) containing (x1,
55
+ y1, x2, y2, confidence, class, mask1, mask2, ...).
57
56
  keepi (list[torch.Tensor]): Indices of kept detections if return_idxs=True.
58
57
  """
59
58
  # Checks
@@ -168,8 +167,7 @@ def non_max_suppression(
168
167
 
169
168
 
170
169
  class TorchNMS:
171
- """
172
- Ultralytics custom NMS implementation optimized for YOLO.
170
+ """Ultralytics custom NMS implementation optimized for YOLO.
173
171
 
174
172
  This class provides static methods for performing non-maximum suppression (NMS) operations on bounding boxes,
175
173
  including both standard NMS and batched NMS for multi-class scenarios.
@@ -194,8 +192,7 @@ class TorchNMS:
194
192
  iou_func=box_iou,
195
193
  exit_early: bool = True,
196
194
  ) -> torch.Tensor:
197
- """
198
- Fast-NMS implementation from https://arxiv.org/pdf/1904.02689 using upper triangular matrix operations.
195
+ """Fast-NMS implementation from https://arxiv.org/pdf/1904.02689 using upper triangular matrix operations.
199
196
 
200
197
  Args:
201
198
  boxes (torch.Tensor): Bounding boxes with shape (N, 4) in xyxy format.
@@ -231,15 +228,16 @@ class TorchNMS:
231
228
  upper_mask = row_idx < col_idx
232
229
  ious = ious * upper_mask
233
230
  # Zeroing these scores ensures the additional indices would not affect the final results
234
- scores[~((ious >= iou_threshold).sum(0) <= 0)] = 0
231
+ scores_ = scores[sorted_idx]
232
+ scores_[~((ious >= iou_threshold).sum(0) <= 0)] = 0
233
+ scores[sorted_idx] = scores_ # update original tensor for NMSModel
235
234
  # NOTE: return indices with fixed length to avoid TFLite reshape error
236
- pick = torch.topk(scores, scores.shape[0]).indices
235
+ pick = torch.topk(scores_, scores_.shape[0]).indices
237
236
  return sorted_idx[pick]
238
237
 
239
238
  @staticmethod
240
239
  def nms(boxes: torch.Tensor, scores: torch.Tensor, iou_threshold: float) -> torch.Tensor:
241
- """
242
- Optimized NMS with early termination that matches torchvision behavior exactly.
240
+ """Optimized NMS with early termination that matches torchvision behavior exactly.
243
241
 
244
242
  Args:
245
243
  boxes (torch.Tensor): Bounding boxes with shape (N, 4) in xyxy format.
@@ -305,8 +303,7 @@ class TorchNMS:
305
303
  iou_threshold: float,
306
304
  use_fast_nms: bool = False,
307
305
  ) -> torch.Tensor:
308
- """
309
- Batched NMS for class-aware suppression.
306
+ """Batched NMS for class-aware suppression.
310
307
 
311
308
  Args:
312
309
  boxes (torch.Tensor): Bounding boxes with shape (N, 4) in xyxy format.
ultralytics/utils/ops.py CHANGED
@@ -16,8 +16,7 @@ from ultralytics.utils import NOT_MACOS14
16
16
 
17
17
 
18
18
  class Profile(contextlib.ContextDecorator):
19
- """
20
- Ultralytics Profile class for timing code execution.
19
+ """Ultralytics Profile class for timing code execution.
21
20
 
22
21
  Use as a decorator with @Profile() or as a context manager with 'with Profile():'. Provides accurate timing
23
22
  measurements with CUDA synchronization support for GPU operations.
@@ -40,8 +39,7 @@ class Profile(contextlib.ContextDecorator):
40
39
  """
41
40
 
42
41
  def __init__(self, t: float = 0.0, device: torch.device | None = None):
43
- """
44
- Initialize the Profile class.
42
+ """Initialize the Profile class.
45
43
 
46
44
  Args:
47
45
  t (float): Initial accumulated time in seconds.
@@ -56,7 +54,7 @@ class Profile(contextlib.ContextDecorator):
56
54
  self.start = self.time()
57
55
  return self
58
56
 
59
- def __exit__(self, type, value, traceback): # noqa
57
+ def __exit__(self, type, value, traceback):
60
58
  """Stop timing."""
61
59
  self.dt = self.time() - self.start # delta-time
62
60
  self.t += self.dt # accumulate dt
@@ -73,11 +71,10 @@ class Profile(contextlib.ContextDecorator):
73
71
 
74
72
 
75
73
  def segment2box(segment, width: int = 640, height: int = 640):
76
- """
77
- Convert segment coordinates to bounding box coordinates.
74
+ """Convert segment coordinates to bounding box coordinates.
78
75
 
79
- Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates.
80
- Applies inside-image constraint and clips coordinates when necessary.
76
+ Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates. Applies
77
+ inside-image constraint and clips coordinates when necessary.
81
78
 
82
79
  Args:
83
80
  segment (torch.Tensor): Segment coordinates in format (N, 2) where N is number of points.
@@ -103,11 +100,10 @@ def segment2box(segment, width: int = 640, height: int = 640):
103
100
 
104
101
 
105
102
  def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = True, xywh: bool = False):
106
- """
107
- Rescale bounding boxes from one image shape to another.
103
+ """Rescale bounding boxes from one image shape to another.
108
104
 
109
- Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes.
110
- Supports both xyxy and xywh box formats.
105
+ Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes. Supports
106
+ both xyxy and xywh box formats.
111
107
 
112
108
  Args:
113
109
  img1_shape (tuple): Shape of the source image (height, width).
@@ -139,8 +135,7 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = T
139
135
 
140
136
 
141
137
  def make_divisible(x: int, divisor):
142
- """
143
- Return the nearest number that is divisible by the given divisor.
138
+ """Return the nearest number that is divisible by the given divisor.
144
139
 
145
140
  Args:
146
141
  x (int): The number to make divisible.
@@ -155,8 +150,7 @@ def make_divisible(x: int, divisor):
155
150
 
156
151
 
157
152
  def clip_boxes(boxes, shape):
158
- """
159
- Clip bounding boxes to image boundaries.
153
+ """Clip bounding boxes to image boundaries.
160
154
 
161
155
  Args:
162
156
  boxes (torch.Tensor | np.ndarray): Bounding boxes to clip.
@@ -184,8 +178,7 @@ def clip_boxes(boxes, shape):
184
178
 
185
179
 
186
180
  def clip_coords(coords, shape):
187
- """
188
- Clip line coordinates to image boundaries.
181
+ """Clip line coordinates to image boundaries.
189
182
 
190
183
  Args:
191
184
  coords (torch.Tensor | np.ndarray): Line coordinates to clip.
@@ -208,55 +201,9 @@ def clip_coords(coords, shape):
208
201
  return coords
209
202
 
210
203
 
211
- def scale_image(masks, im0_shape, ratio_pad=None):
212
- """
213
- Rescale masks to original image size.
214
-
215
- Takes resized and padded masks and rescales them back to the original image dimensions, removing any padding
216
- that was applied during preprocessing.
217
-
218
- Args:
219
- masks (np.ndarray): Resized and padded masks with shape [H, W, N] or [H, W, 3].
220
- im0_shape (tuple): Original image shape as HWC or HW (supports both).
221
- ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
222
-
223
- Returns:
224
- (np.ndarray): Rescaled masks with shape [H, W, N] matching original image dimensions.
225
- """
226
- # Rescale coordinates (xyxy) from im1_shape to im0_shape
227
- im0_h, im0_w = im0_shape[:2] # supports both HWC or HW shapes
228
- im1_h, im1_w, _ = masks.shape
229
- if im1_h == im0_h and im1_w == im0_w:
230
- return masks
231
-
232
- if ratio_pad is None: # calculate from im0_shape
233
- gain = min(im1_h / im0_h, im1_w / im0_w) # gain = old / new
234
- pad = (im1_w - im0_w * gain) / 2, (im1_h - im0_h * gain) / 2 # wh padding
235
- else:
236
- pad = ratio_pad[1]
237
-
238
- pad_w, pad_h = pad
239
- top = int(round(pad_h - 0.1))
240
- left = int(round(pad_w - 0.1))
241
- bottom = im1_h - int(round(pad_h + 0.1))
242
- right = im1_w - int(round(pad_w + 0.1))
243
-
244
- if len(masks.shape) < 2:
245
- raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
246
- masks = masks[top:bottom, left:right]
247
- # handle the cv2.resize 512 channels limitation: https://github.com/ultralytics/ultralytics/pull/21947
248
- masks = [cv2.resize(array, (im0_w, im0_h)) for array in np.array_split(masks, masks.shape[-1] // 512 + 1, axis=-1)]
249
- masks = np.concatenate(masks, axis=-1) if len(masks) > 1 else masks[0]
250
- if len(masks.shape) == 2:
251
- masks = masks[:, :, None]
252
-
253
- return masks
254
-
255
-
256
204
  def xyxy2xywh(x):
257
- """
258
- Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is the
259
- top-left corner and (x2, y2) is the bottom-right corner.
205
+ """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is
206
+ the top-left corner and (x2, y2) is the bottom-right corner.
260
207
 
261
208
  Args:
262
209
  x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.
@@ -275,9 +222,8 @@ def xyxy2xywh(x):
275
222
 
276
223
 
277
224
  def xywh2xyxy(x):
278
- """
279
- Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
280
- top-left corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than per channel.
225
+ """Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is
226
+ the top-left corner and (x2, y2) is the bottom-right corner. Note: ops per 2 channels faster than per channel.
281
227
 
282
228
  Args:
283
229
  x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x, y, width, height) format.
@@ -295,8 +241,7 @@ def xywh2xyxy(x):
295
241
 
296
242
 
297
243
  def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
298
- """
299
- Convert normalized bounding box coordinates to pixel coordinates.
244
+ """Convert normalized bounding box coordinates to pixel coordinates.
300
245
 
301
246
  Args:
302
247
  x (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, w, h) format.
@@ -306,8 +251,8 @@ def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
306
251
  padh (int): Padding height in pixels.
307
252
 
308
253
  Returns:
309
- y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
310
- x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
254
+ y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where x1,y1 is
255
+ the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
311
256
  """
312
257
  assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
313
258
  y = empty_like(x) # faster than clone/copy
@@ -321,8 +266,7 @@ def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
321
266
 
322
267
 
323
268
  def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0.0):
324
- """
325
- Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y,
269
+ """Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y,
326
270
  width and height are normalized to image dimensions.
327
271
 
328
272
  Args:
@@ -348,14 +292,13 @@ def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0
348
292
 
349
293
 
350
294
  def xywh2ltwh(x):
351
- """
352
- Convert bounding box format from [x, y, w, h] to [x1, y1, w, h] where x1, y1 are top-left coordinates.
295
+ """Convert bounding box format from [x, y, w, h] to [x1, y1, w, h] where x1, y1 are top-left coordinates.
353
296
 
354
297
  Args:
355
298
  x (np.ndarray | torch.Tensor): Input bounding box coordinates in xywh format.
356
299
 
357
300
  Returns:
358
- (np.ndarray | torch.Tensor): Bounding box coordinates in xyltwh format.
301
+ (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format.
359
302
  """
360
303
  y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
361
304
  y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x
@@ -364,14 +307,13 @@ def xywh2ltwh(x):
364
307
 
365
308
 
366
309
  def xyxy2ltwh(x):
367
- """
368
- Convert bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h] format.
310
+ """Convert bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h] format.
369
311
 
370
312
  Args:
371
313
  x (np.ndarray | torch.Tensor): Input bounding box coordinates in xyxy format.
372
314
 
373
315
  Returns:
374
- (np.ndarray | torch.Tensor): Bounding box coordinates in xyltwh format.
316
+ (np.ndarray | torch.Tensor): Bounding box coordinates in ltwh format.
375
317
  """
376
318
  y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
377
319
  y[..., 2] = x[..., 2] - x[..., 0] # width
@@ -380,11 +322,10 @@ def xyxy2ltwh(x):
380
322
 
381
323
 
382
324
  def ltwh2xywh(x):
383
- """
384
- Convert bounding boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
325
+ """Convert bounding boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
385
326
 
386
327
  Args:
387
- x (torch.Tensor): Input bounding box coordinates.
328
+ x (np.ndarray | torch.Tensor): Input bounding box coordinates.
388
329
 
389
330
  Returns:
390
331
  (np.ndarray | torch.Tensor): Bounding box coordinates in xywh format.
@@ -396,15 +337,14 @@ def ltwh2xywh(x):
396
337
 
397
338
 
398
339
  def xyxyxyxy2xywhr(x):
399
- """
400
- Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.
340
+ """Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.
401
341
 
402
342
  Args:
403
343
  x (np.ndarray | torch.Tensor): Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format.
404
344
 
405
345
  Returns:
406
- (np.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5).
407
- Rotation values are in radians from 0 to pi/2.
346
+ (np.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5). Rotation
347
+ values are in radians from [-pi/4, 3pi/4).
408
348
  """
409
349
  is_torch = isinstance(x, torch.Tensor)
410
350
  points = x.cpu().numpy() if is_torch else x
@@ -414,17 +354,25 @@ def xyxyxyxy2xywhr(x):
414
354
  # NOTE: Use cv2.minAreaRect to get accurate xywhr,
415
355
  # especially some objects are cut off by augmentations in dataloader.
416
356
  (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
417
- rboxes.append([cx, cy, w, h, angle / 180 * np.pi])
357
+ # convert angle to radian and normalize to [-pi/4, 3pi/4)
358
+ theta = angle / 180 * np.pi
359
+ if w < h:
360
+ w, h = h, w
361
+ theta += np.pi / 2
362
+ while theta >= 3 * np.pi / 4:
363
+ theta -= np.pi
364
+ while theta < -np.pi / 4:
365
+ theta += np.pi
366
+ rboxes.append([cx, cy, w, h, theta])
418
367
  return torch.tensor(rboxes, device=x.device, dtype=x.dtype) if is_torch else np.asarray(rboxes)
419
368
 
420
369
 
421
370
  def xywhr2xyxyxyxy(x):
422
- """
423
- Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4] format.
371
+ """Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4] format.
424
372
 
425
373
  Args:
426
- x (np.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5).
427
- Rotation values should be in radians from 0 to pi/2.
374
+ x (np.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5). Rotation
375
+ values should be in radians from 0 to pi/2.
428
376
 
429
377
  Returns:
430
378
  (np.ndarray | torch.Tensor): Converted corner points with shape (N, 4, 2) or (B, N, 4, 2).
@@ -450,8 +398,7 @@ def xywhr2xyxyxyxy(x):
450
398
 
451
399
 
452
400
  def ltwh2xyxy(x):
453
- """
454
- Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
401
+ """Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
455
402
 
456
403
  Args:
457
404
  x (np.ndarray | torch.Tensor): Input bounding box coordinates.
@@ -460,14 +407,13 @@ def ltwh2xyxy(x):
460
407
  (np.ndarray | torch.Tensor): Bounding box coordinates in xyxy format.
461
408
  """
462
409
  y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
463
- y[..., 2] = x[..., 2] + x[..., 0] # width
464
- y[..., 3] = x[..., 3] + x[..., 1] # height
410
+ y[..., 2] = x[..., 2] + x[..., 0] # x2
411
+ y[..., 3] = x[..., 3] + x[..., 1] # y2
465
412
  return y
466
413
 
467
414
 
468
415
  def segments2boxes(segments):
469
- """
470
- Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
416
+ """Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).
471
417
 
472
418
  Args:
473
419
  segments (list): List of segments where each segment is a list of points, each point is [x, y] coordinates.
@@ -483,8 +429,7 @@ def segments2boxes(segments):
483
429
 
484
430
 
485
431
  def resample_segments(segments, n: int = 1000):
486
- """
487
- Resample segments to n points each using linear interpolation.
432
+ """Resample segments to n points each using linear interpolation.
488
433
 
489
434
  Args:
490
435
  segments (list): List of (N, 2) arrays where N is the number of points in each segment.
@@ -506,9 +451,8 @@ def resample_segments(segments, n: int = 1000):
506
451
  return segments
507
452
 
508
453
 
509
- def crop_mask(masks, boxes):
510
- """
511
- Crop masks to bounding box regions.
454
+ def crop_mask(masks: torch.Tensor, boxes: torch.Tensor) -> torch.Tensor:
455
+ """Crop masks to bounding box regions.
512
456
 
513
457
  Args:
514
458
  masks (torch.Tensor): Masks with shape (N, H, W).
@@ -517,17 +461,25 @@ def crop_mask(masks, boxes):
517
461
  Returns:
518
462
  (torch.Tensor): Cropped masks.
519
463
  """
520
- _, h, w = masks.shape
521
- x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
522
- r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,1,w)
523
- c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(1,h,1)
524
-
525
- return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
464
+ if boxes.device != masks.device:
465
+ boxes = boxes.to(masks.device)
466
+ n, h, w = masks.shape
467
+ if n < 50 and not masks.is_cuda: # faster for fewer masks (predict)
468
+ for i, (x1, y1, x2, y2) in enumerate(boxes.round().int()):
469
+ masks[i, :y1] = 0
470
+ masks[i, y2:] = 0
471
+ masks[i, :, :x1] = 0
472
+ masks[i, :, x2:] = 0
473
+ return masks
474
+ else: # faster for more masks (val)
475
+ x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
476
+ r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,1,w)
477
+ c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(1,h,1)
478
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
526
479
 
527
480
 
528
481
  def process_mask(protos, masks_in, bboxes, shape, upsample: bool = False):
529
- """
530
- Apply masks to bounding boxes using mask head output.
482
+ """Apply masks to bounding boxes using mask head output.
531
483
 
532
484
  Args:
533
485
  protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
@@ -541,26 +493,20 @@ def process_mask(protos, masks_in, bboxes, shape, upsample: bool = False):
541
493
  are the height and width of the input image. The mask is applied to the bounding boxes.
542
494
  """
543
495
  c, mh, mw = protos.shape # CHW
544
- ih, iw = shape
545
- masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw) # CHW
546
- width_ratio = mw / iw
547
- height_ratio = mh / ih
548
-
549
- downsampled_bboxes = bboxes.clone()
550
- downsampled_bboxes[:, 0] *= width_ratio
551
- downsampled_bboxes[:, 2] *= width_ratio
552
- downsampled_bboxes[:, 3] *= height_ratio
553
- downsampled_bboxes[:, 1] *= height_ratio
554
-
555
- masks = crop_mask(masks, downsampled_bboxes) # CHW
496
+ masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw) # NHW
497
+
498
+ width_ratio = mw / shape[1]
499
+ height_ratio = mh / shape[0]
500
+ ratios = torch.tensor([[width_ratio, height_ratio, width_ratio, height_ratio]], device=bboxes.device)
501
+
502
+ masks = crop_mask(masks, boxes=bboxes * ratios) # NHW
556
503
  if upsample:
557
- masks = F.interpolate(masks[None], shape, mode="bilinear", align_corners=False)[0] # CHW
558
- return masks.gt_(0.0)
504
+ masks = F.interpolate(masks[None], shape, mode="bilinear")[0] # NHW
505
+ return masks.gt_(0.0).byte()
559
506
 
560
507
 
561
508
  def process_mask_native(protos, masks_in, bboxes, shape):
562
- """
563
- Apply masks to bounding boxes using mask head output with native upsampling.
509
+ """Apply masks to bounding boxes using mask head output with native upsampling.
564
510
 
565
511
  Args:
566
512
  protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
@@ -569,43 +515,53 @@ def process_mask_native(protos, masks_in, bboxes, shape):
569
515
  shape (tuple): Input image size as (height, width).
570
516
 
571
517
  Returns:
572
- (torch.Tensor): Binary mask tensor with shape (H, W, N).
518
+ (torch.Tensor): Binary mask tensor with shape (N, H, W).
573
519
  """
574
520
  c, mh, mw = protos.shape # CHW
575
521
  masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
576
- masks = scale_masks(masks[None], shape)[0] # CHW
577
- masks = crop_mask(masks, bboxes) # CHW
578
- return masks.gt_(0.0)
522
+ masks = scale_masks(masks[None], shape)[0] # NHW
523
+ masks = crop_mask(masks, bboxes) # NHW
524
+ return masks.gt_(0.0).byte()
579
525
 
580
526
 
581
- def scale_masks(masks, shape, padding: bool = True):
582
- """
583
- Rescale segment masks to target shape.
527
+ def scale_masks(
528
+ masks: torch.Tensor,
529
+ shape: tuple[int, int],
530
+ ratio_pad: tuple[tuple[int, int], tuple[int, int]] | None = None,
531
+ padding: bool = True,
532
+ ) -> torch.Tensor:
533
+ """Rescale segment masks to target shape.
584
534
 
585
535
  Args:
586
536
  masks (torch.Tensor): Masks with shape (N, C, H, W).
587
- shape (tuple): Target height and width as (height, width).
537
+ shape (tuple[int, int]): Target height and width as (height, width).
538
+ ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
588
539
  padding (bool): Whether masks are based on YOLO-style augmented images with padding.
589
540
 
590
541
  Returns:
591
542
  (torch.Tensor): Rescaled masks.
592
543
  """
593
- mh, mw = masks.shape[2:]
594
- gain = min(mh / shape[0], mw / shape[1]) # gain = old / new
595
- pad_w = mw - shape[1] * gain
596
- pad_h = mh - shape[0] * gain
597
- if padding:
598
- pad_w /= 2
599
- pad_h /= 2
600
- top, left = (int(round(pad_h - 0.1)), int(round(pad_w - 0.1))) if padding else (0, 0)
601
- bottom = mh - int(round(pad_h + 0.1))
602
- right = mw - int(round(pad_w + 0.1))
603
- return F.interpolate(masks[..., top:bottom, left:right], shape, mode="bilinear", align_corners=False) # NCHW masks
544
+ im1_h, im1_w = masks.shape[2:]
545
+ im0_h, im0_w = shape[:2]
546
+ if im1_h == im0_h and im1_w == im0_w:
547
+ return masks
548
+
549
+ if ratio_pad is None: # calculate from im0_shape
550
+ gain = min(im1_h / im0_h, im1_w / im0_w) # gain = old / new
551
+ pad_w, pad_h = (im1_w - im0_w * gain), (im1_h - im0_h * gain) # wh padding
552
+ if padding:
553
+ pad_w /= 2
554
+ pad_h /= 2
555
+ else:
556
+ pad_w, pad_h = ratio_pad[1]
557
+ top, left = (round(pad_h - 0.1), round(pad_w - 0.1)) if padding else (0, 0)
558
+ bottom = im1_h - round(pad_h + 0.1)
559
+ right = im1_w - round(pad_w + 0.1)
560
+ return F.interpolate(masks[..., top:bottom, left:right].float(), shape, mode="bilinear") # NCHW masks
604
561
 
605
562
 
606
563
  def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
607
- """
608
- Rescale segment coordinates from img1_shape to img0_shape.
564
+ """Rescale segment coordinates from img1_shape to img0_shape.
609
565
 
610
566
  Args:
611
567
  img1_shape (tuple): Source image shape as HWC or HW (supports both).
@@ -640,8 +596,7 @@ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool
640
596
 
641
597
 
642
598
  def regularize_rboxes(rboxes):
643
- """
644
- Regularize rotated bounding boxes to range [0, pi/2].
599
+ """Regularize rotated bounding boxes to range [0, pi/2].
645
600
 
646
601
  Args:
647
602
  rboxes (torch.Tensor): Input rotated boxes with shape (N, 5) in xywhr format.
@@ -658,12 +613,11 @@ def regularize_rboxes(rboxes):
658
613
  return torch.stack([x, y, w_, h_, t], dim=-1) # regularized boxes
659
614
 
660
615
 
661
- def masks2segments(masks, strategy: str = "all"):
662
- """
663
- Convert masks to segments using contour detection.
616
+ def masks2segments(masks: np.ndarray | torch.Tensor, strategy: str = "all") -> list[np.ndarray]:
617
+ """Convert masks to segments using contour detection.
664
618
 
665
619
  Args:
666
- masks (torch.Tensor): Binary masks with shape (batch_size, 160, 160).
620
+ masks (np.ndarray | torch.Tensor): Binary masks with shape (batch_size, 160, 160).
667
621
  strategy (str): Segmentation strategy, either 'all' or 'largest'.
668
622
 
669
623
  Returns:
@@ -671,8 +625,9 @@ def masks2segments(masks, strategy: str = "all"):
671
625
  """
672
626
  from ultralytics.data.converter import merge_multi_segment
673
627
 
628
+ masks = masks.astype("uint8") if isinstance(masks, np.ndarray) else masks.byte().cpu().numpy()
674
629
  segments = []
675
- for x in masks.int().cpu().numpy().astype("uint8"):
630
+ for x in np.ascontiguousarray(masks):
676
631
  c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
677
632
  if c:
678
633
  if strategy == "all": # merge and concatenate all segments
@@ -690,8 +645,7 @@ def masks2segments(masks, strategy: str = "all"):
690
645
 
691
646
 
692
647
  def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
693
- """
694
- Convert a batch of FP32 torch tensors to NumPy uint8 arrays, changing from BCHW to BHWC layout.
648
+ """Convert a batch of FP32 torch tensors to NumPy uint8 arrays, changing from BCHW to BHWC layout.
695
649
 
696
650
  Args:
697
651
  batch (torch.Tensor): Input tensor batch with shape (Batch, Channels, Height, Width) and dtype torch.float32.
@@ -699,12 +653,11 @@ def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
699
653
  Returns:
700
654
  (np.ndarray): Output NumPy array batch with shape (Batch, Height, Width, Channels) and dtype uint8.
701
655
  """
702
- return (batch.permute(0, 2, 3, 1).contiguous() * 255).clamp(0, 255).to(torch.uint8).cpu().numpy()
656
+ return (batch.permute(0, 2, 3, 1).contiguous() * 255).clamp(0, 255).byte().cpu().numpy()
703
657
 
704
658
 
705
659
  def clean_str(s):
706
- """
707
- Clean a string by replacing special characters with '_' character.
660
+ """Clean a string by replacing special characters with '_' character.
708
661
 
709
662
  Args:
710
663
  s (str): A string needing special characters replaced.
@@ -712,11 +665,9 @@ def clean_str(s):
712
665
  Returns:
713
666
  (str): A string with special characters replaced by an underscore _.
714
667
  """
715
- return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
668
+ return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨`><+]", repl="_", string=s)
716
669
 
717
670
 
718
671
  def empty_like(x):
719
672
  """Create empty torch.Tensor or np.ndarray with same shape as input and float32 dtype."""
720
- return (
721
- torch.empty_like(x, dtype=torch.float32) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=np.float32)
722
- )
673
+ return torch.empty_like(x, dtype=x.dtype) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=x.dtype)