dgenerate-ultralytics-headless 8.3.196 → 8.3.248 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +33 -34
- dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
- tests/__init__.py +5 -7
- tests/conftest.py +8 -15
- tests/test_cli.py +8 -10
- tests/test_cuda.py +9 -10
- tests/test_engine.py +29 -2
- tests/test_exports.py +69 -21
- tests/test_integrations.py +8 -11
- tests/test_python.py +109 -71
- tests/test_solutions.py +170 -159
- ultralytics/__init__.py +27 -9
- ultralytics/cfg/__init__.py +57 -64
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/Objects365.yaml +19 -15
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +19 -21
- ultralytics/cfg/datasets/VisDrone.yaml +5 -5
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +24 -2
- ultralytics/cfg/datasets/coco.yaml +2 -2
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/datasets/dog-pose.yaml +28 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +7 -7
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/default.yaml +96 -94
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
- ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
- ultralytics/cfg/models/v6/yolov6.yaml +1 -1
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +3 -4
- ultralytics/data/augment.py +286 -476
- ultralytics/data/base.py +18 -26
- ultralytics/data/build.py +151 -26
- ultralytics/data/converter.py +38 -50
- ultralytics/data/dataset.py +47 -75
- ultralytics/data/loaders.py +42 -49
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +41 -45
- ultralytics/engine/exporter.py +462 -462
- ultralytics/engine/model.py +150 -191
- ultralytics/engine/predictor.py +30 -40
- ultralytics/engine/results.py +177 -311
- ultralytics/engine/trainer.py +193 -120
- ultralytics/engine/tuner.py +77 -63
- ultralytics/engine/validator.py +39 -22
- ultralytics/hub/__init__.py +16 -19
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +5 -8
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +8 -10
- ultralytics/models/fastsam/predict.py +19 -30
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +5 -7
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +7 -8
- ultralytics/models/rtdetr/predict.py +15 -19
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +21 -23
- ultralytics/models/sam/__init__.py +15 -2
- ultralytics/models/sam/amg.py +14 -20
- ultralytics/models/sam/build.py +26 -19
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +29 -32
- ultralytics/models/sam/modules/blocks.py +83 -144
- ultralytics/models/sam/modules/decoders.py +22 -40
- ultralytics/models/sam/modules/encoders.py +44 -101
- ultralytics/models/sam/modules/memory_attention.py +16 -30
- ultralytics/models/sam/modules/sam.py +206 -79
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +18 -28
- ultralytics/models/sam/modules/utils.py +174 -50
- ultralytics/models/sam/predict.py +2268 -366
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +529 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +199 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +547 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +9 -12
- ultralytics/models/yolo/classify/train.py +15 -41
- ultralytics/models/yolo/classify/val.py +34 -32
- ultralytics/models/yolo/detect/predict.py +8 -11
- ultralytics/models/yolo/detect/train.py +13 -32
- ultralytics/models/yolo/detect/val.py +75 -63
- ultralytics/models/yolo/model.py +37 -53
- ultralytics/models/yolo/obb/predict.py +5 -14
- ultralytics/models/yolo/obb/train.py +11 -14
- ultralytics/models/yolo/obb/val.py +42 -39
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +7 -22
- ultralytics/models/yolo/pose/train.py +10 -22
- ultralytics/models/yolo/pose/val.py +40 -59
- ultralytics/models/yolo/segment/predict.py +16 -20
- ultralytics/models/yolo/segment/train.py +3 -12
- ultralytics/models/yolo/segment/val.py +106 -56
- ultralytics/models/yolo/world/train.py +12 -16
- ultralytics/models/yolo/world/train_world.py +11 -34
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +31 -56
- ultralytics/models/yolo/yoloe/train_seg.py +5 -10
- ultralytics/models/yolo/yoloe/val.py +16 -21
- ultralytics/nn/__init__.py +7 -7
- ultralytics/nn/autobackend.py +152 -80
- ultralytics/nn/modules/__init__.py +60 -60
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +133 -217
- ultralytics/nn/modules/conv.py +52 -97
- ultralytics/nn/modules/head.py +64 -116
- ultralytics/nn/modules/transformer.py +79 -89
- ultralytics/nn/modules/utils.py +16 -21
- ultralytics/nn/tasks.py +111 -156
- ultralytics/nn/text_model.py +40 -67
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +11 -17
- ultralytics/solutions/analytics.py +15 -16
- ultralytics/solutions/config.py +5 -6
- ultralytics/solutions/distance_calculation.py +10 -13
- ultralytics/solutions/heatmap.py +7 -13
- ultralytics/solutions/instance_segmentation.py +5 -8
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +12 -19
- ultralytics/solutions/object_cropper.py +8 -14
- ultralytics/solutions/parking_management.py +33 -31
- ultralytics/solutions/queue_management.py +10 -12
- ultralytics/solutions/region_counter.py +9 -12
- ultralytics/solutions/security_alarm.py +15 -20
- ultralytics/solutions/similarity_search.py +13 -17
- ultralytics/solutions/solutions.py +75 -74
- ultralytics/solutions/speed_estimation.py +7 -10
- ultralytics/solutions/streamlit_inference.py +4 -7
- ultralytics/solutions/templates/similarity-search.html +7 -18
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +3 -5
- ultralytics/trackers/bot_sort.py +10 -27
- ultralytics/trackers/byte_tracker.py +14 -30
- ultralytics/trackers/track.py +3 -6
- ultralytics/trackers/utils/gmc.py +11 -22
- ultralytics/trackers/utils/kalman_filter.py +37 -48
- ultralytics/trackers/utils/matching.py +12 -15
- ultralytics/utils/__init__.py +116 -116
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +17 -18
- ultralytics/utils/benchmarks.py +70 -70
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +5 -13
- ultralytics/utils/callbacks/comet.py +32 -46
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +7 -15
- ultralytics/utils/callbacks/platform.py +314 -38
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +23 -31
- ultralytics/utils/callbacks/wb.py +10 -13
- ultralytics/utils/checks.py +151 -87
- ultralytics/utils/cpu.py +3 -8
- ultralytics/utils/dist.py +19 -15
- ultralytics/utils/downloads.py +29 -41
- ultralytics/utils/errors.py +6 -14
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +7 -0
- ultralytics/utils/{export.py → export/engine.py} +16 -16
- ultralytics/utils/export/imx.py +325 -0
- ultralytics/utils/export/tensorflow.py +231 -0
- ultralytics/utils/files.py +24 -28
- ultralytics/utils/git.py +9 -11
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +212 -114
- ultralytics/utils/loss.py +15 -24
- ultralytics/utils/metrics.py +131 -160
- ultralytics/utils/nms.py +21 -30
- ultralytics/utils/ops.py +107 -165
- ultralytics/utils/patches.py +33 -21
- ultralytics/utils/plotting.py +122 -119
- ultralytics/utils/tal.py +28 -44
- ultralytics/utils/torch_utils.py +70 -187
- ultralytics/utils/tqdm.py +20 -20
- ultralytics/utils/triton.py +13 -19
- ultralytics/utils/tuner.py +17 -5
- dgenerate_ultralytics_headless-8.3.196.dist-info/RECORD +0 -281
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
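The per-file `+`/`-` line counts above come from the registry's diff view. For local verification, a rough stdlib-only sketch along the following lines can reproduce a similar per-file summary, assuming both wheels have already been downloaded under the hypothetical filenames used below (wheels are ordinary zip archives):

```python
import difflib
import zipfile

# Hypothetical local paths; adjust to wherever the two wheels were downloaded.
OLD = "dgenerate_ultralytics_headless-8.3.196-py3-none-any.whl"
NEW = "dgenerate_ultralytics_headless-8.3.248-py3-none-any.whl"

def read_text_members(path: str) -> dict[str, list[str]]:
    """Return {member name: lines} for every UTF-8 decodable file in a wheel."""
    out = {}
    with zipfile.ZipFile(path) as zf:
        for name in zf.namelist():
            try:
                out[name] = zf.read(name).decode("utf-8").splitlines()
            except UnicodeDecodeError:
                pass  # skip binary members
    return out

old, new = read_text_members(OLD), read_text_members(NEW)
for name in sorted(old.keys() & new.keys()):
    diff = list(difflib.unified_diff(old[name], new[name], lineterm=""))
    added = sum(1 for ln in diff if ln.startswith("+") and not ln.startswith("+++"))
    removed = sum(1 for ln in diff if ln.startswith("-") and not ln.startswith("---"))
    if added or removed:
        print(f"{name} +{added} -{removed}")
```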
ultralytics/nn/modules/head.py
CHANGED
@@ -12,20 +12,19 @@ import torch.nn.functional as F
 from torch.nn.init import constant_, xavier_uniform_

 from ultralytics.utils import NOT_MACOS14
-from ultralytics.utils.tal import
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.tal import dist2bbox, dist2rbox, make_anchors
+from ultralytics.utils.torch_utils import TORCH_1_11, fuse_conv_and_bn, smart_inference_mode

 from .block import DFL, SAVPE, BNContrastiveHead, ContrastiveHead, Proto, Residual, SwiGLUFFN
 from .conv import Conv, DWConv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init

-__all__ = "
+__all__ = "OBB", "Classify", "Detect", "Pose", "RTDETRDecoder", "Segment", "YOLOEDetect", "YOLOESegment", "v10Detect"


 class Detect(nn.Module):
-    """
-    YOLO Detect head for object detection models.
+    """YOLO Detect head for object detection models.

     This class implements the detection head used in YOLO models for predicting bounding boxes and class probabilities.
     It supports both training and inference modes, with optional end-to-end detection capabilities.
@@ -78,8 +77,7 @@ class Detect(nn.Module):
     xyxy = False  # xyxy or xywh output

     def __init__(self, nc: int = 80, ch: tuple = ()):
-        """
-        Initialize the YOLO detection layer with specified number of classes and channels.
+        """Initialize the YOLO detection layer with specified number of classes and channels.

         Args:
             nc (int): Number of classes.
@@ -126,15 +124,14 @@ class Detect(nn.Module):
         return y if self.export else (y, x)

     def forward_end2end(self, x: list[torch.Tensor]) -> dict | tuple:
-        """
-        Perform forward pass of the v10Detect module.
+        """Perform forward pass of the v10Detect module.

         Args:
             x (list[torch.Tensor]): Input feature maps from different levels.

         Returns:
-            outputs (dict | tuple): Training mode returns dict with one2many and one2one outputs.
-
+            outputs (dict | tuple): Training mode returns dict with one2many and one2one outputs. Inference mode returns
+                processed detections or tuple with detections and raw outputs.
         """
         x_detach = [xi.detach() for xi in x]
         one2one = [
@@ -149,10 +146,8 @@ class Detect(nn.Module):
         y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})

-    @disable_dynamo
     def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
-        """
-        Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.
+        """Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.

         Args:
             x (list[torch.Tensor]): List of feature maps from different detection layers.
@@ -163,28 +158,12 @@ class Detect(nn.Module):
         # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.
+        if self.dynamic or self.shape != shape:
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape

-
-
-            cls = x_cat[:, self.reg_max * 4 :]
-        else:
-            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
-
-        if self.export and self.format in {"tflite", "edgetpu"}:
-            # Precompute normalization factor to increase numerical stability
-            # See https://github.com/ultralytics/ultralytics/issues/7371
-            grid_h = shape[2]
-            grid_w = shape[3]
-            grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
-            norm = self.strides / (self.stride[0] * grid_size)
-            dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
-        else:
-            dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
-        if self.export and self.format == "imx":
-            return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
+        box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
+        dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
         return torch.cat((dbox, cls.sigmoid()), 1)

     def bias_init(self):
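The slimmed-down `_inference` above keeps only the generic decode path: split the concatenated head outputs into DFL box distances and class logits, turn the distances into boxes around the anchors, scale by the per-anchor strides, and append sigmoid class scores. The TFLite/EdgeTPU normalization and the IMX early return are gone from the head (the manifest above adds new `ultralytics/utils/export/` modules, which may be where format-specific handling now lives, though this hunk only shows the removal). A rough, self-contained sketch of that decode, with `dfl_expectation` and `dist2bbox_xywh` as illustrative stand-ins for the package's `DFL` module and `dist2bbox` helper:

```python
import torch

def dfl_expectation(box: torch.Tensor, reg_max: int = 16) -> torch.Tensor:
    """Stand-in for the DFL module: turn 4*reg_max bin logits into 4 expected distances (l, t, r, b)."""
    b, _, a = box.shape
    proj = torch.arange(reg_max, dtype=box.dtype, device=box.device)
    return (box.reshape(b, 4, reg_max, a).softmax(2) * proj.view(1, 1, -1, 1)).sum(2)  # (B, 4, A)

def dist2bbox_xywh(dist: torch.Tensor, anchor_points: torch.Tensor) -> torch.Tensor:
    """Stand-in for dist2bbox: convert (l, t, r, b) distances from anchor centres to (cx, cy, w, h)."""
    lt, rb = dist.chunk(2, 1)
    x1y1, x2y2 = anchor_points - lt, anchor_points + rb
    return torch.cat(((x1y1 + x2y2) / 2, x2y2 - x1y1), 1)

# Toy shapes: batch 1, reg_max 16, 80 classes, 8400 anchors (640x640 input with strides 8/16/32).
B, reg_max, nc, A = 1, 16, 80, 8400
x_cat = torch.randn(B, 4 * reg_max + nc, A)   # concatenated head outputs
anchors = torch.rand(2, A) * 80               # anchor centres in feature-grid units
strides = torch.full((1, A), 8.0)             # per-anchor stride

box, cls = x_cat.split((4 * reg_max, nc), 1)
dbox = dist2bbox_xywh(dfl_expectation(box, reg_max), anchors.unsqueeze(0)) * strides
y = torch.cat((dbox, cls.sigmoid()), 1)       # (B, 4 + nc, A) decoded predictions
print(y.shape)
```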
@@ -211,8 +190,7 @@ class Detect(nn.Module):

     @staticmethod
     def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80) -> torch.Tensor:
-        """
-        Post-process YOLO model predictions.
+        """Post-process YOLO model predictions.

         Args:
             preds (torch.Tensor): Raw predictions with shape (batch_size, num_anchors, 4 + nc) with last dimension
@@ -235,8 +213,7 @@ class Detect(nn.Module):


 class Segment(Detect):
-    """
-    YOLO Segment head for segmentation models.
+    """YOLO Segment head for segmentation models.

     This class extends the Detect head to include mask prediction capabilities for instance segmentation tasks.

@@ -257,8 +234,7 @@ class Segment(Detect):
     """

     def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: tuple = ()):
-        """
-        Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.
+        """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.

         Args:
             nc (int): Number of classes.
@@ -287,8 +263,7 @@ class Segment(Detect):


 class OBB(Detect):
-    """
-    YOLO OBB detection head for detection with rotation models.
+    """YOLO OBB detection head for detection with rotation models.

     This class extends the Detect head to include oriented bounding box prediction with rotation angles.

@@ -309,8 +284,7 @@ class OBB(Detect):
     """

     def __init__(self, nc: int = 80, ne: int = 1, ch: tuple = ()):
-        """
-        Initialize OBB with number of classes `nc` and layer channels `ch`.
+        """Initialize OBB with number of classes `nc` and layer channels `ch`.

         Args:
             nc (int): Number of classes.
@@ -343,8 +317,7 @@ class OBB(Detect):


 class Pose(Detect):
-    """
-    YOLO Pose head for keypoints models.
+    """YOLO Pose head for keypoints models.

     This class extends the Detect head to include keypoint prediction capabilities for pose estimation tasks.

@@ -365,8 +338,7 @@ class Pose(Detect):
     """

     def __init__(self, nc: int = 80, kpt_shape: tuple = (17, 3), ch: tuple = ()):
-        """
-        Initialize YOLO network with default parameters and Convolutional Layers.
+        """Initialize YOLO network with default parameters and Convolutional Layers.

         Args:
             nc (int): Number of classes.
@@ -388,28 +360,15 @@ class Pose(Detect):
         if self.training:
             return x, kpt
         pred_kpt = self.kpts_decode(bs, kpt)
-        if self.export and self.format == "imx":
-            return (*x, pred_kpt.permute(0, 2, 1))
         return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))

     def kpts_decode(self, bs: int, kpts: torch.Tensor) -> torch.Tensor:
         """Decode keypoints from predictions."""
         ndim = self.kpt_shape[1]
         if self.export:
-
-
-
-            }:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
-                # Precompute normalization factor to increase numerical stability
-                y = kpts.view(bs, *self.kpt_shape, -1)
-                grid_h, grid_w = self.shape[2], self.shape[3]
-                grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
-                norm = self.strides / (self.stride[0] * grid_size)
-                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
-            else:
-                # NCNN fix
-                y = kpts.view(bs, *self.kpt_shape, -1)
-                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
+            # NCNN fix
+            y = kpts.view(bs, *self.kpt_shape, -1)
+            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
             if ndim == 3:
                 a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
             return a.view(bs, self.nk, -1)
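In `kpts_decode`, only the NCNN-friendly branch survives under `self.export`: raw keypoint offsets are doubled, re-centred on `anchors - 0.5`, and scaled by the stride. A toy numeric check of that formula with made-up values (not taken from the package):

```python
import torch

# Toy numbers: one anchor at grid cell (10, 7) on a stride-16 level of a 640x640 input.
anchor = torch.tensor([10.5, 7.5])   # anchor centre in grid units (cell-centre convention)
stride = 16.0
raw = torch.tensor([0.25, -0.10])    # raw keypoint x/y prediction for that anchor

# Same formula as the surviving export path: (raw * 2 + (anchor - 0.5)) * stride
xy = (raw * 2.0 + (anchor - 0.5)) * stride
print(xy)  # tensor([168.0000, 108.8000]) -> keypoint at roughly (168, 109) px
```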
@@ -426,8 +385,7 @@ class Pose(Detect):


 class Classify(nn.Module):
-    """
-    YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2).
+    """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2).

     This class implements a classification head that transforms feature maps into class predictions.

@@ -451,8 +409,7 @@ class Classify(nn.Module):
     export = False  # export mode

     def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
-        """
-        Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.
+        """Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.

         Args:
             c1 (int): Number of input channels.
@@ -481,11 +438,10 @@ class Classify(nn.Module):


 class WorldDetect(Detect):
-    """
-    Head for integrating YOLO detection models with semantic understanding from text embeddings.
+    """Head for integrating YOLO detection models with semantic understanding from text embeddings.

-    This class extends the standard Detect head to incorporate text embeddings for enhanced semantic understanding
-
+    This class extends the standard Detect head to incorporate text embeddings for enhanced semantic understanding in
+    object detection tasks.

     Attributes:
         cv3 (nn.ModuleList): Convolution layers for embedding features.
@@ -504,8 +460,7 @@ class WorldDetect(Detect):
     """

     def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
-        """
-        Initialize YOLO detection layer with nc classes and layer channels ch.
+        """Initialize YOLO detection layer with nc classes and layer channels ch.

         Args:
             nc (int): Number of classes.
@@ -539,11 +494,10 @@ class WorldDetect(Detect):


 class LRPCHead(nn.Module):
-    """
-    Lightweight Region Proposal and Classification Head for efficient object detection.
+    """Lightweight Region Proposal and Classification Head for efficient object detection.

-    This head combines region proposal filtering with classification to enable efficient detection with
-
+    This head combines region proposal filtering with classification to enable efficient detection with dynamic
+    vocabulary support.

     Attributes:
         vocab (nn.Module): Vocabulary/classification layer.
@@ -564,8 +518,7 @@ class LRPCHead(nn.Module):
     """

     def __init__(self, vocab: nn.Module, pf: nn.Module, loc: nn.Module, enabled: bool = True):
-        """
-        Initialize LRPCHead with vocabulary, proposal filter, and localization components.
+        """Initialize LRPCHead with vocabulary, proposal filter, and localization components.

         Args:
             vocab (nn.Module): Vocabulary/classification module.
@@ -579,7 +532,8 @@ class LRPCHead(nn.Module):
         self.loc = loc
         self.enabled = enabled

-
+    @staticmethod
+    def conv2linear(conv: nn.Conv2d) -> nn.Linear:
         """Convert a 1x1 convolutional layer to a linear layer."""
         assert isinstance(conv, nn.Conv2d) and conv.kernel_size == (1, 1)
         linear = nn.Linear(conv.in_channels, conv.out_channels)
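`conv2linear` is now a `@staticmethod`; the hunk ends before the weight copy, but the visible lines re-express a 1x1 convolution as an equivalent `nn.Linear`. A self-contained sketch of that idea, where the copy details are an assumption rather than the packaged code:

```python
import torch
import torch.nn as nn

def conv1x1_to_linear(conv: nn.Conv2d) -> nn.Linear:
    """Sketch: re-express a 1x1 Conv2d as an equivalent Linear layer by copying its weights."""
    assert conv.kernel_size == (1, 1)
    linear = nn.Linear(conv.in_channels, conv.out_channels, bias=conv.bias is not None)
    linear.weight.data.copy_(conv.weight.data.view(conv.out_channels, conv.in_channels))
    if conv.bias is not None:
        linear.bias.data.copy_(conv.bias.data)
    return linear

# Quick equivalence check on random data: run the conv, then the linear on flattened positions.
conv = nn.Conv2d(8, 4, 1)
lin = conv1x1_to_linear(conv)
x = torch.randn(2, 8, 5, 5)
out_conv = conv(x).permute(0, 2, 3, 1).reshape(-1, 4)
out_lin = lin(x.permute(0, 2, 3, 1).reshape(-1, 8))
print(torch.allclose(out_conv, out_lin, atol=1e-6))  # True
```

A 1x1 convolution applies the same in-channels-to-out-channels matrix at every spatial position, which is exactly what a linear layer does on flattened positions, so the two outputs match.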
@@ -604,8 +558,7 @@ class LRPCHead(nn.Module):


 class YOLOEDetect(Detect):
-    """
-    Head for integrating YOLO detection models with semantic understanding from text embeddings.
+    """Head for integrating YOLO detection models with semantic understanding from text embeddings.

     This class extends the standard Detect head to support text-guided detection with enhanced semantic understanding
     through text embeddings and visual prompt embeddings.
@@ -637,8 +590,7 @@ class YOLOEDetect(Detect):
     is_fused = False

     def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
-        """
-        Initialize YOLO detection layer with nc classes and layer channels ch.
+        """Initialize YOLO detection layer with nc classes and layer channels ch.

         Args:
             nc (int): Number of classes.
@@ -792,11 +744,10 @@ class YOLOEDetect(Detect):


 class YOLOESegment(YOLOEDetect):
-    """
-    YOLO segmentation head with text embedding capabilities.
+    """YOLO segmentation head with text embedding capabilities.

-    This class extends YOLOEDetect to include mask prediction capabilities for instance segmentation tasks
-
+    This class extends YOLOEDetect to include mask prediction capabilities for instance segmentation tasks with
+    text-guided semantic understanding.

     Attributes:
         nm (int): Number of masks.
@@ -818,8 +769,7 @@ class YOLOESegment(YOLOEDetect):
     def __init__(
         self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: tuple = ()
     ):
-        """
-        Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.
+        """Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.

         Args:
             nc (int): Number of classes.
@@ -860,8 +810,7 @@ class YOLOESegment(YOLOEDetect):


 class RTDETRDecoder(nn.Module):
-    """
-    Real-Time Deformable Transformer Decoder (RTDETRDecoder) module for object detection.
+    """Real-Time Deformable Transformer Decoder (RTDETRDecoder) module for object detection.

     This decoder module utilizes Transformer architecture along with deformable convolutions to predict bounding boxes
     and class labels for objects in an image. It integrates features from multiple layers and runs through a series of
@@ -901,6 +850,10 @@ class RTDETRDecoder(nn.Module):
     """

     export = False  # export mode
+    shapes = []
+    anchors = torch.empty(0)
+    valid_mask = torch.empty(0)
+    dynamic = False

     def __init__(
         self,
@@ -921,8 +874,7 @@ class RTDETRDecoder(nn.Module):
         box_noise_scale: float = 1.0,
         learnt_init_query: bool = False,
     ):
-        """
-        Initialize the RTDETRDecoder module with the given parameters.
+        """Initialize the RTDETRDecoder module with the given parameters.

         Args:
             nc (int): Number of classes.
@@ -982,8 +934,7 @@ class RTDETRDecoder(nn.Module):
         self._reset_parameters()

     def forward(self, x: list[torch.Tensor], batch: dict | None = None) -> tuple | torch.Tensor:
-        """
-        Run the forward pass of the module, returning bounding box and classification scores for the input.
+        """Run the forward pass of the module, returning bounding box and classification scores for the input.

         Args:
             x (list[torch.Tensor]): List of feature maps from the backbone.
@@ -1031,16 +982,15 @@ class RTDETRDecoder(nn.Module):
         y = torch.cat((dec_bboxes.squeeze(0), dec_scores.squeeze(0).sigmoid()), -1)
         return y if self.export else (y, x)

+    @staticmethod
     def _generate_anchors(
-        self,
         shapes: list[list[int]],
         grid_size: float = 0.05,
         dtype: torch.dtype = torch.float32,
         device: str = "cpu",
         eps: float = 1e-2,
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """
-        Generate anchor bounding boxes for given shapes with specific grid size and validate them.
+        """Generate anchor bounding boxes for given shapes with specific grid size and validate them.

         Args:
             shapes (list): List of feature map shapes.
@@ -1057,7 +1007,7 @@ class RTDETRDecoder(nn.Module):
         for i, (h, w) in enumerate(shapes):
             sy = torch.arange(end=h, dtype=dtype, device=device)
             sx = torch.arange(end=w, dtype=dtype, device=device)
-            grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij") if
+            grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_11 else torch.meshgrid(sy, sx)
             grid_xy = torch.stack([grid_x, grid_y], -1)  # (h, w, 2)

             valid_WH = torch.tensor([w, h], dtype=dtype, device=device)
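The surviving `torch.meshgrid(..., indexing="ij")` call stays guarded by the package's `TORCH_1_11` flag because older PyTorch releases do not accept the `indexing` keyword. A minimal sketch of the same guard, computing a local version flag instead of importing the package constant:

```python
import torch

# Hypothetical local flag; the package imports TORCH_1_11 from ultralytics.utils.torch_utils instead.
TORCH_GE_1_11 = tuple(int(v) for v in torch.__version__.split(".")[:2]) >= (1, 11)

sy, sx = torch.arange(4.0), torch.arange(6.0)
# Older releases lack the indexing kwarg but already behave like indexing="ij" by default.
grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij") if TORCH_GE_1_11 else torch.meshgrid(sy, sx)
print(grid_y.shape, grid_x.shape)  # torch.Size([4, 6]) torch.Size([4, 6])
```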
@@ -1072,8 +1022,7 @@ class RTDETRDecoder(nn.Module):
         return anchors, valid_mask

     def _get_encoder_input(self, x: list[torch.Tensor]) -> tuple[torch.Tensor, list[list[int]]]:
-        """
-        Process and return encoder inputs by getting projection features from input and concatenating them.
+        """Process and return encoder inputs by getting projection features from input and concatenating them.

         Args:
             x (list[torch.Tensor]): List of feature maps from the backbone.
@@ -1105,8 +1054,7 @@ class RTDETRDecoder(nn.Module):
         dn_embed: torch.Tensor | None = None,
         dn_bbox: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        """
-        Generate and prepare the input required for the decoder from the provided features and shapes.
+        """Generate and prepare the input required for the decoder from the provided features and shapes.

         Args:
             feats (torch.Tensor): Processed features from encoder.
@@ -1121,22 +1069,24 @@ class RTDETRDecoder(nn.Module):
             enc_scores (torch.Tensor): Encoded scores.
         """
         bs = feats.shape[0]
-
-
-
+        if self.dynamic or self.shapes != shapes:
+            self.anchors, self.valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
+            self.shapes = shapes

+        # Prepare input for decoder
+        features = self.enc_output(self.valid_mask * feats)  # bs, h*w, 256
         enc_outputs_scores = self.enc_score_head(features)  # (bs, h*w, nc)

         # Query selection
-        # (bs,
+        # (bs*num_queries,)
         topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1)
-        # (bs,
+        # (bs*num_queries,)
         batch_ind = torch.arange(end=bs, dtype=topk_ind.dtype).unsqueeze(-1).repeat(1, self.num_queries).view(-1)

         # (bs, num_queries, 256)
         top_k_features = features[batch_ind, topk_ind].view(bs, self.num_queries, -1)
         # (bs, num_queries, 4)
-        top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1)
+        top_k_anchors = self.anchors[:, topk_ind].view(bs, self.num_queries, -1)

         # Dynamic anchors + static content
         refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors
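Together with the new `shapes`, `anchors`, `valid_mask`, and `dynamic` class attributes added a few hunks up, the guard above caches the anchor grid across forward passes and rebuilds it only when the incoming feature-map shapes change. A stripped-down illustration of the same shape-keyed caching pattern, with hypothetical names (not the packaged implementation):

```python
import torch
import torch.nn as nn

class AnchorCache(nn.Module):
    """Illustrative shape-keyed cache: rebuild anchors only when feature-map shapes change."""

    def __init__(self, dynamic: bool = False):
        super().__init__()
        self.dynamic = dynamic      # force a rebuild every call, e.g. for dynamic-shape export
        self.shapes = []            # shapes the cached anchors were built for
        self.anchors = torch.empty(0)

    @staticmethod
    def _generate_anchors(shapes: list[list[int]]) -> torch.Tensor:
        # One (x, y) centre per grid cell, normalized to [0, 1], concatenated over all levels.
        pts = []
        for h, w in shapes:
            gy, gx = torch.meshgrid(torch.arange(h), torch.arange(w), indexing="ij")
            pts.append((torch.stack([gx, gy], -1).reshape(-1, 2) + 0.5) / torch.tensor([w, h]))
        return torch.cat(pts, 0)

    def forward(self, shapes: list[list[int]]) -> torch.Tensor:
        if self.dynamic or self.shapes != shapes:   # cache miss: shapes changed
            self.anchors = self._generate_anchors(shapes)
            self.shapes = shapes
        return self.anchors

cache = AnchorCache()
a1 = cache([[80, 80], [40, 40], [20, 20]])  # built once
a2 = cache([[80, 80], [40, 40], [20, 20]])  # reused, no recompute
print(a1.shape, a1 is a2)                   # torch.Size([8400, 2]) True
```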
@@ -1182,11 +1132,10 @@ class RTDETRDecoder(nn.Module):


 class v10Detect(Detect):
-    """
-    v10 Detection head from https://arxiv.org/pdf/2405.14458.
+    """v10 Detection head from https://arxiv.org/pdf/2405.14458.

-    This class implements the YOLOv10 detection head with dual-assignment training and consistent dual predictions
-
+    This class implements the YOLOv10 detection head with dual-assignment training and consistent dual predictions for
+    improved efficiency and performance.

     Attributes:
         end2end (bool): End-to-end detection mode.
@@ -1210,8 +1159,7 @@ class v10Detect(Detect):
     end2end = True

     def __init__(self, nc: int = 80, ch: tuple = ()):
-        """
-        Initialize the v10Detect object with the specified number of classes and input channels.
+        """Initialize the v10Detect object with the specified number of classes and input channels.

         Args:
             nc (int): Number of classes.