dgenerate-ultralytics-headless 8.3.196__py3-none-any.whl → 8.3.248__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +33 -34
  2. dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
  3. tests/__init__.py +5 -7
  4. tests/conftest.py +8 -15
  5. tests/test_cli.py +8 -10
  6. tests/test_cuda.py +9 -10
  7. tests/test_engine.py +29 -2
  8. tests/test_exports.py +69 -21
  9. tests/test_integrations.py +8 -11
  10. tests/test_python.py +109 -71
  11. tests/test_solutions.py +170 -159
  12. ultralytics/__init__.py +27 -9
  13. ultralytics/cfg/__init__.py +57 -64
  14. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  15. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  16. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  17. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  18. ultralytics/cfg/datasets/Objects365.yaml +19 -15
  19. ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
  20. ultralytics/cfg/datasets/VOC.yaml +19 -21
  21. ultralytics/cfg/datasets/VisDrone.yaml +5 -5
  22. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  23. ultralytics/cfg/datasets/coco-pose.yaml +24 -2
  24. ultralytics/cfg/datasets/coco.yaml +2 -2
  25. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  26. ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
  27. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  28. ultralytics/cfg/datasets/dog-pose.yaml +28 -0
  29. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  30. ultralytics/cfg/datasets/dota8.yaml +2 -2
  31. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
  32. ultralytics/cfg/datasets/kitti.yaml +27 -0
  33. ultralytics/cfg/datasets/lvis.yaml +7 -7
  34. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  35. ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
  36. ultralytics/cfg/datasets/xView.yaml +16 -16
  37. ultralytics/cfg/default.yaml +96 -94
  38. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  39. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  40. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  41. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
  42. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
  43. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
  44. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
  45. ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
  46. ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
  47. ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
  48. ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
  49. ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
  50. ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
  51. ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
  52. ultralytics/cfg/models/v6/yolov6.yaml +1 -1
  53. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  54. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  55. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  56. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  57. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  58. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  59. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  60. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  61. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  62. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  63. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  64. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
  65. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  66. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  67. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  68. ultralytics/cfg/trackers/botsort.yaml +16 -17
  69. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  70. ultralytics/data/__init__.py +4 -4
  71. ultralytics/data/annotator.py +3 -4
  72. ultralytics/data/augment.py +286 -476
  73. ultralytics/data/base.py +18 -26
  74. ultralytics/data/build.py +151 -26
  75. ultralytics/data/converter.py +38 -50
  76. ultralytics/data/dataset.py +47 -75
  77. ultralytics/data/loaders.py +42 -49
  78. ultralytics/data/split.py +5 -6
  79. ultralytics/data/split_dota.py +8 -15
  80. ultralytics/data/utils.py +41 -45
  81. ultralytics/engine/exporter.py +462 -462
  82. ultralytics/engine/model.py +150 -191
  83. ultralytics/engine/predictor.py +30 -40
  84. ultralytics/engine/results.py +177 -311
  85. ultralytics/engine/trainer.py +193 -120
  86. ultralytics/engine/tuner.py +77 -63
  87. ultralytics/engine/validator.py +39 -22
  88. ultralytics/hub/__init__.py +16 -19
  89. ultralytics/hub/auth.py +6 -12
  90. ultralytics/hub/google/__init__.py +7 -10
  91. ultralytics/hub/session.py +15 -25
  92. ultralytics/hub/utils.py +5 -8
  93. ultralytics/models/__init__.py +1 -1
  94. ultralytics/models/fastsam/__init__.py +1 -1
  95. ultralytics/models/fastsam/model.py +8 -10
  96. ultralytics/models/fastsam/predict.py +19 -30
  97. ultralytics/models/fastsam/utils.py +1 -2
  98. ultralytics/models/fastsam/val.py +5 -7
  99. ultralytics/models/nas/__init__.py +1 -1
  100. ultralytics/models/nas/model.py +5 -8
  101. ultralytics/models/nas/predict.py +7 -9
  102. ultralytics/models/nas/val.py +1 -2
  103. ultralytics/models/rtdetr/__init__.py +1 -1
  104. ultralytics/models/rtdetr/model.py +7 -8
  105. ultralytics/models/rtdetr/predict.py +15 -19
  106. ultralytics/models/rtdetr/train.py +10 -13
  107. ultralytics/models/rtdetr/val.py +21 -23
  108. ultralytics/models/sam/__init__.py +15 -2
  109. ultralytics/models/sam/amg.py +14 -20
  110. ultralytics/models/sam/build.py +26 -19
  111. ultralytics/models/sam/build_sam3.py +377 -0
  112. ultralytics/models/sam/model.py +29 -32
  113. ultralytics/models/sam/modules/blocks.py +83 -144
  114. ultralytics/models/sam/modules/decoders.py +22 -40
  115. ultralytics/models/sam/modules/encoders.py +44 -101
  116. ultralytics/models/sam/modules/memory_attention.py +16 -30
  117. ultralytics/models/sam/modules/sam.py +206 -79
  118. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  119. ultralytics/models/sam/modules/transformer.py +18 -28
  120. ultralytics/models/sam/modules/utils.py +174 -50
  121. ultralytics/models/sam/predict.py +2268 -366
  122. ultralytics/models/sam/sam3/__init__.py +3 -0
  123. ultralytics/models/sam/sam3/decoder.py +546 -0
  124. ultralytics/models/sam/sam3/encoder.py +529 -0
  125. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  126. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  127. ultralytics/models/sam/sam3/model_misc.py +199 -0
  128. ultralytics/models/sam/sam3/necks.py +129 -0
  129. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  130. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  131. ultralytics/models/sam/sam3/vitdet.py +547 -0
  132. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  133. ultralytics/models/utils/loss.py +14 -26
  134. ultralytics/models/utils/ops.py +13 -17
  135. ultralytics/models/yolo/__init__.py +1 -1
  136. ultralytics/models/yolo/classify/predict.py +9 -12
  137. ultralytics/models/yolo/classify/train.py +15 -41
  138. ultralytics/models/yolo/classify/val.py +34 -32
  139. ultralytics/models/yolo/detect/predict.py +8 -11
  140. ultralytics/models/yolo/detect/train.py +13 -32
  141. ultralytics/models/yolo/detect/val.py +75 -63
  142. ultralytics/models/yolo/model.py +37 -53
  143. ultralytics/models/yolo/obb/predict.py +5 -14
  144. ultralytics/models/yolo/obb/train.py +11 -14
  145. ultralytics/models/yolo/obb/val.py +42 -39
  146. ultralytics/models/yolo/pose/__init__.py +1 -1
  147. ultralytics/models/yolo/pose/predict.py +7 -22
  148. ultralytics/models/yolo/pose/train.py +10 -22
  149. ultralytics/models/yolo/pose/val.py +40 -59
  150. ultralytics/models/yolo/segment/predict.py +16 -20
  151. ultralytics/models/yolo/segment/train.py +3 -12
  152. ultralytics/models/yolo/segment/val.py +106 -56
  153. ultralytics/models/yolo/world/train.py +12 -16
  154. ultralytics/models/yolo/world/train_world.py +11 -34
  155. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  156. ultralytics/models/yolo/yoloe/predict.py +16 -23
  157. ultralytics/models/yolo/yoloe/train.py +31 -56
  158. ultralytics/models/yolo/yoloe/train_seg.py +5 -10
  159. ultralytics/models/yolo/yoloe/val.py +16 -21
  160. ultralytics/nn/__init__.py +7 -7
  161. ultralytics/nn/autobackend.py +152 -80
  162. ultralytics/nn/modules/__init__.py +60 -60
  163. ultralytics/nn/modules/activation.py +4 -6
  164. ultralytics/nn/modules/block.py +133 -217
  165. ultralytics/nn/modules/conv.py +52 -97
  166. ultralytics/nn/modules/head.py +64 -116
  167. ultralytics/nn/modules/transformer.py +79 -89
  168. ultralytics/nn/modules/utils.py +16 -21
  169. ultralytics/nn/tasks.py +111 -156
  170. ultralytics/nn/text_model.py +40 -67
  171. ultralytics/solutions/__init__.py +12 -12
  172. ultralytics/solutions/ai_gym.py +11 -17
  173. ultralytics/solutions/analytics.py +15 -16
  174. ultralytics/solutions/config.py +5 -6
  175. ultralytics/solutions/distance_calculation.py +10 -13
  176. ultralytics/solutions/heatmap.py +7 -13
  177. ultralytics/solutions/instance_segmentation.py +5 -8
  178. ultralytics/solutions/object_blurrer.py +7 -10
  179. ultralytics/solutions/object_counter.py +12 -19
  180. ultralytics/solutions/object_cropper.py +8 -14
  181. ultralytics/solutions/parking_management.py +33 -31
  182. ultralytics/solutions/queue_management.py +10 -12
  183. ultralytics/solutions/region_counter.py +9 -12
  184. ultralytics/solutions/security_alarm.py +15 -20
  185. ultralytics/solutions/similarity_search.py +13 -17
  186. ultralytics/solutions/solutions.py +75 -74
  187. ultralytics/solutions/speed_estimation.py +7 -10
  188. ultralytics/solutions/streamlit_inference.py +4 -7
  189. ultralytics/solutions/templates/similarity-search.html +7 -18
  190. ultralytics/solutions/trackzone.py +7 -10
  191. ultralytics/solutions/vision_eye.py +5 -8
  192. ultralytics/trackers/__init__.py +1 -1
  193. ultralytics/trackers/basetrack.py +3 -5
  194. ultralytics/trackers/bot_sort.py +10 -27
  195. ultralytics/trackers/byte_tracker.py +14 -30
  196. ultralytics/trackers/track.py +3 -6
  197. ultralytics/trackers/utils/gmc.py +11 -22
  198. ultralytics/trackers/utils/kalman_filter.py +37 -48
  199. ultralytics/trackers/utils/matching.py +12 -15
  200. ultralytics/utils/__init__.py +116 -116
  201. ultralytics/utils/autobatch.py +2 -4
  202. ultralytics/utils/autodevice.py +17 -18
  203. ultralytics/utils/benchmarks.py +70 -70
  204. ultralytics/utils/callbacks/base.py +8 -10
  205. ultralytics/utils/callbacks/clearml.py +5 -13
  206. ultralytics/utils/callbacks/comet.py +32 -46
  207. ultralytics/utils/callbacks/dvc.py +13 -18
  208. ultralytics/utils/callbacks/mlflow.py +4 -5
  209. ultralytics/utils/callbacks/neptune.py +7 -15
  210. ultralytics/utils/callbacks/platform.py +314 -38
  211. ultralytics/utils/callbacks/raytune.py +3 -4
  212. ultralytics/utils/callbacks/tensorboard.py +23 -31
  213. ultralytics/utils/callbacks/wb.py +10 -13
  214. ultralytics/utils/checks.py +151 -87
  215. ultralytics/utils/cpu.py +3 -8
  216. ultralytics/utils/dist.py +19 -15
  217. ultralytics/utils/downloads.py +29 -41
  218. ultralytics/utils/errors.py +6 -14
  219. ultralytics/utils/events.py +2 -4
  220. ultralytics/utils/export/__init__.py +7 -0
  221. ultralytics/utils/{export.py → export/engine.py} +16 -16
  222. ultralytics/utils/export/imx.py +325 -0
  223. ultralytics/utils/export/tensorflow.py +231 -0
  224. ultralytics/utils/files.py +24 -28
  225. ultralytics/utils/git.py +9 -11
  226. ultralytics/utils/instance.py +30 -51
  227. ultralytics/utils/logger.py +212 -114
  228. ultralytics/utils/loss.py +15 -24
  229. ultralytics/utils/metrics.py +131 -160
  230. ultralytics/utils/nms.py +21 -30
  231. ultralytics/utils/ops.py +107 -165
  232. ultralytics/utils/patches.py +33 -21
  233. ultralytics/utils/plotting.py +122 -119
  234. ultralytics/utils/tal.py +28 -44
  235. ultralytics/utils/torch_utils.py +70 -187
  236. ultralytics/utils/tqdm.py +20 -20
  237. ultralytics/utils/triton.py +13 -19
  238. ultralytics/utils/tuner.py +17 -5
  239. dgenerate_ultralytics_headless-8.3.196.dist-info/RECORD +0 -281
  240. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
  241. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
  242. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
  243. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/transformer.py
@@ -10,26 +10,27 @@ import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.init import constant_, xavier_uniform_
 
+from ultralytics.utils.torch_utils import TORCH_1_11
+
 from .conv import Conv
 from .utils import _get_clones, inverse_sigmoid, multi_scale_deformable_attn_pytorch
 
 __all__ = (
-    "TransformerEncoderLayer",
-    "TransformerLayer",
-    "TransformerBlock",
-    "MLPBlock",
-    "LayerNorm2d",
     "AIFI",
+    "MLP",
     "DeformableTransformerDecoder",
     "DeformableTransformerDecoderLayer",
+    "LayerNorm2d",
+    "MLPBlock",
     "MSDeformAttn",
-    "MLP",
+    "TransformerBlock",
+    "TransformerEncoderLayer",
+    "TransformerLayer",
 )
 
 
 class TransformerEncoderLayer(nn.Module):
-    """
-    A single layer of the transformer encoder.
+    """A single layer of the transformer encoder.
 
     This class implements a standard transformer encoder layer with multi-head attention and feedforward network,
     supporting both pre-normalization and post-normalization configurations.
@@ -56,8 +57,7 @@ class TransformerEncoderLayer(nn.Module):
         act: nn.Module = nn.GELU(),
         normalize_before: bool = False,
     ):
-        """
-        Initialize the TransformerEncoderLayer with specified parameters.
+        """Initialize the TransformerEncoderLayer with specified parameters.
 
         Args:
             c1 (int): Input dimension.
@@ -100,8 +100,7 @@
         src_key_padding_mask: torch.Tensor | None = None,
         pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Perform forward pass with post-normalization.
+        """Perform forward pass with post-normalization.
 
         Args:
             src (torch.Tensor): Input tensor.
@@ -127,8 +126,7 @@
         src_key_padding_mask: torch.Tensor | None = None,
         pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Perform forward pass with pre-normalization.
+        """Perform forward pass with pre-normalization.
 
         Args:
             src (torch.Tensor): Input tensor.
@@ -154,8 +152,7 @@
         src_key_padding_mask: torch.Tensor | None = None,
         pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Forward propagate the input through the encoder module.
+        """Forward propagate the input through the encoder module.
 
         Args:
             src (torch.Tensor): Input tensor.
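Note: the docstring cleanups above cover forward_post, forward_pre, and the dispatching forward of TransformerEncoderLayer. For orientation, a toy sketch of the pre-/post-normalization split those docstrings refer to (an illustration only, not the Ultralytics layer itself, which uses multi-head attention and a two-layer feedforward):

```python
import torch
import torch.nn as nn


class EncoderLayerSketch(nn.Module):
    """Toy layer showing the pre-/post-norm dispatch described above."""

    def __init__(self, c1: int, normalize_before: bool = False):
        super().__init__()
        self.ffn = nn.Linear(c1, c1)  # stand-in for the attention + FFN sublayers
        self.norm = nn.LayerNorm(c1)
        self.normalize_before = normalize_before

    def forward_post(self, src: torch.Tensor) -> torch.Tensor:
        return self.norm(src + self.ffn(src))  # sublayer first, normalize after

    def forward_pre(self, src: torch.Tensor) -> torch.Tensor:
        return src + self.ffn(self.norm(src))  # normalize first, then sublayer

    def forward(self, src: torch.Tensor) -> torch.Tensor:
        return self.forward_pre(src) if self.normalize_before else self.forward_post(src)


x = torch.randn(2, 16, 32)
print(EncoderLayerSketch(32, normalize_before=True)(x).shape)  # torch.Size([2, 16, 32])
```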
@@ -172,8 +169,7 @@
 
 
 class AIFI(TransformerEncoderLayer):
-    """
-    AIFI transformer layer for 2D data with positional embeddings.
+    """AIFI transformer layer for 2D data with positional embeddings.
 
     This class extends TransformerEncoderLayer to work with 2D feature maps by adding 2D sine-cosine positional
     embeddings and handling the spatial dimensions appropriately.
@@ -188,8 +184,7 @@
         act: nn.Module = nn.GELU(),
         normalize_before: bool = False,
     ):
-        """
-        Initialize the AIFI instance with specified parameters.
+        """Initialize the AIFI instance with specified parameters.
 
         Args:
             c1 (int): Input dimension.
@@ -202,8 +197,7 @@
         super().__init__(c1, cm, num_heads, dropout, act, normalize_before)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Forward pass for the AIFI transformer layer.
+        """Forward pass for the AIFI transformer layer.
 
         Args:
             x (torch.Tensor): Input tensor with shape [B, C, H, W].
@@ -221,8 +215,7 @@
     def build_2d_sincos_position_embedding(
         w: int, h: int, embed_dim: int = 256, temperature: float = 10000.0
     ) -> torch.Tensor:
-        """
-        Build 2D sine-cosine position embedding.
+        """Build 2D sine-cosine position embedding.
 
         Args:
             w (int): Width of the feature map.
@@ -236,7 +229,7 @@
         assert embed_dim % 4 == 0, "Embed dimension must be divisible by 4 for 2D sin-cos position embedding"
         grid_w = torch.arange(w, dtype=torch.float32)
         grid_h = torch.arange(h, dtype=torch.float32)
-        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="ij")
+        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="ij") if TORCH_1_11 else torch.meshgrid(grid_w, grid_h)
         pos_dim = embed_dim // 4
         omega = torch.arange(pos_dim, dtype=torch.float32) / pos_dim
         omega = 1.0 / (temperature**omega)
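The meshgrid change above guards the `indexing` keyword behind `TORCH_1_11`: older PyTorch releases do not accept the argument (it landed around PyTorch 1.10), and their legacy default already matches "ij" indexing, so both branches build the same grids. A quick equivalence check on a recent PyTorch:

```python
import torch

w, h = 4, 3
grid_w = torch.arange(w, dtype=torch.float32)
grid_h = torch.arange(h, dtype=torch.float32)

# On PyTorch >= 1.10 both calls produce identical "ij"-indexed grids; the
# keyword-less call is the fallback path that old versions take anyway.
a = torch.meshgrid(grid_w, grid_h, indexing="ij")
b = torch.meshgrid(grid_w, grid_h)  # legacy default; warns on new torch
assert all(torch.equal(x, y) for x, y in zip(a, b))
```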
@@ -251,8 +244,7 @@
     """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""
 
     def __init__(self, c: int, num_heads: int):
-        """
-        Initialize a self-attention mechanism using linear transformations and multi-head attention.
+        """Initialize a self-attention mechanism using linear transformations and multi-head attention.
 
         Args:
             c (int): Input and output channel dimension.
@@ -267,8 +259,7 @@
         self.fc2 = nn.Linear(c, c, bias=False)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Apply a transformer block to the input x and return the output.
+        """Apply a transformer block to the input x and return the output.
 
         Args:
             x (torch.Tensor): Input tensor.
@@ -281,11 +272,10 @@
 
 
 class TransformerBlock(nn.Module):
-    """
-    Vision Transformer block based on https://arxiv.org/abs/2010.11929.
+    """Vision Transformer block based on https://arxiv.org/abs/2010.11929.
 
-    This class implements a complete transformer block with optional convolution layer for channel adjustment,
-    learnable position embedding, and multiple transformer layers.
+    This class implements a complete transformer block with optional convolution layer for channel adjustment, learnable
+    position embedding, and multiple transformer layers.
 
     Attributes:
         conv (Conv, optional): Convolution layer if input and output channels differ.
@@ -295,8 +285,7 @@
     """
 
    def __init__(self, c1: int, c2: int, num_heads: int, num_layers: int):
-        """
-        Initialize a Transformer module with position embedding and specified number of heads and layers.
+        """Initialize a Transformer module with position embedding and specified number of heads and layers.
 
         Args:
             c1 (int): Input channel dimension.
@@ -313,28 +302,26 @@
         self.c2 = c2
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Forward propagate the input through the transformer block.
+        """Forward propagate the input through the transformer block.
 
         Args:
-            x (torch.Tensor): Input tensor with shape [b, c1, w, h].
+            x (torch.Tensor): Input tensor with shape [b, c1, h, w].
 
         Returns:
-            (torch.Tensor): Output tensor with shape [b, c2, w, h].
+            (torch.Tensor): Output tensor with shape [b, c2, h, w].
         """
         if self.conv is not None:
             x = self.conv(x)
-        b, _, w, h = x.shape
+        b, _, h, w = x.shape
         p = x.flatten(2).permute(2, 0, 1)
-        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
+        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, h, w)
 
 
 class MLPBlock(nn.Module):
     """A single block of a multi-layer perceptron."""
 
     def __init__(self, embedding_dim: int, mlp_dim: int, act=nn.GELU):
-        """
-        Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function.
+        """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function.
 
         Args:
             embedding_dim (int): Input and output dimension.
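The `b, _, w, h = x.shape` fix above corrects a naming mix-up: PyTorch feature maps are NCHW, so dims 2 and 3 are height then width. Because the old code reshaped back with the same swapped names, the output tensor was still correct; only the variable names and docstring shapes were wrong for non-square inputs. The corrected round trip in isolation:

```python
import torch

x = torch.randn(2, 8, 20, 30)  # NCHW: batch=2, channels=8, height=20, width=30
b, c, h, w = x.shape
assert (h, w) == (20, 30)

# flatten(2) lays out the h*w positions, permute gives (hw, b, c) token order,
# and the inverse reshape must use (b, c, h, w) to restore the map losslessly.
p = x.flatten(2).permute(2, 0, 1)           # (600, 2, 8)
restored = p.permute(1, 2, 0).reshape(b, c, h, w)
assert torch.equal(restored, x)
```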
@@ -347,8 +334,7 @@
         self.act = act()
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Forward pass for the MLPBlock.
+        """Forward pass for the MLPBlock.
 
         Args:
             x (torch.Tensor): Input tensor.
@@ -360,11 +346,10 @@
 
 
 class MLP(nn.Module):
-    """
-    A simple multi-layer perceptron (also called FFN).
+    """A simple multi-layer perceptron (also called FFN).
 
-    This class implements a configurable MLP with multiple linear layers, activation functions, and optional
-    sigmoid output activation.
+    This class implements a configurable MLP with multiple linear layers, activation functions, and optional sigmoid
+    output activation.
 
     Attributes:
         num_layers (int): Number of layers in the MLP.
@@ -374,10 +359,17 @@
     """
 
     def __init__(
-        self, input_dim: int, hidden_dim: int, output_dim: int, num_layers: int, act=nn.ReLU, sigmoid: bool = False
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        output_dim: int,
+        num_layers: int,
+        act=nn.ReLU,
+        sigmoid: bool = False,
+        residual: bool = False,
+        out_norm: nn.Module = None,
     ):
-        """
-        Initialize the MLP with specified input, hidden, output dimensions and number of layers.
+        """Initialize the MLP with specified input, hidden, output dimensions and number of layers.
 
         Args:
             input_dim (int): Input dimension.
@@ -386,17 +378,24 @@
             num_layers (int): Number of layers.
             act (nn.Module): Activation function.
             sigmoid (bool): Whether to apply sigmoid to the output.
+            residual (bool): Whether to use residual connections.
+            out_norm (nn.Module, optional): Normalization layer for the output.
         """
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)
-        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim, *h], [*h, output_dim]))
         self.sigmoid = sigmoid
         self.act = act()
+        if residual and input_dim != output_dim:
+            raise ValueError("residual is only supported if input_dim == output_dim")
+        self.residual = residual
+        # whether to apply a normalization layer to the output
+        assert isinstance(out_norm, nn.Module) or out_norm is None
+        self.out_norm = out_norm or nn.Identity()
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Forward pass for the entire MLP.
+        """Forward pass for the entire MLP.
 
         Args:
             x (torch.Tensor): Input tensor.
@@ -404,17 +403,20 @@
         Returns:
             (torch.Tensor): Output tensor after MLP.
         """
+        orig_x = x
         for i, layer in enumerate(self.layers):
             x = getattr(self, "act", nn.ReLU())(layer(x)) if i < self.num_layers - 1 else layer(x)
+        if getattr(self, "residual", False):
+            x = x + orig_x
+        x = getattr(self, "out_norm", nn.Identity())(x)
         return x.sigmoid() if getattr(self, "sigmoid", False) else x
 
 
 class LayerNorm2d(nn.Module):
-    """
-    2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
+    """2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
 
-    This class implements layer normalization for 2D feature maps, normalizing across the channel dimension
-    while preserving spatial dimensions.
+    This class implements layer normalization for 2D feature maps, normalizing across the channel dimension while
+    preserving spatial dimensions.
 
     Attributes:
         weight (nn.Parameter): Learnable scale parameter.
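The hunks above add `residual` and `out_norm` to MLP without changing default behavior: the residual sum and output normalization run before the optional sigmoid, and `residual=True` demands matching input/output widths. A usage sketch against the 8.3.248 signature shown above (shapes are illustrative):

```python
import torch
import torch.nn as nn
from ultralytics.nn.modules.transformer import MLP

x = torch.randn(2, 100, 256)

# Default behavior is unchanged: stacked Linear layers with the activation between.
plain = MLP(input_dim=256, hidden_dim=512, output_dim=4, num_layers=3)
print(plain(x).shape)  # torch.Size([2, 100, 4])

# residual=True adds the input back (so input_dim must equal output_dim), and
# out_norm (here LayerNorm) is applied before the optional sigmoid.
res = MLP(256, 512, 256, 3, residual=True, out_norm=nn.LayerNorm(256))
print(res(x).shape)  # torch.Size([2, 100, 256])
```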
@@ -427,8 +429,7 @@
     """
 
     def __init__(self, num_channels: int, eps: float = 1e-6):
-        """
-        Initialize LayerNorm2d with the given parameters.
+        """Initialize LayerNorm2d with the given parameters.
 
         Args:
             num_channels (int): Number of channels in the input.
@@ -440,8 +441,7 @@
         self.eps = eps
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Perform forward pass for 2D layer normalization.
+        """Perform forward pass for 2D layer normalization.
 
         Args:
             x (torch.Tensor): Input tensor.
@@ -456,11 +456,10 @@
 
 
 class MSDeformAttn(nn.Module):
-    """
-    Multiscale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
+    """Multiscale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
 
-    This module implements multiscale deformable attention that can attend to features at multiple scales
-    with learnable sampling locations and attention weights.
+    This module implements multiscale deformable attention that can attend to features at multiple scales with learnable
+    sampling locations and attention weights.
 
     Attributes:
         im2col_step (int): Step size for im2col operations.
@@ -478,8 +477,7 @@
     """
 
     def __init__(self, d_model: int = 256, n_levels: int = 4, n_heads: int = 8, n_points: int = 4):
-        """
-        Initialize MSDeformAttn with the given parameters.
+        """Initialize MSDeformAttn with the given parameters.
 
         Args:
             d_model (int): Model dimension.
@@ -537,13 +535,12 @@
         value_shapes: list,
         value_mask: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Perform forward pass for multiscale deformable attention.
+        """Perform forward pass for multiscale deformable attention.
 
         Args:
             query (torch.Tensor): Query tensor with shape [bs, query_length, C].
-            refer_bbox (torch.Tensor): Reference bounding boxes with shape [bs, query_length, n_levels, 2],
-                range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area.
+            refer_bbox (torch.Tensor): Reference bounding boxes with shape [bs, query_length, n_levels, 2], range in [0,
+                1], top-left (0,0), bottom-right (1, 1), including padding area.
             value (torch.Tensor): Value tensor with shape [bs, value_length, C].
             value_shapes (list): List with shape [n_levels, 2], [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})].
             value_mask (torch.Tensor, optional): Mask tensor with shape [bs, value_length], True for non-padding
@@ -582,8 +579,7 @@
 
 
 class DeformableTransformerDecoderLayer(nn.Module):
-    """
-    Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
+    """Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
 
     This class implements a single decoder layer with self-attention, cross-attention using multiscale deformable
     attention, and a feedforward network.
@@ -617,8 +613,7 @@
         n_levels: int = 4,
         n_points: int = 4,
     ):
-        """
-        Initialize the DeformableTransformerDecoderLayer with the given parameters.
+        """Initialize the DeformableTransformerDecoderLayer with the given parameters.
 
         Args:
             d_model (int): Model dimension.
@@ -655,8 +650,7 @@
         return tensor if pos is None else tensor + pos
 
     def forward_ffn(self, tgt: torch.Tensor) -> torch.Tensor:
-        """
-        Perform forward pass through the Feed-Forward Network part of the layer.
+        """Perform forward pass through the Feed-Forward Network part of the layer.
 
         Args:
             tgt (torch.Tensor): Input tensor.
@@ -678,8 +672,7 @@
         attn_mask: torch.Tensor | None = None,
         query_pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Perform the forward pass through the entire decoder layer.
+        """Perform the forward pass through the entire decoder layer.
 
         Args:
             embed (torch.Tensor): Input embeddings.
@@ -713,11 +706,10 @@
 
 
 class DeformableTransformerDecoder(nn.Module):
-    """
-    Deformable Transformer Decoder based on PaddleDetection implementation.
+    """Deformable Transformer Decoder based on PaddleDetection implementation.
 
-    This class implements a complete deformable transformer decoder with multiple decoder layers and prediction
-    heads for bounding box regression and classification.
+    This class implements a complete deformable transformer decoder with multiple decoder layers and prediction heads
+    for bounding box regression and classification.
 
     Attributes:
         layers (nn.ModuleList): List of decoder layers.
@@ -730,8 +722,7 @@
     """
 
    def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, eval_idx: int = -1):
-        """
-        Initialize the DeformableTransformerDecoder with the given parameters.
+        """Initialize the DeformableTransformerDecoder with the given parameters.
 
         Args:
             hidden_dim (int): Hidden dimension.
@@ -757,8 +748,7 @@
         attn_mask: torch.Tensor | None = None,
         padding_mask: torch.Tensor | None = None,
     ):
-        """
-        Perform the forward pass through the entire decoder.
+        """Perform the forward pass through the entire decoder.
 
         Args:
             embed (torch.Tensor): Decoder embeddings.
ultralytics/nn/modules/utils.py
@@ -9,12 +9,11 @@ import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.init import uniform_
 
-__all__ = "multi_scale_deformable_attn_pytorch", "inverse_sigmoid"
+__all__ = "inverse_sigmoid", "multi_scale_deformable_attn_pytorch"
 
 
 def _get_clones(module, n):
-    """
-    Create a list of cloned modules from the given module.
+    """Create a list of cloned modules from the given module.
 
     Args:
         module (nn.Module): The module to be cloned.
@@ -34,12 +33,11 @@
 
 
 def bias_init_with_prob(prior_prob=0.01):
-    """
-    Initialize conv/fc bias value according to a given probability value.
+    """Initialize conv/fc bias value according to a given probability value.
 
-    This function calculates the bias initialization value based on a prior probability using the inverse error function.
-    It's commonly used in object detection models to initialize classification layers with a specific positive prediction
-    probability.
+    This function calculates the bias initialization value based on a prior probability using the inverse error
+    function. It's commonly used in object detection models to initialize classification layers with a specific positive
+    prediction probability.
 
     Args:
         prior_prob (float, optional): Prior probability for bias initialization.
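For context on `bias_init_with_prob` (only its docstring is reflowed here): the usual focal-loss-style initialization chooses the bias so that the initial sigmoid output equals the prior, i.e. b = -log((1 - p) / p). A hedged sketch of that arithmetic (not a copy of the upstream function):

```python
import math

def bias_init_with_prob_sketch(prior_prob: float = 0.01) -> float:
    # Solve sigmoid(b) = p for b: b = -log((1 - p) / p).
    return float(-math.log((1 - prior_prob) / prior_prob))

b = bias_init_with_prob_sketch(0.01)
print(round(b, 4))             # -4.5951
print(1 / (1 + math.exp(-b)))  # ~0.01, the requested prior probability
```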
@@ -56,11 +54,10 @@
 
 
 def linear_init(module):
-    """
-    Initialize the weights and biases of a linear module.
+    """Initialize the weights and biases of a linear module.
 
-    This function initializes the weights of a linear module using a uniform distribution within bounds calculated
-    from the input dimension. If the module has a bias, it is also initialized.
+    This function initializes the weights of a linear module using a uniform distribution within bounds calculated from
+    the input dimension. If the module has a bias, it is also initialized.
 
     Args:
         module (nn.Module): Linear module to initialize.
@@ -80,8 +77,7 @@
 
 
 def inverse_sigmoid(x, eps=1e-5):
-    """
-    Calculate the inverse sigmoid function for a tensor.
+    """Calculate the inverse sigmoid function for a tensor.
 
     This function applies the inverse of the sigmoid function to a tensor, which is useful in various neural network
     operations, particularly in attention mechanisms and coordinate transformations.
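`inverse_sigmoid` is the logit function with clamping for numerical safety. A minimal sketch consistent with the signature above, assuming the conventional eps-clamped form:

```python
import torch

def inverse_sigmoid_sketch(x: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
    # Clamp into [0, 1], then apply logit(x) = log(x / (1 - x)) with guarded denominators.
    x = x.clamp(min=0, max=1)
    x1 = x.clamp(min=eps)
    x2 = (1 - x).clamp(min=eps)
    return torch.log(x1 / x2)

p = torch.tensor([0.1, 0.5, 0.9])
print(inverse_sigmoid_sketch(p))                 # ~[-2.1972, 0.0000, 2.1972]
print(torch.sigmoid(inverse_sigmoid_sketch(p)))  # recovers ~[0.1, 0.5, 0.9]
```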
@@ -110,8 +106,7 @@
     sampling_locations: torch.Tensor,
     attention_weights: torch.Tensor,
 ) -> torch.Tensor:
-    """
-    Implement multi-scale deformable attention in PyTorch.
+    """Implement multi-scale deformable attention in PyTorch.
 
     This function performs deformable attention across multiple feature map scales, allowing the model to attend to
     different spatial locations with learned offsets.
@@ -119,10 +114,10 @@
     Args:
         value (torch.Tensor): The value tensor with shape (bs, num_keys, num_heads, embed_dims).
         value_spatial_shapes (torch.Tensor): Spatial shapes of the value tensor with shape (num_levels, 2).
-        sampling_locations (torch.Tensor): The sampling locations with shape
-            (bs, num_queries, num_heads, num_levels, num_points, 2).
-        attention_weights (torch.Tensor): The attention weights with shape
-            (bs, num_queries, num_heads, num_levels, num_points).
+        sampling_locations (torch.Tensor): The sampling locations with shape (bs, num_queries, num_heads, num_levels,
+            num_points, 2).
+        attention_weights (torch.Tensor): The attention weights with shape (bs, num_queries, num_heads, num_levels,
+            num_points).
 
     Returns:
         (torch.Tensor): The output tensor with shape (bs, num_queries, embed_dims).
@@ -152,7 +147,7 @@
         sampling_value_list.append(sampling_value_l_)
     # (bs, num_queries, num_heads, num_levels, num_points) ->
     # (bs, num_heads, num_queries, num_levels, num_points) ->
-    # (bs, num_heads, 1, num_queries, num_levels*num_points)
+    # (bs*num_heads, 1, num_queries, num_levels*num_points)
     attention_weights = attention_weights.transpose(1, 2).reshape(
         bs * num_heads, 1, num_queries, num_levels * num_points
     )
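The final hunk only corrects a comment: the reshape that follows merges the batch and head dims, so the weights become (bs*num_heads, 1, num_queries, num_levels*num_points), ready to broadcast against the stacked sampled values. The shape arithmetic in isolation:

```python
import torch

bs, num_queries, num_heads, num_levels, num_points = 2, 100, 8, 4, 4
attention_weights = torch.rand(bs, num_queries, num_heads, num_levels, num_points)

# transpose(1, 2) -> (bs, num_heads, num_queries, num_levels, num_points),
# then reshape folds bs*num_heads together, as the corrected comment states.
w = attention_weights.transpose(1, 2).reshape(bs * num_heads, 1, num_queries, num_levels * num_points)
print(w.shape)  # torch.Size([16, 1, 100, 16])
```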