dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
- dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
- tests/__init__.py +7 -6
- tests/conftest.py +15 -39
- tests/test_cli.py +17 -17
- tests/test_cuda.py +17 -8
- tests/test_engine.py +36 -10
- tests/test_exports.py +98 -37
- tests/test_integrations.py +12 -15
- tests/test_python.py +126 -82
- tests/test_solutions.py +319 -135
- ultralytics/__init__.py +27 -9
- ultralytics/cfg/__init__.py +83 -87
- ultralytics/cfg/datasets/Argoverse.yaml +4 -4
- ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
- ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
- ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
- ultralytics/cfg/datasets/ImageNet.yaml +3 -3
- ultralytics/cfg/datasets/Objects365.yaml +24 -20
- ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
- ultralytics/cfg/datasets/VOC.yaml +10 -13
- ultralytics/cfg/datasets/VisDrone.yaml +43 -33
- ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
- ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
- ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
- ultralytics/cfg/datasets/coco-pose.yaml +26 -4
- ultralytics/cfg/datasets/coco.yaml +4 -4
- ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
- ultralytics/cfg/datasets/coco128.yaml +2 -2
- ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
- ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
- ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
- ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
- ultralytics/cfg/datasets/coco8.yaml +2 -2
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/datasets/crack-seg.yaml +5 -5
- ultralytics/cfg/datasets/dog-pose.yaml +32 -4
- ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
- ultralytics/cfg/datasets/lvis.yaml +9 -9
- ultralytics/cfg/datasets/medical-pills.yaml +4 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
- ultralytics/cfg/datasets/package-seg.yaml +5 -5
- ultralytics/cfg/datasets/signature.yaml +4 -4
- ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
- ultralytics/cfg/datasets/xView.yaml +5 -5
- ultralytics/cfg/default.yaml +96 -93
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +12 -12
- ultralytics/data/augment.py +531 -564
- ultralytics/data/base.py +76 -81
- ultralytics/data/build.py +206 -42
- ultralytics/data/converter.py +179 -78
- ultralytics/data/dataset.py +121 -121
- ultralytics/data/loaders.py +114 -91
- ultralytics/data/split.py +28 -15
- ultralytics/data/split_dota.py +67 -48
- ultralytics/data/utils.py +110 -89
- ultralytics/engine/exporter.py +422 -460
- ultralytics/engine/model.py +224 -252
- ultralytics/engine/predictor.py +94 -89
- ultralytics/engine/results.py +345 -595
- ultralytics/engine/trainer.py +231 -134
- ultralytics/engine/tuner.py +279 -73
- ultralytics/engine/validator.py +53 -46
- ultralytics/hub/__init__.py +26 -28
- ultralytics/hub/auth.py +30 -16
- ultralytics/hub/google/__init__.py +34 -36
- ultralytics/hub/session.py +53 -77
- ultralytics/hub/utils.py +23 -109
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +36 -18
- ultralytics/models/fastsam/predict.py +33 -44
- ultralytics/models/fastsam/utils.py +4 -5
- ultralytics/models/fastsam/val.py +12 -14
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +16 -20
- ultralytics/models/nas/predict.py +12 -14
- ultralytics/models/nas/val.py +4 -5
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +9 -9
- ultralytics/models/rtdetr/predict.py +22 -17
- ultralytics/models/rtdetr/train.py +20 -16
- ultralytics/models/rtdetr/val.py +79 -59
- ultralytics/models/sam/__init__.py +8 -2
- ultralytics/models/sam/amg.py +53 -38
- ultralytics/models/sam/build.py +29 -31
- ultralytics/models/sam/model.py +33 -38
- ultralytics/models/sam/modules/blocks.py +159 -182
- ultralytics/models/sam/modules/decoders.py +38 -47
- ultralytics/models/sam/modules/encoders.py +114 -133
- ultralytics/models/sam/modules/memory_attention.py +38 -31
- ultralytics/models/sam/modules/sam.py +114 -93
- ultralytics/models/sam/modules/tiny_encoder.py +268 -291
- ultralytics/models/sam/modules/transformer.py +59 -66
- ultralytics/models/sam/modules/utils.py +55 -72
- ultralytics/models/sam/predict.py +745 -341
- ultralytics/models/utils/loss.py +118 -107
- ultralytics/models/utils/ops.py +118 -71
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +28 -26
- ultralytics/models/yolo/classify/train.py +50 -81
- ultralytics/models/yolo/classify/val.py +68 -61
- ultralytics/models/yolo/detect/predict.py +12 -15
- ultralytics/models/yolo/detect/train.py +56 -46
- ultralytics/models/yolo/detect/val.py +279 -223
- ultralytics/models/yolo/model.py +167 -86
- ultralytics/models/yolo/obb/predict.py +7 -11
- ultralytics/models/yolo/obb/train.py +23 -25
- ultralytics/models/yolo/obb/val.py +107 -99
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +12 -14
- ultralytics/models/yolo/pose/train.py +31 -69
- ultralytics/models/yolo/pose/val.py +119 -254
- ultralytics/models/yolo/segment/predict.py +21 -25
- ultralytics/models/yolo/segment/train.py +12 -66
- ultralytics/models/yolo/segment/val.py +126 -305
- ultralytics/models/yolo/world/train.py +53 -45
- ultralytics/models/yolo/world/train_world.py +51 -32
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +30 -37
- ultralytics/models/yolo/yoloe/train.py +89 -71
- ultralytics/models/yolo/yoloe/train_seg.py +15 -17
- ultralytics/models/yolo/yoloe/val.py +56 -41
- ultralytics/nn/__init__.py +9 -11
- ultralytics/nn/autobackend.py +179 -107
- ultralytics/nn/modules/__init__.py +67 -67
- ultralytics/nn/modules/activation.py +8 -7
- ultralytics/nn/modules/block.py +302 -323
- ultralytics/nn/modules/conv.py +61 -104
- ultralytics/nn/modules/head.py +488 -186
- ultralytics/nn/modules/transformer.py +183 -123
- ultralytics/nn/modules/utils.py +15 -20
- ultralytics/nn/tasks.py +327 -203
- ultralytics/nn/text_model.py +81 -65
- ultralytics/py.typed +1 -0
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +19 -27
- ultralytics/solutions/analytics.py +36 -26
- ultralytics/solutions/config.py +29 -28
- ultralytics/solutions/distance_calculation.py +23 -24
- ultralytics/solutions/heatmap.py +17 -19
- ultralytics/solutions/instance_segmentation.py +21 -19
- ultralytics/solutions/object_blurrer.py +16 -17
- ultralytics/solutions/object_counter.py +48 -53
- ultralytics/solutions/object_cropper.py +22 -16
- ultralytics/solutions/parking_management.py +61 -58
- ultralytics/solutions/queue_management.py +19 -19
- ultralytics/solutions/region_counter.py +63 -50
- ultralytics/solutions/security_alarm.py +22 -25
- ultralytics/solutions/similarity_search.py +107 -60
- ultralytics/solutions/solutions.py +343 -262
- ultralytics/solutions/speed_estimation.py +35 -31
- ultralytics/solutions/streamlit_inference.py +104 -40
- ultralytics/solutions/templates/similarity-search.html +31 -24
- ultralytics/solutions/trackzone.py +24 -24
- ultralytics/solutions/vision_eye.py +11 -12
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +18 -27
- ultralytics/trackers/bot_sort.py +48 -39
- ultralytics/trackers/byte_tracker.py +94 -94
- ultralytics/trackers/track.py +7 -16
- ultralytics/trackers/utils/gmc.py +37 -69
- ultralytics/trackers/utils/kalman_filter.py +68 -76
- ultralytics/trackers/utils/matching.py +13 -17
- ultralytics/utils/__init__.py +251 -275
- ultralytics/utils/autobatch.py +19 -7
- ultralytics/utils/autodevice.py +68 -38
- ultralytics/utils/benchmarks.py +169 -130
- ultralytics/utils/callbacks/base.py +12 -13
- ultralytics/utils/callbacks/clearml.py +14 -15
- ultralytics/utils/callbacks/comet.py +139 -66
- ultralytics/utils/callbacks/dvc.py +19 -27
- ultralytics/utils/callbacks/hub.py +8 -6
- ultralytics/utils/callbacks/mlflow.py +6 -10
- ultralytics/utils/callbacks/neptune.py +11 -19
- ultralytics/utils/callbacks/platform.py +73 -0
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +9 -12
- ultralytics/utils/callbacks/wb.py +33 -30
- ultralytics/utils/checks.py +163 -114
- ultralytics/utils/cpu.py +89 -0
- ultralytics/utils/dist.py +24 -20
- ultralytics/utils/downloads.py +176 -146
- ultralytics/utils/errors.py +11 -13
- ultralytics/utils/events.py +113 -0
- ultralytics/utils/export/__init__.py +7 -0
- ultralytics/utils/{export.py → export/engine.py} +81 -63
- ultralytics/utils/export/imx.py +294 -0
- ultralytics/utils/export/tensorflow.py +217 -0
- ultralytics/utils/files.py +33 -36
- ultralytics/utils/git.py +137 -0
- ultralytics/utils/instance.py +105 -120
- ultralytics/utils/logger.py +404 -0
- ultralytics/utils/loss.py +99 -61
- ultralytics/utils/metrics.py +649 -478
- ultralytics/utils/nms.py +337 -0
- ultralytics/utils/ops.py +263 -451
- ultralytics/utils/patches.py +70 -31
- ultralytics/utils/plotting.py +253 -223
- ultralytics/utils/tal.py +48 -61
- ultralytics/utils/torch_utils.py +244 -251
- ultralytics/utils/tqdm.py +438 -0
- ultralytics/utils/triton.py +22 -23
- ultralytics/utils/tuner.py +11 -10
- dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/transformer.py
CHANGED

```diff
@@ -1,6 +1,8 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Transformer modules."""
 
+from __future__ import annotations
+
 import math
 
 import torch
```
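The new `from __future__ import annotations` line is what lets the PEP 604 unions (`torch.Tensor | None`) used throughout the annotated signatures below run on Python versions older than 3.10: it keeps every annotation as an unevaluated string. A minimal sketch, mirroring the diff's own `with_pos_embed` helper:

```python
# Sketch: PEP 604 unions in annotations stay unevaluated with the future import,
# so this parses and runs on Python 3.8/3.9 as well as 3.10+.
from __future__ import annotations

import torch


def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None = None) -> torch.Tensor:
    """Add position embeddings to the tensor if provided."""
    return tensor if pos is None else tensor + pos


print(with_pos_embed(torch.zeros(2, 4)).shape)  # torch.Size([2, 4])
```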
```diff
@@ -8,26 +10,30 @@ import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.init import constant_, xavier_uniform_
 
+from ultralytics.utils.torch_utils import TORCH_1_11
+
 from .conv import Conv
 from .utils import _get_clones, inverse_sigmoid, multi_scale_deformable_attn_pytorch
 
 __all__ = (
-    "TransformerEncoderLayer",
-    "TransformerLayer",
-    "TransformerBlock",
-    "MLPBlock",
-    "LayerNorm2d",
     "AIFI",
+    "MLP",
     "DeformableTransformerDecoder",
     "DeformableTransformerDecoderLayer",
+    "LayerNorm2d",
+    "MLPBlock",
     "MSDeformAttn",
-    "MLP",
+    "TransformerBlock",
+    "TransformerEncoderLayer",
+    "TransformerLayer",
 )
 
 
 class TransformerEncoderLayer(nn.Module):
-    """
-    Defines a single layer of the transformer encoder.
+    """A single layer of the transformer encoder.
+
+    This class implements a standard transformer encoder layer with multi-head attention and feedforward network,
+    supporting both pre-normalization and post-normalization configurations.
 
     Attributes:
         ma (nn.MultiheadAttention): Multi-head attention module.
```
```diff
@@ -42,9 +48,16 @@ class TransformerEncoderLayer(nn.Module):
         normalize_before (bool): Whether to apply normalization before attention and feedforward.
     """
 
-    def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
-        """
-        Initialize the TransformerEncoderLayer with specified parameters.
+    def __init__(
+        self,
+        c1: int,
+        cm: int = 2048,
+        num_heads: int = 8,
+        dropout: float = 0.0,
+        act: nn.Module = nn.GELU(),
+        normalize_before: bool = False,
+    ):
+        """Initialize the TransformerEncoderLayer with specified parameters.
 
         Args:
             c1 (int): Input dimension.
```
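A usage sketch of the newly annotated constructor; argument names and defaults are taken from the diff, while the `(batch, sequence, channels)` token layout is an assumption based on the layer's documented tensor shapes:

```python
import torch
import torch.nn as nn

from ultralytics.nn.modules.transformer import TransformerEncoderLayer

# 256-dim tokens, 2048-dim feedforward, 8 heads, post-norm (the default).
layer = TransformerEncoderLayer(c1=256, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False)
tokens = torch.randn(2, 100, 256)  # assumed (batch, sequence, channels) layout
print(layer(tokens).shape)  # torch.Size([2, 100, 256])
```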
```diff
@@ -76,13 +89,18 @@ class TransformerEncoderLayer(nn.Module):
         self.normalize_before = normalize_before
 
     @staticmethod
-    def with_pos_embed(tensor, pos=None):
+    def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None = None) -> torch.Tensor:
         """Add position embeddings to the tensor if provided."""
         return tensor if pos is None else tensor + pos
 
-    def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
-        """
-        Perform forward pass with post-normalization.
+    def forward_post(
+        self,
+        src: torch.Tensor,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Perform forward pass with post-normalization.
 
         Args:
             src (torch.Tensor): Input tensor.
@@ -101,9 +119,14 @@ class TransformerEncoderLayer(nn.Module):
         src = src + self.dropout2(src2)
         return self.norm2(src)
 
-    def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
-        """
-        Perform forward pass with pre-normalization.
+    def forward_pre(
+        self,
+        src: torch.Tensor,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Perform forward pass with pre-normalization.
 
         Args:
             src (torch.Tensor): Input tensor.
@@ -122,9 +145,14 @@ class TransformerEncoderLayer(nn.Module):
         src2 = self.fc2(self.dropout(self.act(self.fc1(src2))))
         return src + self.dropout2(src2)
 
-    def forward(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
-        """
-        Forward propagate the input through the encoder module.
+    def forward(
+        self,
+        src: torch.Tensor,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Forward propagate the input through the encoder module.
 
         Args:
             src (torch.Tensor): Input tensor.
```
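The three annotated methods above implement the usual pre-norm/post-norm split: `forward` dispatches to `forward_pre` when `normalize_before=True` and to `forward_post` otherwise. A conceptual sketch of the two residual patterns (simplified, not the diff's exact bodies):

```python
# Post-norm: normalize after each residual addition.
def post_norm_step(x, sublayer, norm, dropout):
    return norm(x + dropout(sublayer(x)))


# Pre-norm: normalize before the sublayer, leaving the residual path clean.
def pre_norm_step(x, sublayer, norm, dropout):
    return x + dropout(sublayer(norm(x)))
```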
```diff
@@ -141,15 +169,22 @@ class TransformerEncoderLayer(nn.Module):
 
 
 class AIFI(TransformerEncoderLayer):
-    """
-    Defines the AIFI transformer layer.
+    """AIFI transformer layer for 2D data with positional embeddings.
 
-    This class extends TransformerEncoderLayer to work with 2D data by adding positional embeddings.
+    This class extends TransformerEncoderLayer to work with 2D feature maps by adding 2D sine-cosine positional
+    embeddings and handling the spatial dimensions appropriately.
     """
 
-    def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False):
-        """
-        Initialize the AIFI instance with specified parameters.
+    def __init__(
+        self,
+        c1: int,
+        cm: int = 2048,
+        num_heads: int = 8,
+        dropout: float = 0,
+        act: nn.Module = nn.GELU(),
+        normalize_before: bool = False,
+    ):
+        """Initialize the AIFI instance with specified parameters.
 
         Args:
             c1 (int): Input dimension.
@@ -161,9 +196,8 @@ class AIFI(TransformerEncoderLayer):
         """
         super().__init__(c1, cm, num_heads, dropout, act, normalize_before)
 
-    def forward(self, x):
-        """
-        Forward pass for the AIFI transformer layer.
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Forward pass for the AIFI transformer layer.
 
         Args:
             x (torch.Tensor): Input tensor with shape [B, C, H, W].
@@ -178,9 +212,10 @@ class AIFI(TransformerEncoderLayer):
         return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
 
     @staticmethod
-    def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
-        """
-        Build 2D sine-cosine position embedding.
+    def build_2d_sincos_position_embedding(
+        w: int, h: int, embed_dim: int = 256, temperature: float = 10000.0
+    ) -> torch.Tensor:
+        """Build 2D sine-cosine position embedding.
 
         Args:
             w (int): Width of the feature map.
@@ -194,7 +229,7 @@ class AIFI(TransformerEncoderLayer):
         assert embed_dim % 4 == 0, "Embed dimension must be divisible by 4 for 2D sin-cos position embedding"
         grid_w = torch.arange(w, dtype=torch.float32)
         grid_h = torch.arange(h, dtype=torch.float32)
-        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="ij")
+        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="ij") if TORCH_1_11 else torch.meshgrid(grid_w, grid_h)
         pos_dim = embed_dim // 4
         omega = torch.arange(pos_dim, dtype=torch.float32) / pos_dim
         omega = 1.0 / (temperature**omega)
```
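The new `TORCH_1_11` guard keeps the `meshgrid` call compatible with older torch builds that lack the `indexing` keyword. For reference, a standalone sketch of the 2D sine-cosine embedding this hunk builds; the grid/omega setup is shown in the diff, while the final sin/cos concatenation follows the standard construction and is an assumption here:

```python
import torch

# Sketch of the full 2D sin-cos position embedding for a w x h feature map.
w, h, embed_dim, temperature = 4, 3, 256, 10000.0
grid_w = torch.arange(w, dtype=torch.float32)
grid_h = torch.arange(h, dtype=torch.float32)
grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="ij")
pos_dim = embed_dim // 4
omega = torch.arange(pos_dim, dtype=torch.float32) / pos_dim
omega = 1.0 / (temperature**omega)
out_w = grid_w.flatten()[..., None] @ omega[None]  # (w*h, pos_dim)
out_h = grid_h.flatten()[..., None] @ omega[None]
pe = torch.cat([torch.sin(out_w), torch.cos(out_w), torch.sin(out_h), torch.cos(out_h)], dim=1)[None]
print(pe.shape)  # torch.Size([1, 12, 256]) -> one embedding per spatial position
```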
```diff
@@ -208,9 +243,8 @@ class AIFI(TransformerEncoderLayer):
 class TransformerLayer(nn.Module):
     """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""
 
-    def __init__(self, c, num_heads):
-        """
-        Initialize a self-attention mechanism using linear transformations and multi-head attention.
+    def __init__(self, c: int, num_heads: int):
+        """Initialize a self-attention mechanism using linear transformations and multi-head attention.
 
         Args:
             c (int): Input and output channel dimension.
@@ -224,9 +258,8 @@ class TransformerLayer(nn.Module):
         self.fc1 = nn.Linear(c, c, bias=False)
         self.fc2 = nn.Linear(c, c, bias=False)
 
-    def forward(self, x):
-        """
-        Apply a transformer block to the input x and return the output.
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply a transformer block to the input x and return the output.
 
         Args:
             x (torch.Tensor): Input tensor.
```
```diff
@@ -239,8 +272,10 @@ class TransformerLayer(nn.Module):
 
 
 class TransformerBlock(nn.Module):
-    """
-    Vision Transformer https://arxiv.org/abs/2010.11929.
+    """Vision Transformer block based on https://arxiv.org/abs/2010.11929.
+
+    This class implements a complete transformer block with optional convolution layer for channel adjustment, learnable
+    position embedding, and multiple transformer layers.
 
     Attributes:
         conv (Conv, optional): Convolution layer if input and output channels differ.
@@ -249,9 +284,8 @@ class TransformerBlock(nn.Module):
         c2 (int): Output channel dimension.
     """
 
-    def __init__(self, c1, c2, num_heads, num_layers):
-        """
-        Initialize a Transformer module with position embedding and specified number of heads and layers.
+    def __init__(self, c1: int, c2: int, num_heads: int, num_layers: int):
+        """Initialize a Transformer module with position embedding and specified number of heads and layers.
 
         Args:
             c1 (int): Input channel dimension.
@@ -267,9 +301,8 @@ class TransformerBlock(nn.Module):
         self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
         self.c2 = c2
 
-    def forward(self, x):
-        """
-        Forward propagates the input through the bottleneck module.
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Forward propagate the input through the transformer block.
 
         Args:
             x (torch.Tensor): Input tensor with shape [b, c1, w, h].
```
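A usage sketch matching the docstring's `[b, c1, w, h]` contract; per the Attributes section above, the channel-adjusting conv is created only when `c1 != c2`, so the expected output shape here is an assumption drawn from that description:

```python
import torch

from ultralytics.nn.modules.transformer import TransformerBlock

# c1 != c2, so the block inserts a Conv to adjust channels before the layers.
block = TransformerBlock(c1=64, c2=128, num_heads=4, num_layers=2)
x = torch.randn(1, 64, 20, 20)
print(block(x).shape)  # expected torch.Size([1, 128, 20, 20])
```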
```diff
@@ -285,11 +318,10 @@ class TransformerBlock(nn.Module):
 
 
 class MLPBlock(nn.Module):
-    """Implements a single block of a multi-layer perceptron."""
+    """A single block of a multi-layer perceptron."""
 
-    def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
-        """
-        Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function.
+    def __init__(self, embedding_dim: int, mlp_dim: int, act=nn.GELU):
+        """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function.
 
         Args:
             embedding_dim (int): Input and output dimension.
@@ -302,8 +334,7 @@ class MLPBlock(nn.Module):
         self.act = act()
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Forward pass for the MLPBlock.
+        """Forward pass for the MLPBlock.
 
         Args:
             x (torch.Tensor): Input tensor.
```
```diff
@@ -315,8 +346,10 @@ class MLPBlock(nn.Module):
 
 
 class MLP(nn.Module):
-    """
-    Implements a simple multi-layer perceptron (also called FFN).
+    """A simple multi-layer perceptron (also called FFN).
+
+    This class implements a configurable MLP with multiple linear layers, activation functions, and optional sigmoid
+    output activation.
 
     Attributes:
         num_layers (int): Number of layers in the MLP.
@@ -325,9 +358,10 @@ class MLP(nn.Module):
         act (nn.Module): Activation function.
     """
 
-    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, act=nn.ReLU, sigmoid=False):
-        """
-        Initialize the MLP with specified input, hidden, output dimensions and number of layers.
+    def __init__(
+        self, input_dim: int, hidden_dim: int, output_dim: int, num_layers: int, act=nn.ReLU, sigmoid: bool = False
+    ):
+        """Initialize the MLP with specified input, hidden, output dimensions and number of layers.
 
         Args:
             input_dim (int): Input dimension.
@@ -340,13 +374,12 @@ class MLP(nn.Module):
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)
-        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim, *h], [*h, output_dim]))
        self.sigmoid = sigmoid
         self.act = act()
 
-    def forward(self, x):
-        """
-        Forward pass for the entire MLP.
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Forward pass for the entire MLP.
 
         Args:
             x (torch.Tensor): Input tensor.
```
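The rewritten layer construction is behavior-neutral: `[input_dim, *h]` / `[*h, output_dim]` merely replaces list concatenation with unpacking. A quick check of the dimension pairs it produces:

```python
# Dimension pairing produced by the zip in MLP.__init__ (sketch).
input_dim, hidden_dim, output_dim, num_layers = 256, 512, 4, 3
h = [hidden_dim] * (num_layers - 1)
print(list(zip([input_dim, *h], [*h, output_dim])))
# [(256, 512), (512, 512), (512, 4)] -> Linear(256->512), Linear(512->512), Linear(512->4)
```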
```diff
@@ -360,23 +393,23 @@ class MLP(nn.Module):
 
 
 class LayerNorm2d(nn.Module):
-    """
-    2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
+    """2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
 
-    Original implementations in
-    https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
-    and
-    https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py.
+    This class implements layer normalization for 2D feature maps, normalizing across the channel dimension while
+    preserving spatial dimensions.
 
     Attributes:
         weight (nn.Parameter): Learnable scale parameter.
         bias (nn.Parameter): Learnable bias parameter.
         eps (float): Small constant for numerical stability.
+
+    References:
+        https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
+        https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py
     """
 
-    def __init__(self, num_channels, eps=1e-6):
-        """
-        Initialize LayerNorm2d with the given parameters.
+    def __init__(self, num_channels: int, eps: float = 1e-6):
+        """Initialize LayerNorm2d with the given parameters.
 
         Args:
             num_channels (int): Number of channels in the input.
@@ -387,9 +420,8 @@ class LayerNorm2d(nn.Module):
         self.bias = nn.Parameter(torch.zeros(num_channels))
         self.eps = eps
 
-    def forward(self, x):
-        """
-        Perform forward pass for 2D layer normalization.
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Perform forward pass for 2D layer normalization.
 
         Args:
             x (torch.Tensor): Input tensor.
```
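A sketch of the channel-wise statistics `LayerNorm2d` computes on NCHW maps, per its docstring; the actual forward body sits outside the hunks shown, so this mirrors the referenced Detectron2/ConvNeXt pattern (without the learnable weight/bias):

```python
import torch

# Channel-wise layer norm over an NCHW feature map (sketch).
x = torch.randn(2, 8, 4, 4)
eps = 1e-6
u = x.mean(1, keepdim=True)               # per-pixel mean over the channel dim
s = (x - u).pow(2).mean(1, keepdim=True)  # per-pixel variance over channels
y = (x - u) / torch.sqrt(s + eps)         # normalized, spatial dims preserved
print(y.shape)  # torch.Size([2, 8, 4, 4])
```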
```diff
@@ -404,10 +436,10 @@ class LayerNorm2d(nn.Module):
 
 
 class MSDeformAttn(nn.Module):
-    """
-    Multiscale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
+    """Multiscale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
 
-    https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
+    This module implements multiscale deformable attention that can attend to features at multiple scales with learnable
+    sampling locations and attention weights.
 
     Attributes:
         im2col_step (int): Step size for im2col operations.
@@ -419,11 +451,13 @@ class MSDeformAttn(nn.Module):
         attention_weights (nn.Linear): Linear layer for generating attention weights.
         value_proj (nn.Linear): Linear layer for projecting values.
         output_proj (nn.Linear): Linear layer for projecting output.
+
+    References:
+        https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
     """
 
-    def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
-        """
-        Initialize MSDeformAttn with the given parameters.
+    def __init__(self, d_model: int = 256, n_levels: int = 4, n_heads: int = 8, n_points: int = 4):
+        """Initialize MSDeformAttn with the given parameters.
 
         Args:
             d_model (int): Model dimension.
@@ -473,23 +507,30 @@ class MSDeformAttn(nn.Module):
         xavier_uniform_(self.output_proj.weight.data)
         constant_(self.output_proj.bias.data, 0.0)
 
-    def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
-        """
-        Perform forward pass for multiscale deformable attention.
-
-        https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
+    def forward(
+        self,
+        query: torch.Tensor,
+        refer_bbox: torch.Tensor,
+        value: torch.Tensor,
+        value_shapes: list,
+        value_mask: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Perform forward pass for multiscale deformable attention.
 
         Args:
-            query (torch.Tensor): [bs, query_length, C]
-            refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2], range in [0, 1],
-                top-left (0,0), bottom-right (1, 1), including padding area.
-            value (torch.Tensor): [bs, value_length, C]
+            query (torch.Tensor): Query tensor with shape [bs, query_length, C].
+            refer_bbox (torch.Tensor): Reference bounding boxes with shape [bs, query_length, n_levels, 2], range in [0,
+                1], top-left (0,0), bottom-right (1, 1), including padding area.
+            value (torch.Tensor): Value tensor with shape [bs, value_length, C].
             value_shapes (list): List with shape [n_levels, 2], [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})].
-            value_mask (torch.Tensor, optional): [bs, value_length], True for non-padding elements,
-                False for padding elements.
+            value_mask (torch.Tensor, optional): Mask tensor with shape [bs, value_length], True for non-padding
+                elements, False for padding elements.
 
         Returns:
             (torch.Tensor): Output tensor with shape [bs, Length_{query}, C].
+
+        References:
+            https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
         """
         bs, len_q = query.shape[:2]
         len_v = value.shape[1]
```
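A shape walkthrough for the annotated forward above (a usage sketch; all shapes follow the documented contract, and `value_length` is assumed to equal the sum of `H * W` over levels):

```python
import torch

from ultralytics.nn.modules.transformer import MSDeformAttn

attn = MSDeformAttn(d_model=256, n_levels=2, n_heads=8, n_points=4)
shapes = [(16, 16), (8, 8)]                   # (H_i, W_i) per level
value = torch.randn(1, 16 * 16 + 8 * 8, 256)  # value_length = sum(H * W)
query = torch.randn(1, 50, 256)
refer_bbox = torch.rand(1, 50, 2, 2)          # [bs, query_length, n_levels, 2]
print(attn(query, refer_bbox, value, shapes).shape)  # torch.Size([1, 50, 256])
```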
```diff
@@ -518,11 +559,10 @@ class MSDeformAttn(nn.Module):
 
 
 class DeformableTransformerDecoderLayer(nn.Module):
-    """
-    Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
+    """Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
 
-    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
-    https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
+    This class implements a single decoder layer with self-attention, cross-attention using multiscale deformable
+    attention, and a feedforward network.
 
     Attributes:
         self_attn (nn.MultiheadAttention): Self-attention module.
@@ -537,11 +577,23 @@ class DeformableTransformerDecoderLayer(nn.Module):
         linear2 (nn.Linear): Second linear layer in the feedforward network.
         dropout4 (nn.Dropout): Dropout after the feedforward network.
         norm3 (nn.LayerNorm): Layer normalization after the feedforward network.
+
+    References:
+        https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
+        https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
     """
 
-    def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0.0, act=nn.ReLU(), n_levels=4, n_points=4):
-        """
-        Initialize the DeformableTransformerDecoderLayer with the given parameters.
+    def __init__(
+        self,
+        d_model: int = 256,
+        n_heads: int = 8,
+        d_ffn: int = 1024,
+        dropout: float = 0.0,
+        act: nn.Module = nn.ReLU(),
+        n_levels: int = 4,
+        n_points: int = 4,
+    ):
+        """Initialize the DeformableTransformerDecoderLayer with the given parameters.
 
         Args:
             d_model (int): Model dimension.
@@ -573,13 +625,12 @@ class DeformableTransformerDecoderLayer(nn.Module):
         self.norm3 = nn.LayerNorm(d_model)
 
     @staticmethod
-    def with_pos_embed(tensor, pos):
+    def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None) -> torch.Tensor:
         """Add positional embeddings to the input tensor, if provided."""
         return tensor if pos is None else tensor + pos
 
-    def forward_ffn(self, tgt):
-        """
-        Perform forward pass through the Feed-Forward Network part of the layer.
+    def forward_ffn(self, tgt: torch.Tensor) -> torch.Tensor:
+        """Perform forward pass through the Feed-Forward Network part of the layer.
 
         Args:
             tgt (torch.Tensor): Input tensor.
@@ -591,9 +642,17 @@ class DeformableTransformerDecoderLayer(nn.Module):
         tgt = tgt + self.dropout4(tgt2)
         return self.norm3(tgt)
 
-    def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
-        """
-        Perform the forward pass through the entire decoder layer.
+    def forward(
+        self,
+        embed: torch.Tensor,
+        refer_bbox: torch.Tensor,
+        feats: torch.Tensor,
+        shapes: list,
+        padding_mask: torch.Tensor | None = None,
+        attn_mask: torch.Tensor | None = None,
+        query_pos: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Perform the forward pass through the entire decoder layer.
 
         Args:
             embed (torch.Tensor): Input embeddings.
```
```diff
@@ -627,21 +686,23 @@ class DeformableTransformerDecoderLayer(nn.Module):
 
 
 class DeformableTransformerDecoder(nn.Module):
-    """
-    Implementation of Deformable Transformer Decoder based on PaddleDetection.
+    """Deformable Transformer Decoder based on PaddleDetection implementation.
 
-    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
+    This class implements a complete deformable transformer decoder with multiple decoder layers and prediction heads
+    for bounding box regression and classification.
 
     Attributes:
         layers (nn.ModuleList): List of decoder layers.
         num_layers (int): Number of decoder layers.
         hidden_dim (int): Hidden dimension.
         eval_idx (int): Index of the layer to use during evaluation.
+
+    References:
+        https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
     """
 
-    def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
-        """
-        Initialize the DeformableTransformerDecoder with the given parameters.
+    def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, eval_idx: int = -1):
+        """Initialize the DeformableTransformerDecoder with the given parameters.
 
         Args:
             hidden_dim (int): Hidden dimension.
@@ -657,18 +718,17 @@ class DeformableTransformerDecoder(nn.Module):
 
     def forward(
         self,
-        embed,  # decoder embeddings
-        refer_bbox,  # anchor
-        feats,  # image features
-        shapes,  # feature shapes
-        bbox_head,
-        score_head,
-        pos_mlp,
-        attn_mask=None,
-        padding_mask=None,
+        embed: torch.Tensor,  # decoder embeddings
+        refer_bbox: torch.Tensor,  # anchor
+        feats: torch.Tensor,  # image features
+        shapes: list,  # feature shapes
+        bbox_head: nn.Module,
+        score_head: nn.Module,
+        pos_mlp: nn.Module,
+        attn_mask: torch.Tensor | None = None,
+        padding_mask: torch.Tensor | None = None,
     ):
-        """
-        Perform the forward pass through the entire decoder.
+        """Perform the forward pass through the entire decoder.
 
         Args:
             embed (torch.Tensor): Decoder embeddings.
```
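Context for the decoder's `bbox_head`/`score_head` arguments: Deformable-DETR-style decoders refine normalized anchors layer by layer in logit space, which is where `inverse_sigmoid` from `ultralytics/nn/modules/utils.py` (diffed below) comes in. A conceptual sketch under that assumption, not the decoder's actual body:

```python
import torch

from ultralytics.nn.modules.utils import inverse_sigmoid

refer_bbox = torch.rand(1, 100, 4)  # normalized anchor boxes
delta = torch.zeros(1, 100, 4)      # stand-in for a per-layer bbox_head output
refined = torch.sigmoid(delta + inverse_sigmoid(refer_bbox))
print(torch.allclose(refined, refer_bbox, atol=1e-4))  # True: zero delta keeps anchors
```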
ultralytics/nn/modules/utils.py
CHANGED
```diff
@@ -9,12 +9,11 @@ import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.init import uniform_
 
-__all__ = "multi_scale_deformable_attn_pytorch", "inverse_sigmoid"
+__all__ = "inverse_sigmoid", "multi_scale_deformable_attn_pytorch"
 
 
 def _get_clones(module, n):
-    """
-    Create a list of cloned modules from the given module.
+    """Create a list of cloned modules from the given module.
 
     Args:
         module (nn.Module): The module to be cloned.
```
```diff
@@ -34,12 +33,11 @@ def _get_clones(module, n):
 
 
 def bias_init_with_prob(prior_prob=0.01):
-    """
-    Initialize conv/fc bias value according to a given probability value.
+    """Initialize conv/fc bias value according to a given probability value.
 
-    This function calculates the bias initialization value based on a prior probability using the inverse error function.
-    It's commonly used in object detection models to initialize classification layers with a specific positive
-    probability.
+    This function calculates the bias initialization value based on a prior probability using the inverse error
+    function. It's commonly used in object detection models to initialize classification layers with a specific positive
+    prediction probability.
 
     Args:
         prior_prob (float, optional): Prior probability for bias initialization.
```
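The function body is outside this hunk; the conventional prior-probability initialization it describes solves `sigmoid(b) = prior_prob` for the bias. A sketch of that log-odds form (an assumption about the implementation, but standard for RetinaNet-style classification heads):

```python
import math

# Solve sigmoid(b) = prior_prob for b, so the head starts out predicting
# "positive" with probability prior_prob.
prior_prob = 0.01
bias = -math.log((1 - prior_prob) / prior_prob)
print(round(bias, 4))  # -4.5951, and sigmoid(-4.5951) == 0.01
```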
```diff
@@ -56,11 +54,10 @@ def bias_init_with_prob(prior_prob=0.01):
 
 
 def linear_init(module):
-    """
-    Initialize the weights and biases of a linear module.
+    """Initialize the weights and biases of a linear module.
 
-    This function initializes the weights of a linear module using a uniform distribution within bounds calculated
-    from the input dimension. If the module has a bias, it is also initialized.
+    This function initializes the weights of a linear module using a uniform distribution within bounds calculated from
+    the input dimension. If the module has a bias, it is also initialized.
 
     Args:
         module (nn.Module): Linear module to initialize.
```
```diff
@@ -80,8 +77,7 @@ def linear_init(module):
 
 
 def inverse_sigmoid(x, eps=1e-5):
-    """
-    Calculate the inverse sigmoid function for a tensor.
+    """Calculate the inverse sigmoid function for a tensor.
 
     This function applies the inverse of the sigmoid function to a tensor, which is useful in various neural network
     operations, particularly in attention mechanisms and coordinate transformations.
```
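A sketch of the logit computation with `eps` clamping that the docstring describes; the exact clamping order in the source may differ slightly:

```python
import torch


def inverse_sigmoid_sketch(x: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
    x = x.clamp(min=0, max=1)    # keep inputs in the valid probability range
    x1 = x.clamp(min=eps)        # avoid log(0)
    x2 = (1 - x).clamp(min=eps)  # avoid division by zero
    return torch.log(x1 / x2)


print(inverse_sigmoid_sketch(torch.tensor([0.5])))  # tensor([0.])
```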
```diff
@@ -110,8 +106,7 @@ def multi_scale_deformable_attn_pytorch(
     sampling_locations: torch.Tensor,
     attention_weights: torch.Tensor,
 ) -> torch.Tensor:
-    """
-    Implement multi-scale deformable attention in PyTorch.
+    """Implement multi-scale deformable attention in PyTorch.
 
     This function performs deformable attention across multiple feature map scales, allowing the model to attend to
     different spatial locations with learned offsets.
@@ -119,10 +114,10 @@ def multi_scale_deformable_attn_pytorch(
     Args:
         value (torch.Tensor): The value tensor with shape (bs, num_keys, num_heads, embed_dims).
         value_spatial_shapes (torch.Tensor): Spatial shapes of the value tensor with shape (num_levels, 2).
-        sampling_locations (torch.Tensor): The sampling locations with shape
-            (bs, num_queries, num_heads, num_levels, num_points, 2).
-        attention_weights (torch.Tensor): The attention weights with shape
-            (bs, num_queries, num_heads, num_levels, num_points).
+        sampling_locations (torch.Tensor): The sampling locations with shape (bs, num_queries, num_heads, num_levels,
+            num_points, 2).
+        attention_weights (torch.Tensor): The attention weights with shape (bs, num_queries, num_heads, num_levels,
+            num_points).
 
     Returns:
         (torch.Tensor): The output tensor with shape (bs, num_queries, embed_dims).
```
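A shape check exercising the documented tensor contract (a usage sketch: two levels, `num_keys` assumed equal to the sum of `H * W` over levels, weights normalized over the levels and points dimensions):

```python
import torch

from ultralytics.nn.modules.utils import multi_scale_deformable_attn_pytorch

bs, heads, dims, queries, points = 1, 8, 32, 10, 4
shapes = torch.tensor([[16, 16], [8, 8]])             # (num_levels, 2)
value = torch.randn(bs, 16 * 16 + 8 * 8, heads, dims)
locs = torch.rand(bs, queries, heads, 2, points, 2)   # normalized sampling points
weights = torch.rand(bs, queries, heads, 2, points)
weights = weights / weights.sum((-2, -1), keepdim=True)
out = multi_scale_deformable_attn_pytorch(value, shapes, locs, weights)
print(out.shape)  # torch.Size([1, 10, 256]) -> (bs, num_queries, num_heads * embed_dims)
```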