dgenerate-ultralytics-headless 8.3.196__py3-none-any.whl → 8.3.248__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +33 -34
  2. dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
  3. tests/__init__.py +5 -7
  4. tests/conftest.py +8 -15
  5. tests/test_cli.py +8 -10
  6. tests/test_cuda.py +9 -10
  7. tests/test_engine.py +29 -2
  8. tests/test_exports.py +69 -21
  9. tests/test_integrations.py +8 -11
  10. tests/test_python.py +109 -71
  11. tests/test_solutions.py +170 -159
  12. ultralytics/__init__.py +27 -9
  13. ultralytics/cfg/__init__.py +57 -64
  14. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  15. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  16. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  17. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  18. ultralytics/cfg/datasets/Objects365.yaml +19 -15
  19. ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
  20. ultralytics/cfg/datasets/VOC.yaml +19 -21
  21. ultralytics/cfg/datasets/VisDrone.yaml +5 -5
  22. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  23. ultralytics/cfg/datasets/coco-pose.yaml +24 -2
  24. ultralytics/cfg/datasets/coco.yaml +2 -2
  25. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  26. ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
  27. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  28. ultralytics/cfg/datasets/dog-pose.yaml +28 -0
  29. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  30. ultralytics/cfg/datasets/dota8.yaml +2 -2
  31. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
  32. ultralytics/cfg/datasets/kitti.yaml +27 -0
  33. ultralytics/cfg/datasets/lvis.yaml +7 -7
  34. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  35. ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
  36. ultralytics/cfg/datasets/xView.yaml +16 -16
  37. ultralytics/cfg/default.yaml +96 -94
  38. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  39. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  40. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  41. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
  42. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
  43. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
  44. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
  45. ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
  46. ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
  47. ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
  48. ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
  49. ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
  50. ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
  51. ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
  52. ultralytics/cfg/models/v6/yolov6.yaml +1 -1
  53. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  54. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  55. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  56. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  57. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  58. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  59. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  60. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  61. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  62. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  63. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  64. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
  65. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  66. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  67. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  68. ultralytics/cfg/trackers/botsort.yaml +16 -17
  69. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  70. ultralytics/data/__init__.py +4 -4
  71. ultralytics/data/annotator.py +3 -4
  72. ultralytics/data/augment.py +286 -476
  73. ultralytics/data/base.py +18 -26
  74. ultralytics/data/build.py +151 -26
  75. ultralytics/data/converter.py +38 -50
  76. ultralytics/data/dataset.py +47 -75
  77. ultralytics/data/loaders.py +42 -49
  78. ultralytics/data/split.py +5 -6
  79. ultralytics/data/split_dota.py +8 -15
  80. ultralytics/data/utils.py +41 -45
  81. ultralytics/engine/exporter.py +462 -462
  82. ultralytics/engine/model.py +150 -191
  83. ultralytics/engine/predictor.py +30 -40
  84. ultralytics/engine/results.py +177 -311
  85. ultralytics/engine/trainer.py +193 -120
  86. ultralytics/engine/tuner.py +77 -63
  87. ultralytics/engine/validator.py +39 -22
  88. ultralytics/hub/__init__.py +16 -19
  89. ultralytics/hub/auth.py +6 -12
  90. ultralytics/hub/google/__init__.py +7 -10
  91. ultralytics/hub/session.py +15 -25
  92. ultralytics/hub/utils.py +5 -8
  93. ultralytics/models/__init__.py +1 -1
  94. ultralytics/models/fastsam/__init__.py +1 -1
  95. ultralytics/models/fastsam/model.py +8 -10
  96. ultralytics/models/fastsam/predict.py +19 -30
  97. ultralytics/models/fastsam/utils.py +1 -2
  98. ultralytics/models/fastsam/val.py +5 -7
  99. ultralytics/models/nas/__init__.py +1 -1
  100. ultralytics/models/nas/model.py +5 -8
  101. ultralytics/models/nas/predict.py +7 -9
  102. ultralytics/models/nas/val.py +1 -2
  103. ultralytics/models/rtdetr/__init__.py +1 -1
  104. ultralytics/models/rtdetr/model.py +7 -8
  105. ultralytics/models/rtdetr/predict.py +15 -19
  106. ultralytics/models/rtdetr/train.py +10 -13
  107. ultralytics/models/rtdetr/val.py +21 -23
  108. ultralytics/models/sam/__init__.py +15 -2
  109. ultralytics/models/sam/amg.py +14 -20
  110. ultralytics/models/sam/build.py +26 -19
  111. ultralytics/models/sam/build_sam3.py +377 -0
  112. ultralytics/models/sam/model.py +29 -32
  113. ultralytics/models/sam/modules/blocks.py +83 -144
  114. ultralytics/models/sam/modules/decoders.py +22 -40
  115. ultralytics/models/sam/modules/encoders.py +44 -101
  116. ultralytics/models/sam/modules/memory_attention.py +16 -30
  117. ultralytics/models/sam/modules/sam.py +206 -79
  118. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  119. ultralytics/models/sam/modules/transformer.py +18 -28
  120. ultralytics/models/sam/modules/utils.py +174 -50
  121. ultralytics/models/sam/predict.py +2268 -366
  122. ultralytics/models/sam/sam3/__init__.py +3 -0
  123. ultralytics/models/sam/sam3/decoder.py +546 -0
  124. ultralytics/models/sam/sam3/encoder.py +529 -0
  125. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  126. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  127. ultralytics/models/sam/sam3/model_misc.py +199 -0
  128. ultralytics/models/sam/sam3/necks.py +129 -0
  129. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  130. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  131. ultralytics/models/sam/sam3/vitdet.py +547 -0
  132. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  133. ultralytics/models/utils/loss.py +14 -26
  134. ultralytics/models/utils/ops.py +13 -17
  135. ultralytics/models/yolo/__init__.py +1 -1
  136. ultralytics/models/yolo/classify/predict.py +9 -12
  137. ultralytics/models/yolo/classify/train.py +15 -41
  138. ultralytics/models/yolo/classify/val.py +34 -32
  139. ultralytics/models/yolo/detect/predict.py +8 -11
  140. ultralytics/models/yolo/detect/train.py +13 -32
  141. ultralytics/models/yolo/detect/val.py +75 -63
  142. ultralytics/models/yolo/model.py +37 -53
  143. ultralytics/models/yolo/obb/predict.py +5 -14
  144. ultralytics/models/yolo/obb/train.py +11 -14
  145. ultralytics/models/yolo/obb/val.py +42 -39
  146. ultralytics/models/yolo/pose/__init__.py +1 -1
  147. ultralytics/models/yolo/pose/predict.py +7 -22
  148. ultralytics/models/yolo/pose/train.py +10 -22
  149. ultralytics/models/yolo/pose/val.py +40 -59
  150. ultralytics/models/yolo/segment/predict.py +16 -20
  151. ultralytics/models/yolo/segment/train.py +3 -12
  152. ultralytics/models/yolo/segment/val.py +106 -56
  153. ultralytics/models/yolo/world/train.py +12 -16
  154. ultralytics/models/yolo/world/train_world.py +11 -34
  155. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  156. ultralytics/models/yolo/yoloe/predict.py +16 -23
  157. ultralytics/models/yolo/yoloe/train.py +31 -56
  158. ultralytics/models/yolo/yoloe/train_seg.py +5 -10
  159. ultralytics/models/yolo/yoloe/val.py +16 -21
  160. ultralytics/nn/__init__.py +7 -7
  161. ultralytics/nn/autobackend.py +152 -80
  162. ultralytics/nn/modules/__init__.py +60 -60
  163. ultralytics/nn/modules/activation.py +4 -6
  164. ultralytics/nn/modules/block.py +133 -217
  165. ultralytics/nn/modules/conv.py +52 -97
  166. ultralytics/nn/modules/head.py +64 -116
  167. ultralytics/nn/modules/transformer.py +79 -89
  168. ultralytics/nn/modules/utils.py +16 -21
  169. ultralytics/nn/tasks.py +111 -156
  170. ultralytics/nn/text_model.py +40 -67
  171. ultralytics/solutions/__init__.py +12 -12
  172. ultralytics/solutions/ai_gym.py +11 -17
  173. ultralytics/solutions/analytics.py +15 -16
  174. ultralytics/solutions/config.py +5 -6
  175. ultralytics/solutions/distance_calculation.py +10 -13
  176. ultralytics/solutions/heatmap.py +7 -13
  177. ultralytics/solutions/instance_segmentation.py +5 -8
  178. ultralytics/solutions/object_blurrer.py +7 -10
  179. ultralytics/solutions/object_counter.py +12 -19
  180. ultralytics/solutions/object_cropper.py +8 -14
  181. ultralytics/solutions/parking_management.py +33 -31
  182. ultralytics/solutions/queue_management.py +10 -12
  183. ultralytics/solutions/region_counter.py +9 -12
  184. ultralytics/solutions/security_alarm.py +15 -20
  185. ultralytics/solutions/similarity_search.py +13 -17
  186. ultralytics/solutions/solutions.py +75 -74
  187. ultralytics/solutions/speed_estimation.py +7 -10
  188. ultralytics/solutions/streamlit_inference.py +4 -7
  189. ultralytics/solutions/templates/similarity-search.html +7 -18
  190. ultralytics/solutions/trackzone.py +7 -10
  191. ultralytics/solutions/vision_eye.py +5 -8
  192. ultralytics/trackers/__init__.py +1 -1
  193. ultralytics/trackers/basetrack.py +3 -5
  194. ultralytics/trackers/bot_sort.py +10 -27
  195. ultralytics/trackers/byte_tracker.py +14 -30
  196. ultralytics/trackers/track.py +3 -6
  197. ultralytics/trackers/utils/gmc.py +11 -22
  198. ultralytics/trackers/utils/kalman_filter.py +37 -48
  199. ultralytics/trackers/utils/matching.py +12 -15
  200. ultralytics/utils/__init__.py +116 -116
  201. ultralytics/utils/autobatch.py +2 -4
  202. ultralytics/utils/autodevice.py +17 -18
  203. ultralytics/utils/benchmarks.py +70 -70
  204. ultralytics/utils/callbacks/base.py +8 -10
  205. ultralytics/utils/callbacks/clearml.py +5 -13
  206. ultralytics/utils/callbacks/comet.py +32 -46
  207. ultralytics/utils/callbacks/dvc.py +13 -18
  208. ultralytics/utils/callbacks/mlflow.py +4 -5
  209. ultralytics/utils/callbacks/neptune.py +7 -15
  210. ultralytics/utils/callbacks/platform.py +314 -38
  211. ultralytics/utils/callbacks/raytune.py +3 -4
  212. ultralytics/utils/callbacks/tensorboard.py +23 -31
  213. ultralytics/utils/callbacks/wb.py +10 -13
  214. ultralytics/utils/checks.py +151 -87
  215. ultralytics/utils/cpu.py +3 -8
  216. ultralytics/utils/dist.py +19 -15
  217. ultralytics/utils/downloads.py +29 -41
  218. ultralytics/utils/errors.py +6 -14
  219. ultralytics/utils/events.py +2 -4
  220. ultralytics/utils/export/__init__.py +7 -0
  221. ultralytics/utils/{export.py → export/engine.py} +16 -16
  222. ultralytics/utils/export/imx.py +325 -0
  223. ultralytics/utils/export/tensorflow.py +231 -0
  224. ultralytics/utils/files.py +24 -28
  225. ultralytics/utils/git.py +9 -11
  226. ultralytics/utils/instance.py +30 -51
  227. ultralytics/utils/logger.py +212 -114
  228. ultralytics/utils/loss.py +15 -24
  229. ultralytics/utils/metrics.py +131 -160
  230. ultralytics/utils/nms.py +21 -30
  231. ultralytics/utils/ops.py +107 -165
  232. ultralytics/utils/patches.py +33 -21
  233. ultralytics/utils/plotting.py +122 -119
  234. ultralytics/utils/tal.py +28 -44
  235. ultralytics/utils/torch_utils.py +70 -187
  236. ultralytics/utils/tqdm.py +20 -20
  237. ultralytics/utils/triton.py +13 -19
  238. ultralytics/utils/tuner.py +17 -5
  239. dgenerate_ultralytics_headless-8.3.196.dist-info/RECORD +0 -281
  240. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
  241. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
  242. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
  243. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/modules/tiny_encoder.py
@@ -22,12 +22,11 @@ from ultralytics.utils.instance import to_2tuple


  class Conv2d_BN(torch.nn.Sequential):
- """
- A sequential container that performs 2D convolution followed by batch normalization.
+ """A sequential container that performs 2D convolution followed by batch normalization.

- This module combines a 2D convolution layer with batch normalization, providing a common building block
- for convolutional neural networks. The batch normalization weights and biases are initialized to specific
- values for optimal training performance.
+ This module combines a 2D convolution layer with batch normalization, providing a common building block for
+ convolutional neural networks. The batch normalization weights and biases are initialized to specific values for
+ optimal training performance.

  Attributes:
  c (torch.nn.Conv2d): 2D convolution layer.
@@ -52,8 +51,7 @@ class Conv2d_BN(torch.nn.Sequential):
  groups: int = 1,
  bn_weight_init: float = 1,
  ):
- """
- Initialize a sequential container with 2D convolution followed by batch normalization.
+ """Initialize a sequential container with 2D convolution followed by batch normalization.

  Args:
  a (int): Number of input channels.
@@ -74,11 +72,10 @@ class Conv2d_BN(torch.nn.Sequential):


  class PatchEmbed(nn.Module):
- """
- Embed images into patches and project them into a specified embedding dimension.
+ """Embed images into patches and project them into a specified embedding dimension.

- This module converts input images into patch embeddings using a sequence of convolutional layers,
- effectively downsampling the spatial dimensions while increasing the channel dimension.
+ This module converts input images into patch embeddings using a sequence of convolutional layers, effectively
+ downsampling the spatial dimensions while increasing the channel dimension.

  Attributes:
  patches_resolution (tuple[int, int]): Resolution of the patches after embedding.
@@ -97,8 +94,7 @@ class PatchEmbed(nn.Module):
  """

  def __init__(self, in_chans: int, embed_dim: int, resolution: int, activation):
- """
- Initialize patch embedding with convolutional layers for image-to-patch conversion and projection.
+ """Initialize patch embedding with convolutional layers for image-to-patch conversion and projection.

  Args:
  in_chans (int): Number of input channels.
@@ -125,11 +121,10 @@ class PatchEmbed(nn.Module):


  class MBConv(nn.Module):
- """
- Mobile Inverted Bottleneck Conv (MBConv) layer, part of the EfficientNet architecture.
+ """Mobile Inverted Bottleneck Conv (MBConv) layer, part of the EfficientNet architecture.

- This module implements the mobile inverted bottleneck convolution with expansion, depthwise convolution,
- and projection phases, along with residual connections for improved gradient flow.
+ This module implements the mobile inverted bottleneck convolution with expansion, depthwise convolution, and
+ projection phases, along with residual connections for improved gradient flow.

  Attributes:
  in_chans (int): Number of input channels.
@@ -153,8 +148,7 @@ class MBConv(nn.Module):
  """

  def __init__(self, in_chans: int, out_chans: int, expand_ratio: float, activation, drop_path: float):
- """
- Initialize the MBConv layer with specified input/output channels, expansion ratio, and activation.
+ """Initialize the MBConv layer with specified input/output channels, expansion ratio, and activation.

  Args:
  in_chans (int): Number of input channels.
@@ -195,12 +189,11 @@ class MBConv(nn.Module):


  class PatchMerging(nn.Module):
- """
- Merge neighboring patches in the feature map and project to a new dimension.
+ """Merge neighboring patches in the feature map and project to a new dimension.

- This class implements a patch merging operation that combines spatial information and adjusts the feature
- dimension using a series of convolutional layers with batch normalization. It effectively reduces spatial
- resolution while potentially increasing channel dimensions.
+ This class implements a patch merging operation that combines spatial information and adjusts the feature dimension
+ using a series of convolutional layers with batch normalization. It effectively reduces spatial resolution while
+ potentially increasing channel dimensions.

  Attributes:
  input_resolution (tuple[int, int]): The input resolution (height, width) of the feature map.
@@ -221,8 +214,7 @@ class PatchMerging(nn.Module):
  """

  def __init__(self, input_resolution: tuple[int, int], dim: int, out_dim: int, activation):
- """
- Initialize the PatchMerging module for merging and projecting neighboring patches in feature maps.
+ """Initialize the PatchMerging module for merging and projecting neighboring patches in feature maps.

  Args:
  input_resolution (tuple[int, int]): The input resolution (height, width) of the feature map.
@@ -259,11 +251,10 @@ class PatchMerging(nn.Module):


  class ConvLayer(nn.Module):
- """
- Convolutional Layer featuring multiple MobileNetV3-style inverted bottleneck convolutions (MBConv).
+ """Convolutional Layer featuring multiple MobileNetV3-style inverted bottleneck convolutions (MBConv).

- This layer optionally applies downsample operations to the output and supports gradient checkpointing
- for memory efficiency during training.
+ This layer optionally applies downsample operations to the output and supports gradient checkpointing for memory
+ efficiency during training.

  Attributes:
  dim (int): Dimensionality of the input and output.
@@ -293,11 +284,10 @@ class ConvLayer(nn.Module):
  out_dim: int | None = None,
  conv_expand_ratio: float = 4.0,
  ):
- """
- Initialize the ConvLayer with the given dimensions and settings.
+ """Initialize the ConvLayer with the given dimensions and settings.

- This layer consists of multiple MobileNetV3-style inverted bottleneck convolutions (MBConv) and
- optionally applies downsampling to the output.
+ This layer consists of multiple MobileNetV3-style inverted bottleneck convolutions (MBConv) and optionally
+ applies downsampling to the output.

  Args:
  dim (int): The dimensionality of the input and output.
@@ -307,7 +297,7 @@ class ConvLayer(nn.Module):
  drop_path (float | list[float], optional): Drop path rate. Single float or a list of floats for each MBConv.
  downsample (Optional[nn.Module], optional): Function for downsampling the output. None to skip downsampling.
  use_checkpoint (bool, optional): Whether to use gradient checkpointing to save memory.
- out_dim (Optional[int], optional): The dimensionality of the output. None means it will be the same as `dim`.
+ out_dim (Optional[int], optional): Output dimensions. None means it will be the same as `dim`.
  conv_expand_ratio (float, optional): Expansion ratio for the MBConv layers.
  """
  super().__init__()
@@ -345,11 +335,10 @@ class ConvLayer(nn.Module):


  class MLP(nn.Module):
- """
- Multi-layer Perceptron (MLP) module for transformer architectures.
+ """Multi-layer Perceptron (MLP) module for transformer architectures.

- This module applies layer normalization, two fully-connected layers with an activation function in between,
- and dropout. It is commonly used in transformer-based architectures for processing token embeddings.
+ This module applies layer normalization, two fully-connected layers with an activation function in between, and
+ dropout. It is commonly used in transformer-based architectures for processing token embeddings.

  Attributes:
  norm (nn.LayerNorm): Layer normalization applied to the input.
@@ -376,8 +365,7 @@ class MLP(nn.Module):
  activation=nn.GELU,
  drop: float = 0.0,
  ):
- """
- Initialize a multi-layer perceptron with configurable input, hidden, and output dimensions.
+ """Initialize a multi-layer perceptron with configurable input, hidden, and output dimensions.

  Args:
  in_features (int): Number of input features.
@@ -406,12 +394,11 @@ class MLP(nn.Module):


  class Attention(torch.nn.Module):
- """
- Multi-head attention module with spatial awareness and trainable attention biases.
+ """Multi-head attention module with spatial awareness and trainable attention biases.

- This module implements a multi-head attention mechanism with support for spatial awareness, applying
- attention biases based on spatial resolution. It includes trainable attention biases for each unique
- offset between spatial positions in the resolution grid.
+ This module implements a multi-head attention mechanism with support for spatial awareness, applying attention
+ biases based on spatial resolution. It includes trainable attention biases for each unique offset between spatial
+ positions in the resolution grid.

  Attributes:
  num_heads (int): Number of attention heads.
@@ -444,12 +431,11 @@ class Attention(torch.nn.Module):
  attn_ratio: float = 4,
  resolution: tuple[int, int] = (14, 14),
  ):
- """
- Initialize the Attention module for multi-head attention with spatial awareness.
+ """Initialize the Attention module for multi-head attention with spatial awareness.

- This module implements a multi-head attention mechanism with support for spatial awareness, applying
- attention biases based on spatial resolution. It includes trainable attention biases for each unique
- offset between spatial positions in the resolution grid.
+ This module implements a multi-head attention mechanism with support for spatial awareness, applying attention
+ biases based on spatial resolution. It includes trainable attention biases for each unique offset between
+ spatial positions in the resolution grid.

  Args:
  dim (int): The dimensionality of the input and output.
@@ -521,12 +507,11 @@ class Attention(torch.nn.Module):


  class TinyViTBlock(nn.Module):
- """
- TinyViT Block that applies self-attention and a local convolution to the input.
+ """TinyViT Block that applies self-attention and a local convolution to the input.

- This block is a key component of the TinyViT architecture, combining self-attention mechanisms with
- local convolutions to process input features efficiently. It supports windowed attention for
- computational efficiency and includes residual connections.
+ This block is a key component of the TinyViT architecture, combining self-attention mechanisms with local
+ convolutions to process input features efficiently. It supports windowed attention for computational efficiency and
+ includes residual connections.

  Attributes:
  dim (int): The dimensionality of the input and output.
@@ -559,11 +544,10 @@ class TinyViTBlock(nn.Module):
  local_conv_size: int = 3,
  activation=nn.GELU,
  ):
- """
- Initialize a TinyViT block with self-attention and local convolution.
+ """Initialize a TinyViT block with self-attention and local convolution.

- This block is a key component of the TinyViT architecture, combining self-attention mechanisms with
- local convolutions to process input features efficiently.
+ This block is a key component of the TinyViT architecture, combining self-attention mechanisms with local
+ convolutions to process input features efficiently.

  Args:
  dim (int): Dimensionality of the input and output features.
@@ -644,8 +628,7 @@ class TinyViTBlock(nn.Module):
  return x + self.drop_path(self.mlp(x))

  def extra_repr(self) -> str:
- """
- Return a string representation of the TinyViTBlock's parameters.
+ """Return a string representation of the TinyViTBlock's parameters.

  This method provides a formatted string containing key information about the TinyViTBlock, including its
  dimension, input resolution, number of attention heads, window size, and MLP ratio.
@@ -665,12 +648,11 @@ class TinyViTBlock(nn.Module):


  class BasicLayer(nn.Module):
- """
- A basic TinyViT layer for one stage in a TinyViT architecture.
+ """A basic TinyViT layer for one stage in a TinyViT architecture.

- This class represents a single layer in the TinyViT model, consisting of multiple TinyViT blocks
- and an optional downsampling operation. It processes features at a specific resolution and
- dimensionality within the overall architecture.
+ This class represents a single layer in the TinyViT model, consisting of multiple TinyViT blocks and an optional
+ downsampling operation. It processes features at a specific resolution and dimensionality within the overall
+ architecture.

  Attributes:
  dim (int): The dimensionality of the input and output features.
@@ -704,11 +686,10 @@ class BasicLayer(nn.Module):
  activation=nn.GELU,
  out_dim: int | None = None,
  ):
- """
- Initialize a BasicLayer in the TinyViT architecture.
+ """Initialize a BasicLayer in the TinyViT architecture.

- This layer consists of multiple TinyViT blocks and an optional downsampling operation. It is designed to
- process feature maps at a specific resolution and dimensionality within the TinyViT model.
+ This layer consists of multiple TinyViT blocks and an optional downsampling operation. It is designed to process
+ feature maps at a specific resolution and dimensionality within the TinyViT model.

  Args:
  dim (int): Dimensionality of the input and output features.
@@ -718,12 +699,14 @@ class BasicLayer(nn.Module):
  window_size (int): Size of the local window for attention computation.
  mlp_ratio (float, optional): Ratio of MLP hidden dimension to embedding dimension.
  drop (float, optional): Dropout rate.
- drop_path (float | list[float], optional): Stochastic depth rate. Can be a float or a list of floats for each block.
- downsample (nn.Module | None, optional): Downsampling layer at the end of the layer. None to skip downsampling.
+ drop_path (float | list[float], optional): Stochastic depth rate. Can be a float or a list of floats for
+ each block.
+ downsample (nn.Module | None, optional): Downsampling layer at the end of the layer. None to skip
+ downsampling.
  use_checkpoint (bool, optional): Whether to use gradient checkpointing to save memory.
  local_conv_size (int, optional): Kernel size for the local convolution in each TinyViT block.
  activation (nn.Module): Activation function used in the MLP.
- out_dim (int | None, optional): Output dimension after downsampling. None means it will be the same as `dim`.
+ out_dim (int | None, optional): Output dimension after downsampling. None means it will be the same as dim.
  """
  super().__init__()
  self.dim = dim
@@ -768,12 +751,11 @@ class BasicLayer(nn.Module):


  class TinyViT(nn.Module):
- """
- TinyViT: A compact vision transformer architecture for efficient image classification and feature extraction.
+ """TinyViT: A compact vision transformer architecture for efficient image classification and feature extraction.

- This class implements the TinyViT model, which combines elements of vision transformers and convolutional
- neural networks for improved efficiency and performance on vision tasks. It features hierarchical processing
- with patch embedding, multiple stages of attention and convolution blocks, and a feature refinement neck.
+ This class implements the TinyViT model, which combines elements of vision transformers and convolutional neural
+ networks for improved efficiency and performance on vision tasks. It features hierarchical processing with patch
+ embedding, multiple stages of attention and convolution blocks, and a feature refinement neck.

  Attributes:
  img_size (int): Input image size.
@@ -813,11 +795,10 @@ class TinyViT(nn.Module):
  local_conv_size: int = 3,
  layer_lr_decay: float = 1.0,
  ):
- """
- Initialize the TinyViT model.
+ """Initialize the TinyViT model.

- This constructor sets up the TinyViT architecture, including patch embedding, multiple layers of
- attention and convolution blocks, and a classification head.
+ This constructor sets up the TinyViT architecture, including patch embedding, multiple layers of attention and
+ convolution blocks, and a classification head.

  Args:
  img_size (int, optional): Size of the input image.
ultralytics/models/sam/modules/transformer.py
@@ -11,12 +11,10 @@ from ultralytics.nn.modules import MLPBlock


  class TwoWayTransformer(nn.Module):
- """
- A Two-Way Transformer module for simultaneous attention to image and query points.
+ """A Two-Way Transformer module for simultaneous attention to image and query points.

- This class implements a specialized transformer decoder that attends to an input image using queries with
- supplied positional embeddings. It's useful for tasks like object detection, image segmentation, and point
- cloud processing.
+ This class implements a specialized transformer decoder that attends to an input image using queries with supplied
+ positional embeddings. It's useful for tasks like object detection, image segmentation, and point cloud processing.

  Attributes:
  depth (int): Number of layers in the transformer.
@@ -48,8 +46,7 @@ class TwoWayTransformer(nn.Module):
  activation: type[nn.Module] = nn.ReLU,
  attention_downsample_rate: int = 2,
  ) -> None:
- """
- Initialize a Two-Way Transformer for simultaneous attention to image and query points.
+ """Initialize a Two-Way Transformer for simultaneous attention to image and query points.

  Args:
  depth (int): Number of layers in the transformer.
@@ -87,8 +84,7 @@ class TwoWayTransformer(nn.Module):
  image_pe: torch.Tensor,
  point_embedding: torch.Tensor,
  ) -> tuple[torch.Tensor, torch.Tensor]:
- """
- Process image and point embeddings through the Two-Way Transformer.
+ """Process image and point embeddings through the Two-Way Transformer.

  Args:
  image_embedding (torch.Tensor): Image to attend to, with shape (B, embedding_dim, H, W).
@@ -127,12 +123,11 @@ class TwoWayTransformer(nn.Module):


  class TwoWayAttentionBlock(nn.Module):
- """
- A two-way attention block for simultaneous attention to image and query points.
+ """A two-way attention block for simultaneous attention to image and query points.

  This class implements a specialized transformer block with four main layers: self-attention on sparse inputs,
- cross-attention of sparse inputs to dense inputs, MLP block on sparse inputs, and cross-attention of dense
- inputs to sparse inputs.
+ cross-attention of sparse inputs to dense inputs, MLP block on sparse inputs, and cross-attention of dense inputs to
+ sparse inputs.

  Attributes:
  self_attn (Attention): Self-attention layer for queries.
@@ -167,12 +162,11 @@ class TwoWayAttentionBlock(nn.Module):
  attention_downsample_rate: int = 2,
  skip_first_layer_pe: bool = False,
  ) -> None:
- """
- Initialize a TwoWayAttentionBlock for simultaneous attention to image and query points.
+ """Initialize a TwoWayAttentionBlock for simultaneous attention to image and query points.

  This block implements a specialized transformer layer with four main components: self-attention on sparse
- inputs, cross-attention of sparse inputs to dense inputs, MLP block on sparse inputs, and cross-attention
- of dense inputs to sparse inputs.
+ inputs, cross-attention of sparse inputs to dense inputs, MLP block on sparse inputs, and cross-attention of
+ dense inputs to sparse inputs.

  Args:
  embedding_dim (int): Channel dimension of the embeddings.
@@ -200,8 +194,7 @@ class TwoWayAttentionBlock(nn.Module):
  def forward(
  self, queries: torch.Tensor, keys: torch.Tensor, query_pe: torch.Tensor, key_pe: torch.Tensor
  ) -> tuple[torch.Tensor, torch.Tensor]:
- """
- Apply two-way attention to process query and key embeddings in a transformer block.
+ """Apply two-way attention to process query and key embeddings in a transformer block.

  Args:
  queries (torch.Tensor): Query embeddings with shape (B, N_queries, embedding_dim).
@@ -245,11 +238,10 @@ class TwoWayAttentionBlock(nn.Module):


  class Attention(nn.Module):
- """
- An attention layer with downscaling capability for embedding size after projection.
+ """An attention layer with downscaling capability for embedding size after projection.

- This class implements a multi-head attention mechanism with the option to downsample the internal
- dimension of queries, keys, and values.
+ This class implements a multi-head attention mechanism with the option to downsample the internal dimension of
+ queries, keys, and values.

  Attributes:
  embedding_dim (int): Dimensionality of input embeddings.
@@ -280,10 +272,9 @@ class Attention(nn.Module):
  embedding_dim: int,
  num_heads: int,
  downsample_rate: int = 1,
- kv_in_dim: int = None,
+ kv_in_dim: int | None = None,
  ) -> None:
- """
- Initialize the Attention module with specified dimensions and settings.
+ """Initialize the Attention module with specified dimensions and settings.

  Args:
  embedding_dim (int): Dimensionality of input embeddings.
@@ -321,8 +312,7 @@ class Attention(nn.Module):
  return x.reshape(b, n_tokens, n_heads * c_per_head) # B x N_tokens x C

  def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
- """
- Apply multi-head attention to query, key, and value tensors with optional downsampling.
+ """Apply multi-head attention to query, key, and value tensors with optional downsampling.

  Args:
  q (torch.Tensor): Query tensor with shape (B, N_q, embedding_dim).