dgenerate-ultralytics-headless 8.3.222__py3-none-any.whl → 8.3.225__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/METADATA +2 -2
  2. dgenerate_ultralytics_headless-8.3.225.dist-info/RECORD +286 -0
  3. tests/conftest.py +5 -8
  4. tests/test_cli.py +1 -8
  5. tests/test_python.py +1 -2
  6. ultralytics/__init__.py +1 -1
  7. ultralytics/cfg/__init__.py +34 -49
  8. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  9. ultralytics/cfg/datasets/kitti.yaml +27 -0
  10. ultralytics/cfg/datasets/lvis.yaml +5 -5
  11. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  12. ultralytics/data/annotator.py +3 -4
  13. ultralytics/data/augment.py +244 -323
  14. ultralytics/data/base.py +12 -22
  15. ultralytics/data/build.py +47 -40
  16. ultralytics/data/converter.py +32 -42
  17. ultralytics/data/dataset.py +43 -71
  18. ultralytics/data/loaders.py +22 -34
  19. ultralytics/data/split.py +5 -6
  20. ultralytics/data/split_dota.py +8 -15
  21. ultralytics/data/utils.py +27 -36
  22. ultralytics/engine/exporter.py +49 -116
  23. ultralytics/engine/model.py +144 -180
  24. ultralytics/engine/predictor.py +18 -29
  25. ultralytics/engine/results.py +165 -231
  26. ultralytics/engine/trainer.py +11 -19
  27. ultralytics/engine/tuner.py +13 -23
  28. ultralytics/engine/validator.py +6 -10
  29. ultralytics/hub/__init__.py +7 -12
  30. ultralytics/hub/auth.py +6 -12
  31. ultralytics/hub/google/__init__.py +7 -10
  32. ultralytics/hub/session.py +15 -25
  33. ultralytics/hub/utils.py +3 -6
  34. ultralytics/models/fastsam/model.py +6 -8
  35. ultralytics/models/fastsam/predict.py +5 -10
  36. ultralytics/models/fastsam/utils.py +1 -2
  37. ultralytics/models/fastsam/val.py +2 -4
  38. ultralytics/models/nas/model.py +5 -8
  39. ultralytics/models/nas/predict.py +7 -9
  40. ultralytics/models/nas/val.py +1 -2
  41. ultralytics/models/rtdetr/model.py +5 -8
  42. ultralytics/models/rtdetr/predict.py +15 -18
  43. ultralytics/models/rtdetr/train.py +10 -13
  44. ultralytics/models/rtdetr/val.py +13 -20
  45. ultralytics/models/sam/amg.py +12 -18
  46. ultralytics/models/sam/build.py +6 -9
  47. ultralytics/models/sam/model.py +16 -23
  48. ultralytics/models/sam/modules/blocks.py +62 -84
  49. ultralytics/models/sam/modules/decoders.py +17 -24
  50. ultralytics/models/sam/modules/encoders.py +40 -56
  51. ultralytics/models/sam/modules/memory_attention.py +10 -16
  52. ultralytics/models/sam/modules/sam.py +41 -47
  53. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  54. ultralytics/models/sam/modules/transformer.py +17 -27
  55. ultralytics/models/sam/modules/utils.py +31 -42
  56. ultralytics/models/sam/predict.py +172 -209
  57. ultralytics/models/utils/loss.py +14 -26
  58. ultralytics/models/utils/ops.py +13 -17
  59. ultralytics/models/yolo/classify/predict.py +8 -11
  60. ultralytics/models/yolo/classify/train.py +8 -16
  61. ultralytics/models/yolo/classify/val.py +13 -20
  62. ultralytics/models/yolo/detect/predict.py +4 -8
  63. ultralytics/models/yolo/detect/train.py +11 -20
  64. ultralytics/models/yolo/detect/val.py +38 -48
  65. ultralytics/models/yolo/model.py +35 -47
  66. ultralytics/models/yolo/obb/predict.py +5 -8
  67. ultralytics/models/yolo/obb/train.py +11 -14
  68. ultralytics/models/yolo/obb/val.py +20 -28
  69. ultralytics/models/yolo/pose/predict.py +5 -8
  70. ultralytics/models/yolo/pose/train.py +4 -8
  71. ultralytics/models/yolo/pose/val.py +31 -39
  72. ultralytics/models/yolo/segment/predict.py +9 -14
  73. ultralytics/models/yolo/segment/train.py +3 -6
  74. ultralytics/models/yolo/segment/val.py +16 -26
  75. ultralytics/models/yolo/world/train.py +8 -14
  76. ultralytics/models/yolo/world/train_world.py +11 -16
  77. ultralytics/models/yolo/yoloe/predict.py +16 -23
  78. ultralytics/models/yolo/yoloe/train.py +30 -43
  79. ultralytics/models/yolo/yoloe/train_seg.py +5 -10
  80. ultralytics/models/yolo/yoloe/val.py +15 -20
  81. ultralytics/nn/autobackend.py +10 -18
  82. ultralytics/nn/modules/activation.py +4 -6
  83. ultralytics/nn/modules/block.py +99 -185
  84. ultralytics/nn/modules/conv.py +45 -90
  85. ultralytics/nn/modules/head.py +44 -98
  86. ultralytics/nn/modules/transformer.py +44 -76
  87. ultralytics/nn/modules/utils.py +14 -19
  88. ultralytics/nn/tasks.py +86 -146
  89. ultralytics/nn/text_model.py +25 -40
  90. ultralytics/solutions/ai_gym.py +10 -16
  91. ultralytics/solutions/analytics.py +7 -10
  92. ultralytics/solutions/config.py +4 -5
  93. ultralytics/solutions/distance_calculation.py +9 -12
  94. ultralytics/solutions/heatmap.py +7 -13
  95. ultralytics/solutions/instance_segmentation.py +5 -8
  96. ultralytics/solutions/object_blurrer.py +7 -10
  97. ultralytics/solutions/object_counter.py +8 -12
  98. ultralytics/solutions/object_cropper.py +5 -8
  99. ultralytics/solutions/parking_management.py +12 -14
  100. ultralytics/solutions/queue_management.py +4 -6
  101. ultralytics/solutions/region_counter.py +7 -10
  102. ultralytics/solutions/security_alarm.py +14 -19
  103. ultralytics/solutions/similarity_search.py +7 -12
  104. ultralytics/solutions/solutions.py +31 -53
  105. ultralytics/solutions/speed_estimation.py +6 -9
  106. ultralytics/solutions/streamlit_inference.py +2 -4
  107. ultralytics/solutions/trackzone.py +7 -10
  108. ultralytics/solutions/vision_eye.py +5 -8
  109. ultralytics/trackers/basetrack.py +2 -4
  110. ultralytics/trackers/bot_sort.py +6 -11
  111. ultralytics/trackers/byte_tracker.py +10 -15
  112. ultralytics/trackers/track.py +3 -6
  113. ultralytics/trackers/utils/gmc.py +6 -12
  114. ultralytics/trackers/utils/kalman_filter.py +35 -43
  115. ultralytics/trackers/utils/matching.py +6 -10
  116. ultralytics/utils/__init__.py +61 -100
  117. ultralytics/utils/autobatch.py +2 -4
  118. ultralytics/utils/autodevice.py +11 -13
  119. ultralytics/utils/benchmarks.py +25 -35
  120. ultralytics/utils/callbacks/base.py +8 -10
  121. ultralytics/utils/callbacks/clearml.py +2 -4
  122. ultralytics/utils/callbacks/comet.py +30 -44
  123. ultralytics/utils/callbacks/dvc.py +13 -18
  124. ultralytics/utils/callbacks/mlflow.py +4 -5
  125. ultralytics/utils/callbacks/neptune.py +4 -6
  126. ultralytics/utils/callbacks/raytune.py +3 -4
  127. ultralytics/utils/callbacks/tensorboard.py +4 -6
  128. ultralytics/utils/callbacks/wb.py +10 -13
  129. ultralytics/utils/checks.py +29 -56
  130. ultralytics/utils/cpu.py +1 -2
  131. ultralytics/utils/dist.py +8 -12
  132. ultralytics/utils/downloads.py +17 -27
  133. ultralytics/utils/errors.py +6 -8
  134. ultralytics/utils/events.py +2 -4
  135. ultralytics/utils/export/__init__.py +4 -239
  136. ultralytics/utils/export/engine.py +237 -0
  137. ultralytics/utils/export/imx.py +11 -17
  138. ultralytics/utils/export/tensorflow.py +217 -0
  139. ultralytics/utils/files.py +10 -15
  140. ultralytics/utils/git.py +5 -7
  141. ultralytics/utils/instance.py +30 -51
  142. ultralytics/utils/logger.py +11 -15
  143. ultralytics/utils/loss.py +8 -14
  144. ultralytics/utils/metrics.py +98 -138
  145. ultralytics/utils/nms.py +13 -16
  146. ultralytics/utils/ops.py +47 -74
  147. ultralytics/utils/patches.py +11 -18
  148. ultralytics/utils/plotting.py +29 -42
  149. ultralytics/utils/tal.py +25 -39
  150. ultralytics/utils/torch_utils.py +45 -73
  151. ultralytics/utils/tqdm.py +6 -8
  152. ultralytics/utils/triton.py +9 -12
  153. ultralytics/utils/tuner.py +1 -2
  154. dgenerate_ultralytics_headless-8.3.222.dist-info/RECORD +0 -283
  155. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/WHEEL +0 -0
  156. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/entry_points.txt +0 -0
  157. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/licenses/LICENSE +0 -0
  158. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/modules/memory_attention.py

@@ -11,8 +11,7 @@ from .blocks import RoPEAttention
 
 
  class MemoryAttentionLayer(nn.Module):
- """
- Implements a memory attention layer with self-attention and cross-attention mechanisms for neural networks.
+ """Implements a memory attention layer with self-attention and cross-attention mechanisms for neural networks.
 
  This class combines self-attention, cross-attention, and feedforward components to process input tensors and
  generate memory-based attention outputs.
@@ -61,8 +60,7 @@ class MemoryAttentionLayer(nn.Module):
  pos_enc_at_cross_attn_keys: bool = True,
  pos_enc_at_cross_attn_queries: bool = False,
  ):
- """
- Initialize a memory attention layer with self-attention, cross-attention, and feedforward components.
+ """Initialize a memory attention layer with self-attention, cross-attention, and feedforward components.
 
  Args:
  d_model (int): Dimensionality of the model.
@@ -145,8 +143,7 @@ class MemoryAttentionLayer(nn.Module):
  query_pos: torch.Tensor | None = None,
  num_k_exclude_rope: int = 0,
  ) -> torch.Tensor:
- """
- Process input tensors through self-attention, cross-attention, and feedforward network layers.
+ """Process input tensors through self-attention, cross-attention, and feedforward network layers.
 
  Args:
  tgt (torch.Tensor): Target tensor for self-attention with shape (N, L, D).
@@ -168,11 +165,10 @@ class MemoryAttentionLayer(nn.Module):
 
 
  class MemoryAttention(nn.Module):
- """
- Memory attention module for processing sequential data with self and cross-attention mechanisms.
+ """Memory attention module for processing sequential data with self and cross-attention mechanisms.
 
- This class implements a multi-layer attention mechanism that combines self-attention and cross-attention
- for processing sequential data, particularly useful in transformer-like architectures.
+ This class implements a multi-layer attention mechanism that combines self-attention and cross-attention for
+ processing sequential data, particularly useful in transformer-like architectures.
 
  Attributes:
  d_model (int): The dimension of the model's hidden state.
@@ -206,11 +202,10 @@ class MemoryAttention(nn.Module):
  num_layers: int,
  batch_first: bool = True,  # Do layers expect batch first input?
  ):
- """
- Initialize MemoryAttention with specified layers and normalization for sequential data processing.
+ """Initialize MemoryAttention with specified layers and normalization for sequential data processing.
 
- This class implements a multi-layer attention mechanism that combines self-attention and cross-attention
- for processing sequential data, particularly useful in transformer-like architectures.
+ This class implements a multi-layer attention mechanism that combines self-attention and cross-attention for
+ processing sequential data, particularly useful in transformer-like architectures.
 
  Args:
  d_model (int): The dimension of the model's hidden state.
@@ -247,8 +242,7 @@ class MemoryAttention(nn.Module):
  memory_pos: torch.Tensor | None = None,  # pos_enc for cross-attention inputs
  num_obj_ptr_tokens: int = 0,  # number of object pointer *tokens*
  ) -> torch.Tensor:
- """
- Process inputs through attention layers, applying self and cross-attention with positional encoding.
+ """Process inputs through attention layers, applying self and cross-attention with positional encoding.
 
  Args:
  curr (torch.Tensor): Self-attention input tensor, representing the current state.
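Nearly all of the hunks in this release are the same mechanical docstring reflow rather than behavior changes: the one-line summary moves up onto the opening `"""` line and the remaining description is re-wrapped to a wider column, consistent with the pydocstyle convention that a multi-line docstring summary starts on the first line. A minimal before/after illustration of the pattern (illustrative only, not code taken from the package):

```python
# Illustrative only: the docstring reflow pattern applied across these hunks.

def before():
    """
    Process inputs through attention layers.

    Longer description wrapped at a narrower column,
    continuing on the next line.
    """


def after():
    """Process inputs through attention layers.

    Longer description re-wrapped at a wider column, continuing on the next line.
    """
```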
ultralytics/models/sam/modules/sam.py

@@ -23,11 +23,10 @@ NO_OBJ_SCORE = -1024.0
 
 
  class SAMModel(nn.Module):
- """
- Segment Anything Model (SAM) for object segmentation tasks.
+ """Segment Anything Model (SAM) for object segmentation tasks.
 
- This class combines image encoders, prompt encoders, and mask decoders to predict object masks from images
- and input prompts.
+ This class combines image encoders, prompt encoders, and mask decoders to predict object masks from images and input
+ prompts.
 
  Attributes:
  mask_threshold (float): Threshold value for mask prediction.
@@ -61,8 +60,7 @@ class SAMModel(nn.Module):
  pixel_mean: list[float] = (123.675, 116.28, 103.53),
  pixel_std: list[float] = (58.395, 57.12, 57.375),
  ) -> None:
- """
- Initialize the SAMModel class to predict object masks from an image and input prompts.
+ """Initialize the SAMModel class to predict object masks from an image and input prompts.
 
  Args:
  image_encoder (ImageEncoderViT): The backbone used to encode the image into image embeddings.
@@ -98,11 +96,10 @@ class SAMModel(nn.Module):
 
 
  class SAM2Model(torch.nn.Module):
- """
- SAM2Model class for Segment Anything Model 2 with memory-based video object segmentation capabilities.
+ """SAM2Model class for Segment Anything Model 2 with memory-based video object segmentation capabilities.
 
- This class extends the functionality of SAM to handle video sequences, incorporating memory mechanisms
- for temporal consistency and efficient tracking of objects across frames.
+ This class extends the functionality of SAM to handle video sequences, incorporating memory mechanisms for temporal
+ consistency and efficient tracking of objects across frames.
 
  Attributes:
  mask_threshold (float): Threshold value for mask prediction.
@@ -136,24 +133,24 @@ class SAM2Model(torch.nn.Module):
  use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection.
  no_obj_embed_spatial (torch.Tensor | None): No-object embedding for spatial frames.
  max_cond_frames_in_attn (int): Maximum number of conditioning frames to participate in memory attention.
- directly_add_no_mem_embed (bool): Whether to directly add no-memory embedding to image feature on the
- first frame.
- multimask_output_in_sam (bool): Whether to output multiple masks for the first click on initial
- conditioning frames.
+ directly_add_no_mem_embed (bool): Whether to directly add no-memory embedding to image feature on the first
+ frame.
+ multimask_output_in_sam (bool): Whether to output multiple masks for the first click on initial conditioning
+ frames.
  multimask_min_pt_num (int): Minimum number of clicks to use multimask output in SAM.
  multimask_max_pt_num (int): Maximum number of clicks to use multimask output in SAM.
  multimask_output_for_tracking (bool): Whether to use multimask output for tracking.
  use_multimask_token_for_obj_ptr (bool): Whether to use multimask tokens for object pointers.
  iou_prediction_use_sigmoid (bool): Whether to use sigmoid to restrict IoU prediction to [0-1].
  memory_temporal_stride_for_eval (int): Memory bank's temporal stride during evaluation.
- non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in
- memory encoder during evaluation.
+ non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in memory
+ encoder during evaluation.
  sigmoid_scale_for_mem_enc (float): Scale factor for mask sigmoid probability.
  sigmoid_bias_for_mem_enc (float): Bias factor for mask sigmoid probability.
- binarize_mask_from_pts_for_mem_enc (bool): Whether to binarize sigmoid mask logits on interacted frames
- with clicks during evaluation.
- use_mask_input_as_output_without_sam (bool): Whether to directly output the input mask without using SAM
- prompt encoder and mask decoder on frames with mask input.
+ binarize_mask_from_pts_for_mem_enc (bool): Whether to binarize sigmoid mask logits on interacted frames with
+ clicks during evaluation.
+ use_mask_input_as_output_without_sam (bool): Whether to directly output the input mask without using SAM prompt
+ encoder and mask decoder on frames with mask input.
 
  Methods:
  forward_image: Process image batch through encoder to extract multi-level features.
@@ -208,8 +205,7 @@ class SAM2Model(torch.nn.Module):
  sam_mask_decoder_extra_args=None,
  compile_image_encoder: bool = False,
  ):
- """
- Initialize the SAM2Model for video object segmentation with memory-based tracking.
+ """Initialize the SAM2Model for video object segmentation with memory-based tracking.
 
  Args:
  image_encoder (nn.Module): Visual encoder for extracting image features.
@@ -220,35 +216,35 @@ class SAM2Model(torch.nn.Module):
  backbone_stride (int): Stride of the image backbone output.
  sigmoid_scale_for_mem_enc (float): Scale factor for mask sigmoid probability.
  sigmoid_bias_for_mem_enc (float): Bias factor for mask sigmoid probability.
- binarize_mask_from_pts_for_mem_enc (bool): Whether to binarize sigmoid mask logits on interacted frames
- with clicks during evaluation.
+ binarize_mask_from_pts_for_mem_enc (bool): Whether to binarize sigmoid mask logits on interacted frames with
+ clicks during evaluation.
  use_mask_input_as_output_without_sam (bool): Whether to directly output the input mask without using SAM
  prompt encoder and mask decoder on frames with mask input.
  max_cond_frames_in_attn (int): Maximum number of conditioning frames to participate in memory attention.
- directly_add_no_mem_embed (bool): Whether to directly add no-memory embedding to image feature on the
- first frame.
+ directly_add_no_mem_embed (bool): Whether to directly add no-memory embedding to image feature on the first
+ frame.
  use_high_res_features_in_sam (bool): Whether to use high-resolution feature maps in the SAM mask decoder.
- multimask_output_in_sam (bool): Whether to output multiple masks for the first click on initial
- conditioning frames.
+ multimask_output_in_sam (bool): Whether to output multiple masks for the first click on initial conditioning
+ frames.
  multimask_min_pt_num (int): Minimum number of clicks to use multimask output in SAM.
  multimask_max_pt_num (int): Maximum number of clicks to use multimask output in SAM.
  multimask_output_for_tracking (bool): Whether to use multimask output for tracking.
  use_multimask_token_for_obj_ptr (bool): Whether to use multimask tokens for object pointers.
  iou_prediction_use_sigmoid (bool): Whether to use sigmoid to restrict IoU prediction to [0-1].
  memory_temporal_stride_for_eval (int): Memory bank's temporal stride during evaluation.
- non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in
- memory encoder during evaluation.
+ non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in memory
+ encoder during evaluation.
  use_obj_ptrs_in_encoder (bool): Whether to cross-attend to object pointers from other frames in the encoder.
  max_obj_ptrs_in_encoder (int): Maximum number of object pointers from other frames in encoder
  cross-attention.
- add_tpos_enc_to_obj_ptrs (bool): Whether to add temporal positional encoding to object pointers in
- the encoder.
+ add_tpos_enc_to_obj_ptrs (bool): Whether to add temporal positional encoding to object pointers in the
+ encoder.
  proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional
  encoding in object pointers.
  use_signed_tpos_enc_to_obj_ptrs (bool): Whether to use signed distance in the temporal positional encoding
  in the object pointers.
- only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past
- during evaluation.
+ only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past during
+ evaluation.
  pred_obj_scores (bool): Whether to predict if there is an object in the frame.
  pred_obj_scores_mlp (bool): Whether to use an MLP to predict object scores.
  fixed_no_obj_ptr (bool): Whether to have a fixed no-object pointer when there is no object present.
@@ -428,25 +424,23 @@ class SAM2Model(torch.nn.Module):
  high_res_features=None,
  multimask_output=False,
  ):
- """
- Forward pass through SAM prompt encoders and mask heads.
+ """Forward pass through SAM prompt encoders and mask heads.
 
  This method processes image features and optional point/mask inputs to generate object masks and scores.
 
  Args:
  backbone_features (torch.Tensor): Image features with shape (B, C, H, W).
  point_inputs (dict[str, torch.Tensor] | None): Dictionary containing point prompts.
- 'point_coords': Tensor of shape (B, P, 2) with float32 dtype, containing absolute
- pixel-unit coordinates in (x, y) format for P input points.
- 'point_labels': Tensor of shape (B, P) with int32 dtype, where 1 means positive clicks,
- 0 means negative clicks, and -1 means padding.
- mask_inputs (torch.Tensor | None): Mask of shape (B, 1, H*16, W*16), float or bool, with the
- same spatial size as the image.
- high_res_features (list[torch.Tensor] | None): List of two feature maps with shapes
- (B, C, 4*H, 4*W) and (B, C, 2*H, 2*W) respectively, used as high-resolution feature maps
- for SAM decoder.
- multimask_output (bool): If True, output 3 candidate masks and their IoU estimates; if False,
- output only 1 mask and its IoU estimate.
+ 'point_coords': Tensor of shape (B, P, 2) with float32 dtype, containing absolute pixel-unit coordinates in
+ (x, y) format for P input points.
+ 'point_labels': Tensor of shape (B, P) with int32 dtype, where 1 means positive clicks, 0 means negative
+ clicks, and -1 means padding.
+ mask_inputs (torch.Tensor | None): Mask of shape (B, 1, H*16, W*16), float or bool, with the same spatial
+ size as the image.
+ high_res_features (list[torch.Tensor] | None): List of two feature maps with shapes (B, C, 4*H, 4*W) and (B,
+ C, 2*H, 2*W) respectively, used as high-resolution feature maps for SAM decoder.
+ multimask_output (bool): If True, output 3 candidate masks and their IoU estimates; if False, output only 1
+ mask and its IoU estimate.
 
  Returns:
  low_res_multimasks (torch.Tensor): Tensor of shape (B, M, H*4, W*4) with SAM output mask logits.
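For orientation on the prompt layout documented in the SAM2Model hunk above, the sketch below constructs tensors with the documented shapes and dtypes. The variable names and example sizes are illustrative assumptions only; nothing here calls into the package itself.

```python
import torch

B, P, H, W = 1, 2, 64, 64  # assumed example sizes; H, W are the backbone feature-map height/width

# Point prompts as described above: absolute (x, y) pixel coordinates as float32,
# labels as int32 where 1 = positive click, 0 = negative click, -1 = padding.
point_inputs = {
    "point_coords": torch.tensor([[[320.0, 240.0], [100.0, 80.0]]], dtype=torch.float32),  # (B, P, 2)
    "point_labels": torch.tensor([[1, 0]], dtype=torch.int32),  # (B, P)
}

# Optional dense mask prompt with the same spatial size as the image: (B, 1, H*16, W*16).
mask_inputs = torch.zeros(B, 1, H * 16, W * 16, dtype=torch.float32)
```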
ultralytics/models/sam/modules/tiny_encoder.py

@@ -22,12 +22,11 @@ from ultralytics.utils.instance import to_2tuple
 
 
  class Conv2d_BN(torch.nn.Sequential):
- """
- A sequential container that performs 2D convolution followed by batch normalization.
+ """A sequential container that performs 2D convolution followed by batch normalization.
 
- This module combines a 2D convolution layer with batch normalization, providing a common building block
- for convolutional neural networks. The batch normalization weights and biases are initialized to specific
- values for optimal training performance.
+ This module combines a 2D convolution layer with batch normalization, providing a common building block for
+ convolutional neural networks. The batch normalization weights and biases are initialized to specific values for
+ optimal training performance.
 
  Attributes:
  c (torch.nn.Conv2d): 2D convolution layer.
@@ -52,8 +51,7 @@ class Conv2d_BN(torch.nn.Sequential):
  groups: int = 1,
  bn_weight_init: float = 1,
  ):
- """
- Initialize a sequential container with 2D convolution followed by batch normalization.
+ """Initialize a sequential container with 2D convolution followed by batch normalization.
 
  Args:
  a (int): Number of input channels.
@@ -74,11 +72,10 @@ class Conv2d_BN(torch.nn.Sequential):
 
 
  class PatchEmbed(nn.Module):
- """
- Embed images into patches and project them into a specified embedding dimension.
+ """Embed images into patches and project them into a specified embedding dimension.
 
- This module converts input images into patch embeddings using a sequence of convolutional layers,
- effectively downsampling the spatial dimensions while increasing the channel dimension.
+ This module converts input images into patch embeddings using a sequence of convolutional layers, effectively
+ downsampling the spatial dimensions while increasing the channel dimension.
 
  Attributes:
  patches_resolution (tuple[int, int]): Resolution of the patches after embedding.
@@ -97,8 +94,7 @@ class PatchEmbed(nn.Module):
  """
 
  def __init__(self, in_chans: int, embed_dim: int, resolution: int, activation):
- """
- Initialize patch embedding with convolutional layers for image-to-patch conversion and projection.
+ """Initialize patch embedding with convolutional layers for image-to-patch conversion and projection.
 
  Args:
  in_chans (int): Number of input channels.
@@ -125,11 +121,10 @@ class PatchEmbed(nn.Module):
 
 
  class MBConv(nn.Module):
- """
- Mobile Inverted Bottleneck Conv (MBConv) layer, part of the EfficientNet architecture.
+ """Mobile Inverted Bottleneck Conv (MBConv) layer, part of the EfficientNet architecture.
 
- This module implements the mobile inverted bottleneck convolution with expansion, depthwise convolution,
- and projection phases, along with residual connections for improved gradient flow.
+ This module implements the mobile inverted bottleneck convolution with expansion, depthwise convolution, and
+ projection phases, along with residual connections for improved gradient flow.
 
  Attributes:
  in_chans (int): Number of input channels.
@@ -153,8 +148,7 @@ class MBConv(nn.Module):
  """
 
  def __init__(self, in_chans: int, out_chans: int, expand_ratio: float, activation, drop_path: float):
- """
- Initialize the MBConv layer with specified input/output channels, expansion ratio, and activation.
+ """Initialize the MBConv layer with specified input/output channels, expansion ratio, and activation.
 
  Args:
  in_chans (int): Number of input channels.
@@ -195,12 +189,11 @@ class MBConv(nn.Module):
 
 
  class PatchMerging(nn.Module):
- """
- Merge neighboring patches in the feature map and project to a new dimension.
+ """Merge neighboring patches in the feature map and project to a new dimension.
 
- This class implements a patch merging operation that combines spatial information and adjusts the feature
- dimension using a series of convolutional layers with batch normalization. It effectively reduces spatial
- resolution while potentially increasing channel dimensions.
+ This class implements a patch merging operation that combines spatial information and adjusts the feature dimension
+ using a series of convolutional layers with batch normalization. It effectively reduces spatial resolution while
+ potentially increasing channel dimensions.
 
  Attributes:
  input_resolution (tuple[int, int]): The input resolution (height, width) of the feature map.
@@ -221,8 +214,7 @@ class PatchMerging(nn.Module):
  """
 
  def __init__(self, input_resolution: tuple[int, int], dim: int, out_dim: int, activation):
- """
- Initialize the PatchMerging module for merging and projecting neighboring patches in feature maps.
+ """Initialize the PatchMerging module for merging and projecting neighboring patches in feature maps.
 
  Args:
  input_resolution (tuple[int, int]): The input resolution (height, width) of the feature map.
@@ -259,11 +251,10 @@ class PatchMerging(nn.Module):
 
 
  class ConvLayer(nn.Module):
- """
- Convolutional Layer featuring multiple MobileNetV3-style inverted bottleneck convolutions (MBConv).
+ """Convolutional Layer featuring multiple MobileNetV3-style inverted bottleneck convolutions (MBConv).
 
- This layer optionally applies downsample operations to the output and supports gradient checkpointing
- for memory efficiency during training.
+ This layer optionally applies downsample operations to the output and supports gradient checkpointing for memory
+ efficiency during training.
 
  Attributes:
  dim (int): Dimensionality of the input and output.
@@ -293,11 +284,10 @@ class ConvLayer(nn.Module):
  out_dim: int | None = None,
  conv_expand_ratio: float = 4.0,
  ):
- """
- Initialize the ConvLayer with the given dimensions and settings.
+ """Initialize the ConvLayer with the given dimensions and settings.
 
- This layer consists of multiple MobileNetV3-style inverted bottleneck convolutions (MBConv) and
- optionally applies downsampling to the output.
+ This layer consists of multiple MobileNetV3-style inverted bottleneck convolutions (MBConv) and optionally
+ applies downsampling to the output.
 
  Args:
  dim (int): The dimensionality of the input and output.
@@ -307,7 +297,7 @@ class ConvLayer(nn.Module):
  drop_path (float | list[float], optional): Drop path rate. Single float or a list of floats for each MBConv.
  downsample (Optional[nn.Module], optional): Function for downsampling the output. None to skip downsampling.
  use_checkpoint (bool, optional): Whether to use gradient checkpointing to save memory.
- out_dim (Optional[int], optional): The dimensionality of the output. None means it will be the same as `dim`.
+ out_dim (Optional[int], optional): Output dimensions. None means it will be the same as `dim`.
  conv_expand_ratio (float, optional): Expansion ratio for the MBConv layers.
  """
  super().__init__()
@@ -345,11 +335,10 @@ class ConvLayer(nn.Module):
 
 
  class MLP(nn.Module):
- """
- Multi-layer Perceptron (MLP) module for transformer architectures.
+ """Multi-layer Perceptron (MLP) module for transformer architectures.
 
- This module applies layer normalization, two fully-connected layers with an activation function in between,
- and dropout. It is commonly used in transformer-based architectures for processing token embeddings.
+ This module applies layer normalization, two fully-connected layers with an activation function in between, and
+ dropout. It is commonly used in transformer-based architectures for processing token embeddings.
 
  Attributes:
  norm (nn.LayerNorm): Layer normalization applied to the input.
@@ -376,8 +365,7 @@ class MLP(nn.Module):
  activation=nn.GELU,
  drop: float = 0.0,
  ):
- """
- Initialize a multi-layer perceptron with configurable input, hidden, and output dimensions.
+ """Initialize a multi-layer perceptron with configurable input, hidden, and output dimensions.
 
  Args:
  in_features (int): Number of input features.
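The MLP covered in the hunks just above is a small, self-contained module, so a sketch of the structure its docstring lists (LayerNorm, two fully-connected layers with an activation in between, and dropout) may help when reading the diff. This is an assumed re-implementation for illustration; the class name, default handling, and forward ordering are chosen here rather than taken from the package.

```python
import torch
import torch.nn as nn


class MLPSketch(nn.Module):
    # Hypothetical re-implementation of the structure the MLP docstring above describes:
    # LayerNorm -> Linear -> activation -> Dropout -> Linear -> Dropout.
    def __init__(self, in_features: int, hidden_features: int | None = None,
                 out_features: int | None = None, activation=nn.GELU, drop: float = 0.0):
        super().__init__()
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features
        self.norm = nn.LayerNorm(in_features)
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = activation()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.drop(self.act(self.fc1(self.norm(x))))
        return self.drop(self.fc2(x))


# Example: token embeddings of shape (batch, tokens, features).
tokens = torch.randn(2, 196, 96)
out = MLPSketch(96, hidden_features=384)(tokens)  # -> (2, 196, 96)
```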
@@ -406,12 +394,11 @@ class MLP(nn.Module):
 
 
  class Attention(torch.nn.Module):
- """
- Multi-head attention module with spatial awareness and trainable attention biases.
+ """Multi-head attention module with spatial awareness and trainable attention biases.
 
- This module implements a multi-head attention mechanism with support for spatial awareness, applying
- attention biases based on spatial resolution. It includes trainable attention biases for each unique
- offset between spatial positions in the resolution grid.
+ This module implements a multi-head attention mechanism with support for spatial awareness, applying attention
+ biases based on spatial resolution. It includes trainable attention biases for each unique offset between spatial
+ positions in the resolution grid.
 
  Attributes:
  num_heads (int): Number of attention heads.
@@ -444,12 +431,11 @@ class Attention(torch.nn.Module):
  attn_ratio: float = 4,
  resolution: tuple[int, int] = (14, 14),
  ):
- """
- Initialize the Attention module for multi-head attention with spatial awareness.
+ """Initialize the Attention module for multi-head attention with spatial awareness.
 
- This module implements a multi-head attention mechanism with support for spatial awareness, applying
- attention biases based on spatial resolution. It includes trainable attention biases for each unique
- offset between spatial positions in the resolution grid.
+ This module implements a multi-head attention mechanism with support for spatial awareness, applying attention
+ biases based on spatial resolution. It includes trainable attention biases for each unique offset between
+ spatial positions in the resolution grid.
 
  Args:
  dim (int): The dimensionality of the input and output.
@@ -521,12 +507,11 @@ class Attention(torch.nn.Module):
 
 
  class TinyViTBlock(nn.Module):
- """
- TinyViT Block that applies self-attention and a local convolution to the input.
+ """TinyViT Block that applies self-attention and a local convolution to the input.
 
- This block is a key component of the TinyViT architecture, combining self-attention mechanisms with
- local convolutions to process input features efficiently. It supports windowed attention for
- computational efficiency and includes residual connections.
+ This block is a key component of the TinyViT architecture, combining self-attention mechanisms with local
+ convolutions to process input features efficiently. It supports windowed attention for computational efficiency and
+ includes residual connections.
 
  Attributes:
  dim (int): The dimensionality of the input and output.
@@ -559,11 +544,10 @@ class TinyViTBlock(nn.Module):
  local_conv_size: int = 3,
  activation=nn.GELU,
  ):
- """
- Initialize a TinyViT block with self-attention and local convolution.
+ """Initialize a TinyViT block with self-attention and local convolution.
 
- This block is a key component of the TinyViT architecture, combining self-attention mechanisms with
- local convolutions to process input features efficiently.
+ This block is a key component of the TinyViT architecture, combining self-attention mechanisms with local
+ convolutions to process input features efficiently.
 
  Args:
  dim (int): Dimensionality of the input and output features.
@@ -644,8 +628,7 @@ class TinyViTBlock(nn.Module):
  return x + self.drop_path(self.mlp(x))
 
  def extra_repr(self) -> str:
- """
- Return a string representation of the TinyViTBlock's parameters.
+ """Return a string representation of the TinyViTBlock's parameters.
 
  This method provides a formatted string containing key information about the TinyViTBlock, including its
  dimension, input resolution, number of attention heads, window size, and MLP ratio.
@@ -665,12 +648,11 @@ class TinyViTBlock(nn.Module):
 
 
  class BasicLayer(nn.Module):
- """
- A basic TinyViT layer for one stage in a TinyViT architecture.
+ """A basic TinyViT layer for one stage in a TinyViT architecture.
 
- This class represents a single layer in the TinyViT model, consisting of multiple TinyViT blocks
- and an optional downsampling operation. It processes features at a specific resolution and
- dimensionality within the overall architecture.
+ This class represents a single layer in the TinyViT model, consisting of multiple TinyViT blocks and an optional
+ downsampling operation. It processes features at a specific resolution and dimensionality within the overall
+ architecture.
 
  Attributes:
  dim (int): The dimensionality of the input and output features.
@@ -704,11 +686,10 @@ class BasicLayer(nn.Module):
  activation=nn.GELU,
  out_dim: int | None = None,
  ):
- """
- Initialize a BasicLayer in the TinyViT architecture.
+ """Initialize a BasicLayer in the TinyViT architecture.
 
- This layer consists of multiple TinyViT blocks and an optional downsampling operation. It is designed to
- process feature maps at a specific resolution and dimensionality within the TinyViT model.
+ This layer consists of multiple TinyViT blocks and an optional downsampling operation. It is designed to process
+ feature maps at a specific resolution and dimensionality within the TinyViT model.
 
  Args:
  dim (int): Dimensionality of the input and output features.
@@ -718,12 +699,14 @@ class BasicLayer(nn.Module):
  window_size (int): Size of the local window for attention computation.
  mlp_ratio (float, optional): Ratio of MLP hidden dimension to embedding dimension.
  drop (float, optional): Dropout rate.
- drop_path (float | list[float], optional): Stochastic depth rate. Can be a float or a list of floats for each block.
- downsample (nn.Module | None, optional): Downsampling layer at the end of the layer. None to skip downsampling.
+ drop_path (float | list[float], optional): Stochastic depth rate. Can be a float or a list of floats for
+ each block.
+ downsample (nn.Module | None, optional): Downsampling layer at the end of the layer. None to skip
+ downsampling.
  use_checkpoint (bool, optional): Whether to use gradient checkpointing to save memory.
  local_conv_size (int, optional): Kernel size for the local convolution in each TinyViT block.
  activation (nn.Module): Activation function used in the MLP.
- out_dim (int | None, optional): Output dimension after downsampling. None means it will be the same as `dim`.
+ out_dim (int | None, optional): Output dimension after downsampling. None means it will be the same as dim.
  """
  super().__init__()
  self.dim = dim
@@ -768,12 +751,11 @@ class BasicLayer(nn.Module):
 
 
  class TinyViT(nn.Module):
- """
- TinyViT: A compact vision transformer architecture for efficient image classification and feature extraction.
+ """TinyViT: A compact vision transformer architecture for efficient image classification and feature extraction.
 
- This class implements the TinyViT model, which combines elements of vision transformers and convolutional
- neural networks for improved efficiency and performance on vision tasks. It features hierarchical processing
- with patch embedding, multiple stages of attention and convolution blocks, and a feature refinement neck.
+ This class implements the TinyViT model, which combines elements of vision transformers and convolutional neural
+ networks for improved efficiency and performance on vision tasks. It features hierarchical processing with patch
+ embedding, multiple stages of attention and convolution blocks, and a feature refinement neck.
 
  Attributes:
  img_size (int): Input image size.
@@ -813,11 +795,10 @@ class TinyViT(nn.Module):
  local_conv_size: int = 3,
  layer_lr_decay: float = 1.0,
  ):
- """
- Initialize the TinyViT model.
+ """Initialize the TinyViT model.
 
- This constructor sets up the TinyViT architecture, including patch embedding, multiple layers of
- attention and convolution blocks, and a classification head.
+ This constructor sets up the TinyViT architecture, including patch embedding, multiple layers of attention and
+ convolution blocks, and a classification head.
 
  Args:
  img_size (int, optional): Size of the input image.