dgenerate-ultralytics-headless 8.3.193-py3-none-any.whl → 8.3.195-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/METADATA +1 -2
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/RECORD +104 -102
- tests/test_exports.py +8 -5
- tests/test_python.py +2 -2
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +8 -8
- ultralytics/data/annotator.py +1 -1
- ultralytics/data/augment.py +75 -75
- ultralytics/data/base.py +12 -12
- ultralytics/data/converter.py +4 -4
- ultralytics/data/dataset.py +7 -7
- ultralytics/data/loaders.py +15 -15
- ultralytics/data/split_dota.py +10 -10
- ultralytics/data/utils.py +12 -12
- ultralytics/engine/exporter.py +4 -4
- ultralytics/engine/model.py +13 -13
- ultralytics/engine/predictor.py +13 -13
- ultralytics/engine/results.py +21 -21
- ultralytics/hub/__init__.py +1 -2
- ultralytics/hub/google/__init__.py +2 -2
- ultralytics/hub/session.py +7 -7
- ultralytics/hub/utils.py +0 -101
- ultralytics/models/fastsam/model.py +5 -5
- ultralytics/models/fastsam/predict.py +11 -11
- ultralytics/models/nas/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +2 -2
- ultralytics/models/rtdetr/val.py +4 -4
- ultralytics/models/sam/amg.py +6 -6
- ultralytics/models/sam/build.py +9 -9
- ultralytics/models/sam/model.py +7 -7
- ultralytics/models/sam/modules/blocks.py +6 -6
- ultralytics/models/sam/modules/decoders.py +1 -1
- ultralytics/models/sam/modules/encoders.py +27 -27
- ultralytics/models/sam/modules/sam.py +4 -4
- ultralytics/models/sam/modules/tiny_encoder.py +18 -18
- ultralytics/models/sam/modules/utils.py +8 -8
- ultralytics/models/sam/predict.py +66 -66
- ultralytics/models/utils/loss.py +22 -22
- ultralytics/models/utils/ops.py +8 -8
- ultralytics/models/yolo/classify/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +8 -8
- ultralytics/models/yolo/classify/val.py +4 -4
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +6 -6
- ultralytics/models/yolo/detect/val.py +32 -32
- ultralytics/models/yolo/model.py +6 -6
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/obb/val.py +13 -13
- ultralytics/models/yolo/pose/val.py +11 -11
- ultralytics/models/yolo/segment/predict.py +4 -4
- ultralytics/models/yolo/segment/train.py +1 -1
- ultralytics/models/yolo/segment/val.py +14 -14
- ultralytics/models/yolo/world/train.py +9 -9
- ultralytics/models/yolo/world/train_world.py +1 -1
- ultralytics/models/yolo/yoloe/predict.py +4 -4
- ultralytics/models/yolo/yoloe/train.py +4 -4
- ultralytics/nn/autobackend.py +10 -13
- ultralytics/nn/modules/block.py +6 -6
- ultralytics/nn/modules/conv.py +2 -2
- ultralytics/nn/modules/head.py +4 -4
- ultralytics/nn/tasks.py +13 -13
- ultralytics/nn/text_model.py +3 -3
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +3 -3
- ultralytics/solutions/config.py +5 -5
- ultralytics/solutions/distance_calculation.py +2 -2
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +4 -4
- ultralytics/solutions/object_counter.py +4 -4
- ultralytics/solutions/parking_management.py +7 -7
- ultralytics/solutions/queue_management.py +3 -3
- ultralytics/solutions/region_counter.py +4 -4
- ultralytics/solutions/similarity_search.py +2 -2
- ultralytics/solutions/solutions.py +48 -48
- ultralytics/solutions/streamlit_inference.py +1 -1
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/solutions/vision_eye.py +1 -1
- ultralytics/trackers/byte_tracker.py +11 -11
- ultralytics/trackers/utils/gmc.py +3 -3
- ultralytics/trackers/utils/matching.py +5 -5
- ultralytics/utils/__init__.py +1 -2
- ultralytics/utils/autodevice.py +2 -2
- ultralytics/utils/benchmarks.py +10 -10
- ultralytics/utils/callbacks/clearml.py +1 -1
- ultralytics/utils/callbacks/comet.py +5 -5
- ultralytics/utils/callbacks/hub.py +2 -1
- ultralytics/utils/checks.py +5 -5
- ultralytics/utils/cpu.py +90 -0
- ultralytics/utils/dist.py +1 -1
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/events.py +115 -0
- ultralytics/utils/export.py +5 -5
- ultralytics/utils/instance.py +2 -2
- ultralytics/utils/metrics.py +35 -35
- ultralytics/utils/nms.py +4 -4
- ultralytics/utils/ops.py +4 -2
- ultralytics/utils/patches.py +2 -2
- ultralytics/utils/plotting.py +9 -9
- ultralytics/utils/torch_utils.py +2 -6
- ultralytics/utils/triton.py +5 -5
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/top_level.txt +0 -0
@@ -83,7 +83,7 @@ class ImageEncoderViT(nn.Module):
      use_rel_pos (bool): If True, adds relative positional embeddings to attention maps.
      rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
      window_size (int): Size of attention window for windowed attention blocks.
-     global_attn_indexes (
+     global_attn_indexes (tuple[int, ...]): Indices of blocks that use global attention.

  Examples:
      >>> encoder = ImageEncoderViT(img_size=224, patch_size=16, embed_dim=768, depth=12, num_heads=12)
@@ -161,13 +161,13 @@ class PromptEncoder(nn.Module):

  Attributes:
      embed_dim (int): Dimension of the embeddings.
-     input_image_size (
-     image_embedding_size (
+     input_image_size (tuple[int, int]): Size of the input image as (H, W).
+     image_embedding_size (tuple[int, int]): Spatial size of the image embedding as (H, W).
      pe_layer (PositionEmbeddingRandom): Module for random position embedding.
      num_point_embeddings (int): Number of point embeddings for different types of points.
      point_embeddings (nn.ModuleList): List of point embeddings.
      not_a_point_embed (nn.Embedding): Embedding for points that are not part of any label.
-     mask_input_size (
+     mask_input_size (tuple[int, int]): Size of the input mask.
      mask_downscaling (nn.Sequential): Neural network for downscaling the mask.
      no_mask_embed (nn.Embedding): Embedding for cases where no mask is provided.

@@ -198,8 +198,8 @@ class PromptEncoder(nn.Module):

  Args:
      embed_dim (int): The dimension of the embeddings.
-     image_embedding_size (
-     input_image_size (
+     image_embedding_size (tuple[int, int]): The spatial size of the image embedding as (H, W).
+     input_image_size (tuple[int, int]): The padded size of the input image as (H, W).
      mask_in_chans (int): The number of hidden channels used for encoding input masks.
      activation (Type[nn.Module]): The activation function to use when encoding input masks.

@@ -310,7 +310,7 @@ class PromptEncoder(nn.Module):
  Embed different types of prompts, returning both sparse and dense embeddings.

  Args:
-     points (
+     points (tuple[torch.Tensor, torch.Tensor] | None): Point coordinates and labels to embed. The first
          tensor contains coordinates with shape (B, N, 2), and the second tensor contains labels with
          shape (B, N).
      boxes (torch.Tensor | None): Boxes to embed with shape (B, M, 2, 2), where M is the number of boxes.
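The `points` prompt documented above is a (coords, labels) pair. As a hedged sketch of those shapes (the constructor values below are illustrative assumptions, not taken from this diff):

```python
import torch
from ultralytics.models.sam.modules.encoders import PromptEncoder

encoder = PromptEncoder(
    embed_dim=256,
    image_embedding_size=(64, 64),   # (H, W) of the image embedding
    input_image_size=(1024, 1024),   # padded input image size (H, W)
    mask_in_chans=16,
)
coords = torch.rand(1, 2, 2) * 1024      # (B, N, 2) absolute pixel coordinates
labels = torch.tensor([[1, 0]])          # (B, N) labels: 1 = foreground, 0 = background
sparse, dense = encoder(points=(coords, labels), boxes=None, masks=None)
print(sparse.shape, dense.shape)         # sparse (B, N', 256), dense (B, 256, 64, 64)
```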
@@ -522,10 +522,10 @@ class FpnNeck(nn.Module):
  Attributes:
      position_encoding (PositionEmbeddingSine): Sinusoidal positional encoding module.
      convs (nn.ModuleList): List of convolutional layers for each backbone level.
-     backbone_channel_list (
+     backbone_channel_list (list[int]): List of channel dimensions from the backbone.
      fpn_interp_model (str): Interpolation mode for FPN feature resizing.
      fuse_type (str): Type of feature fusion, either 'sum' or 'avg'.
-     fpn_top_down_levels (
+     fpn_top_down_levels (list[int]): Levels to have top-down features in outputs.

  Methods:
      forward: Perform forward pass through the FPN neck.
@@ -558,13 +558,13 @@ class FpnNeck(nn.Module):

  Args:
      d_model (int): Dimension of the model.
-     backbone_channel_list (
+     backbone_channel_list (list[int]): List of channel dimensions from the backbone.
      kernel_size (int): Kernel size for the convolutional layers.
      stride (int): Stride for the convolutional layers.
      padding (int): Padding for the convolutional layers.
      fpn_interp_model (str): Interpolation mode for FPN feature resizing.
      fuse_type (str): Type of feature fusion, either 'sum' or 'avg'.
-     fpn_top_down_levels (Optional[
+     fpn_top_down_levels (Optional[list[int]]): Levels to have top-down features in outputs.

  Examples:
      >>> backbone_channels = [64, 128, 256, 512]
@@ -610,12 +610,12 @@ class FpnNeck(nn.Module):
  and top-down feature fusion. It generates output feature maps and corresponding positional encodings.

  Args:
-     xs (
+     xs (list[torch.Tensor]): List of input tensors from the backbone, each with shape (B, C, H, W).

  Returns:
-     out (
+     out (list[torch.Tensor]): List of output feature maps after FPN processing, each with shape
          (B, d_model, H, W).
-     pos (
+     pos (list[torch.Tensor]): List of positional encodings corresponding to each output feature map.

  Examples:
      >>> fpn_neck = FpnNeck(d_model=256, backbone_channel_list=[64, 128, 256, 512])
@@ -664,18 +664,18 @@ class Hiera(nn.Module):
  with optional pooling and global attention mechanisms.

  Attributes:
-     window_spec (
-     q_stride (
-     stage_ends (
-     q_pool_blocks (
+     window_spec (tuple[int, ...]): Window sizes for each stage.
+     q_stride (tuple[int, int]): Downsampling stride between stages.
+     stage_ends (list[int]): Indices of the last block in each stage.
+     q_pool_blocks (list[int]): Indices of blocks where pooling is applied.
      return_interm_layers (bool): Whether to return intermediate layer outputs.
      patch_embed (PatchEmbed): Module for patch embedding.
-     global_att_blocks (
-     window_pos_embed_bkg_spatial_size (
+     global_att_blocks (tuple[int, ...]): Indices of blocks with global attention.
+     window_pos_embed_bkg_spatial_size (tuple[int, int]): Spatial size for window positional embedding background.
      pos_embed (nn.Parameter): Positional embedding for the background.
      pos_embed_window (nn.Parameter): Positional embedding for the window.
      blocks (nn.ModuleList): List of MultiScaleBlock modules.
-     channel_list (
+     channel_list (list[int]): List of output channel dimensions for each stage.

  Methods:
      _get_pos_embed: Generate positional embeddings by interpolating and combining window and background embeddings.
@@ -727,13 +727,13 @@ class Hiera(nn.Module):
      num_heads (int): Initial number of attention heads.
      drop_path_rate (float): Stochastic depth rate.
      q_pool (int): Number of query pooling stages.
-     q_stride (
-     stages (
+     q_stride (tuple[int, int]): Downsampling stride between stages.
+     stages (tuple[int, ...]): Number of blocks per stage.
      dim_mul (float): Dimension multiplier factor at stage transitions.
      head_mul (float): Head multiplier factor at stage transitions.
-     window_pos_embed_bkg_spatial_size (
-     window_spec (
-     global_att_blocks (
+     window_pos_embed_bkg_spatial_size (tuple[int, int]): Spatial size for window positional embedding background.
+     window_spec (tuple[int, ...]): Window sizes for each stage when not using global attention.
+     global_att_blocks (tuple[int, ...]): Indices of blocks that use global attention.
      return_interm_layers (bool): Whether to return intermediate layer outputs.

  Examples:
@@ -823,7 +823,7 @@ class Hiera(nn.Module):
      x (torch.Tensor): Input tensor with shape (B, C, H, W) representing a batch of images.

  Returns:
-     (
+     (list[torch.Tensor]): List of feature maps at different scales, each with shape (B, C_i, H_i, W_i), where
          C_i is the channel dimension and H_i, W_i are the spatial dimensions at scale i. The list is ordered
          from highest resolution (fine features) to lowest resolution (coarse features) if return_interm_layers
          is True, otherwise contains only the final output.
@@ -68,8 +68,8 @@ class SAMModel(nn.Module):
      image_encoder (ImageEncoderViT): The backbone used to encode the image into image embeddings.
      prompt_encoder (PromptEncoder): Encodes various types of input prompts.
      mask_decoder (MaskDecoder): Predicts masks from the image embeddings and encoded prompts.
-     pixel_mean (
-     pixel_std (
+     pixel_mean (list[float]): Mean values for normalizing pixels in the input image.
+     pixel_std (list[float]): Standard deviation values for normalizing pixels in the input image.

  Examples:
      >>> image_encoder = ImageEncoderViT(...)
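For context, `pixel_mean` and `pixel_std` are plain per-channel lists applied to the input image. A minimal sketch of that normalization; the specific values below are SAM's commonly quoted defaults and are an assumption here, not part of this diff:

```python
import torch

pixel_mean = [123.675, 116.28, 103.53]   # assumed per-channel means (0-255 scale)
pixel_std = [58.395, 57.12, 57.375]      # assumed per-channel standard deviations

img = torch.rand(3, 1024, 1024) * 255                # (C, H, W) image in 0-255 range
mean = torch.tensor(pixel_mean).view(-1, 1, 1)
std = torch.tensor(pixel_std).view(-1, 1, 1)
normalized = (img - mean) / std                      # per-channel pixel normalization
```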
@@ -435,14 +435,14 @@ class SAM2Model(torch.nn.Module):

  Args:
      backbone_features (torch.Tensor): Image features with shape (B, C, H, W).
-     point_inputs (
+     point_inputs (dict[str, torch.Tensor] | None): Dictionary containing point prompts.
          'point_coords': Tensor of shape (B, P, 2) with float32 dtype, containing absolute
              pixel-unit coordinates in (x, y) format for P input points.
          'point_labels': Tensor of shape (B, P) with int32 dtype, where 1 means positive clicks,
              0 means negative clicks, and -1 means padding.
      mask_inputs (torch.Tensor | None): Mask of shape (B, 1, H*16, W*16), float or bool, with the
          same spatial size as the image.
-     high_res_features (
+     high_res_features (list[torch.Tensor] | None): List of two feature maps with shapes
          (B, C, 4*H, 4*W) and (B, C, 2*H, 2*W) respectively, used as high-resolution feature maps
          for SAM decoder.
      multimask_output (bool): If True, output 3 candidate masks and their IoU estimates; if False,
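A short sketch of the `point_inputs` dictionary in exactly the format documented above (the coordinates and labels themselves are illustrative):

```python
import torch

point_inputs = {
    # (B, P, 2) float32 absolute pixel coordinates in (x, y) order
    "point_coords": torch.tensor([[[320.0, 240.0], [100.0, 80.0], [0.0, 0.0]]], dtype=torch.float32),
    # (B, P) int32 labels: 1 = positive click, 0 = negative click, -1 = padding
    "point_labels": torch.tensor([[1, 0, -1]], dtype=torch.int32),
}
assert point_inputs["point_coords"].shape == (1, 3, 2)
assert point_inputs["point_labels"].shape == (1, 3)
```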
@@ -81,7 +81,7 @@ class PatchEmbed(nn.Module):
  effectively downsampling the spatial dimensions while increasing the channel dimension.

  Attributes:
-     patches_resolution (
+     patches_resolution (tuple[int, int]): Resolution of the patches after embedding.
      num_patches (int): Total number of patches.
      in_chans (int): Number of input channels.
      embed_dim (int): Dimension of the embedding.
@@ -203,7 +203,7 @@ class PatchMerging(nn.Module):
  resolution while potentially increasing channel dimensions.

  Attributes:
-     input_resolution (
+     input_resolution (tuple[int, int]): The input resolution (height, width) of the feature map.
      dim (int): The input dimension of the feature map.
      out_dim (int): The output dimension after merging and projection.
      act (nn.Module): The activation function used between convolutions.
@@ -225,7 +225,7 @@ class PatchMerging(nn.Module):
  Initialize the PatchMerging module for merging and projecting neighboring patches in feature maps.

  Args:
-     input_resolution (
+     input_resolution (tuple[int, int]): The input resolution (height, width) of the feature map.
      dim (int): The input dimension of the feature map.
      out_dim (int): The output dimension after merging and projection.
      activation (nn.Module): The activation function used between convolutions.
@@ -267,7 +267,7 @@ class ConvLayer(nn.Module):

  Attributes:
      dim (int): Dimensionality of the input and output.
-     input_resolution (
+     input_resolution (tuple[int, int]): Resolution of the input image.
      depth (int): Number of MBConv layers in the block.
      use_checkpoint (bool): Whether to use gradient checkpointing to save memory.
      blocks (nn.ModuleList): List of MBConv layers.
@@ -301,10 +301,10 @@ class ConvLayer(nn.Module):

  Args:
      dim (int): The dimensionality of the input and output.
-     input_resolution (
+     input_resolution (tuple[int, int]): The resolution of the input image.
      depth (int): The number of MBConv layers in the block.
      activation (nn.Module): Activation function applied after each convolution.
-     drop_path (float |
+     drop_path (float | list[float], optional): Drop path rate. Single float or a list of floats for each MBConv.
      downsample (Optional[nn.Module], optional): Function for downsampling the output. None to skip downsampling.
      use_checkpoint (bool, optional): Whether to use gradient checkpointing to save memory.
      out_dim (Optional[int], optional): The dimensionality of the output. None means it will be the same as `dim`.
@@ -456,7 +456,7 @@ class Attention(torch.nn.Module):
      key_dim (int): The dimensionality of the keys and queries.
      num_heads (int, optional): Number of attention heads.
      attn_ratio (float, optional): Attention ratio, affecting the dimensions of the value vectors.
-     resolution (
+     resolution (tuple[int, int], optional): Spatial resolution of the input feature map.
  """
  super().__init__()

@@ -530,7 +530,7 @@ class TinyViTBlock(nn.Module):

  Attributes:
      dim (int): The dimensionality of the input and output.
-     input_resolution (
+     input_resolution (tuple[int, int]): Spatial resolution of the input feature map.
      num_heads (int): Number of attention heads.
      window_size (int): Size of the attention window.
      mlp_ratio (float): Ratio of MLP hidden dimension to embedding dimension.
@@ -567,7 +567,7 @@ class TinyViTBlock(nn.Module):

  Args:
      dim (int): Dimensionality of the input and output features.
-     input_resolution (
+     input_resolution (tuple[int, int]): Spatial resolution of the input feature map (height, width).
      num_heads (int): Number of attention heads.
      window_size (int, optional): Size of the attention window. Must be greater than 0.
      mlp_ratio (float, optional): Ratio of MLP hidden dimension to embedding dimension.
@@ -674,7 +674,7 @@ class BasicLayer(nn.Module):

  Attributes:
      dim (int): The dimensionality of the input and output features.
-     input_resolution (
+     input_resolution (tuple[int, int]): Spatial resolution of the input feature map.
      depth (int): Number of TinyViT blocks in this layer.
      use_checkpoint (bool): Whether to use gradient checkpointing to save memory.
      blocks (nn.ModuleList): List of TinyViT blocks that make up this layer.
@@ -712,13 +712,13 @@ class BasicLayer(nn.Module):

  Args:
      dim (int): Dimensionality of the input and output features.
-     input_resolution (
+     input_resolution (tuple[int, int]): Spatial resolution of the input feature map (height, width).
      depth (int): Number of TinyViT blocks in this layer.
      num_heads (int): Number of attention heads in each TinyViT block.
      window_size (int): Size of the local window for attention computation.
      mlp_ratio (float, optional): Ratio of MLP hidden dimension to embedding dimension.
      drop (float, optional): Dropout rate.
-     drop_path (float |
+     drop_path (float | list[float], optional): Stochastic depth rate. Can be a float or a list of floats for each block.
      downsample (nn.Module | None, optional): Downsampling layer at the end of the layer. None to skip downsampling.
      use_checkpoint (bool, optional): Whether to use gradient checkpointing to save memory.
      local_conv_size (int, optional): Kernel size for the local convolution in each TinyViT block.
@@ -778,11 +778,11 @@ class TinyViT(nn.Module):
  Attributes:
      img_size (int): Input image size.
      num_classes (int): Number of classification classes.
-     depths (
+     depths (tuple[int, int, int, int]): Number of blocks in each stage.
      num_layers (int): Total number of layers in the network.
      mlp_ratio (float): Ratio of MLP hidden dimension to embedding dimension.
      patch_embed (PatchEmbed): Module for patch embedding.
-     patches_resolution (
+     patches_resolution (tuple[int, int]): Resolution of embedded patches.
      layers (nn.ModuleList): List of network layers.
      norm_head (nn.LayerNorm): Layer normalization for the classifier head.
      head (nn.Linear): Linear layer for final classification.
@@ -823,10 +823,10 @@ class TinyViT(nn.Module):
      img_size (int, optional): Size of the input image.
      in_chans (int, optional): Number of input channels.
      num_classes (int, optional): Number of classes for classification.
-     embed_dims (
-     depths (
-     num_heads (
-     window_sizes (
+     embed_dims (tuple[int, int, int, int], optional): Embedding dimensions for each stage.
+     depths (tuple[int, int, int, int], optional): Number of blocks in each stage.
+     num_heads (tuple[int, int, int, int], optional): Number of attention heads in each stage.
+     window_sizes (tuple[int, int, int, int], optional): Window sizes for each stage.
      mlp_ratio (float, optional): Ratio of MLP hidden dim to embedding dim.
      drop_rate (float, optional): Dropout rate.
      drop_path_rate (float, optional): Stochastic depth rate.
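The stage-wise hyperparameters documented above are 4-tuples, one entry per stage. A hedged construction sketch (the concrete values are illustrative assumptions, not defaults asserted by this diff):

```python
from ultralytics.models.sam.modules.tiny_encoder import TinyViT

model = TinyViT(
    img_size=224,
    in_chans=3,
    num_classes=1000,
    embed_dims=(96, 192, 384, 576),   # embedding dimension per stage
    depths=(2, 2, 6, 2),              # number of blocks per stage
    num_heads=(3, 6, 12, 18),         # attention heads per stage
    window_sizes=(7, 7, 14, 7),       # attention window size per stage
)
print(sum(p.numel() for p in model.parameters()))  # rough parameter count
```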
@@ -14,12 +14,12 @@ def select_closest_cond_frames(frame_idx: int, cond_frame_outputs: dict[int, Any

  Args:
      frame_idx (int): Current frame index.
-     cond_frame_outputs (
+     cond_frame_outputs (dict[int, Any]): Dictionary of conditioning frame outputs keyed by frame indices.
      max_cond_frame_num (int): Maximum number of conditioning frames to select.

  Returns:
-     selected_outputs (
-     unselected_outputs (
+     selected_outputs (dict[int, Any]): Selected items from cond_frame_outputs.
+     unselected_outputs (dict[int, Any]): Items not selected from cond_frame_outputs.

  Examples:
      >>> frame_idx = 5
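A small usage sketch of `select_closest_cond_frames` with the dict[int, Any] input documented above (frame indices and values are illustrative):

```python
from ultralytics.models.sam.modules.utils import select_closest_cond_frames

cond_frame_outputs = {1: "out1", 7: "out7", 230: "out230", 335: "out335"}
selected, unselected = select_closest_cond_frames(
    frame_idx=300, cond_frame_outputs=cond_frame_outputs, max_cond_frame_num=2
)
print(sorted(selected))    # the two frame indices closest to frame 300
print(sorted(unselected))  # everything else
```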
@@ -229,7 +229,7 @@ def window_partition(x: torch.Tensor, window_size: int):

  Returns:
      windows (torch.Tensor): Partitioned windows with shape (B * num_windows, window_size, window_size, C).
-     padded_h_w (
+     padded_h_w (tuple[int, int]): Padded height and width before partition.

  Examples:
      >>> x = torch.randn(1, 16, 16, 3)
@@ -262,8 +262,8 @@ def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: tuple[in
          window_size, C), where B is the batch size, num_windows is the number of windows, window_size is
          the size of each window, and C is the number of channels.
      window_size (int): Size of each window.
-     pad_hw (
-     hw (
+     pad_hw (tuple[int, int]): Padded height and width (Hp, Wp) of the input before windowing.
+     hw (tuple[int, int]): Original height and width (H, W) of the input before padding and windowing.

  Returns:
      (torch.Tensor): Unpartitioned sequences with shape (B, H, W, C), where B is the batch size, H and W
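A round-trip sketch of `window_partition`/`window_unpartition` using the shapes and return values documented above:

```python
import torch
from ultralytics.models.sam.modules.utils import window_partition, window_unpartition

x = torch.randn(1, 16, 16, 3)                          # (B, H, W, C)
windows, pad_hw = window_partition(x, window_size=7)   # pads 16 -> 21, then tiles 7x7 windows
restored = window_unpartition(windows, 7, pad_hw, (16, 16))
assert restored.shape == x.shape                        # padding is removed on the way back
assert torch.allclose(restored, x)
```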
@@ -350,8 +350,8 @@ def add_decomposed_rel_pos(
      q (torch.Tensor): Query tensor in the attention layer with shape (B, q_h * q_w, C).
      rel_pos_h (torch.Tensor): Relative position embeddings for height axis with shape (Lh, C).
      rel_pos_w (torch.Tensor): Relative position embeddings for width axis with shape (Lw, C).
-     q_size (
-     k_size (
+     q_size (tuple[int, int]): Spatial sequence size of query q as (q_h, q_w).
+     k_size (tuple[int, int]): Spatial sequence size of key k as (k_h, k_w).

  Returns:
      (torch.Tensor): Updated attention map with added relative positional embeddings, shape