dgenerate-ultralytics-headless 8.3.190__py3-none-any.whl → 8.3.192__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/RECORD +103 -102
- tests/test_cuda.py +6 -5
- tests/test_exports.py +1 -6
- tests/test_python.py +1 -4
- tests/test_solutions.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -14
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VisDrone.yaml +4 -4
- ultralytics/data/annotator.py +6 -6
- ultralytics/data/augment.py +53 -51
- ultralytics/data/base.py +15 -13
- ultralytics/data/build.py +7 -4
- ultralytics/data/converter.py +9 -10
- ultralytics/data/dataset.py +24 -22
- ultralytics/data/loaders.py +13 -11
- ultralytics/data/split.py +4 -3
- ultralytics/data/split_dota.py +14 -12
- ultralytics/data/utils.py +29 -23
- ultralytics/engine/exporter.py +2 -2
- ultralytics/engine/model.py +16 -14
- ultralytics/engine/predictor.py +8 -6
- ultralytics/engine/results.py +54 -52
- ultralytics/engine/trainer.py +8 -3
- ultralytics/engine/tuner.py +230 -42
- ultralytics/hub/google/__init__.py +7 -6
- ultralytics/hub/session.py +8 -6
- ultralytics/hub/utils.py +3 -4
- ultralytics/models/fastsam/model.py +8 -6
- ultralytics/models/nas/model.py +5 -3
- ultralytics/models/rtdetr/train.py +4 -3
- ultralytics/models/rtdetr/val.py +6 -4
- ultralytics/models/sam/amg.py +13 -10
- ultralytics/models/sam/model.py +3 -2
- ultralytics/models/sam/modules/blocks.py +21 -21
- ultralytics/models/sam/modules/decoders.py +11 -11
- ultralytics/models/sam/modules/encoders.py +25 -25
- ultralytics/models/sam/modules/memory_attention.py +9 -8
- ultralytics/models/sam/modules/sam.py +8 -10
- ultralytics/models/sam/modules/tiny_encoder.py +21 -20
- ultralytics/models/sam/modules/transformer.py +6 -5
- ultralytics/models/sam/modules/utils.py +7 -5
- ultralytics/models/sam/predict.py +32 -31
- ultralytics/models/utils/loss.py +29 -27
- ultralytics/models/utils/ops.py +10 -8
- ultralytics/models/yolo/classify/train.py +9 -7
- ultralytics/models/yolo/classify/val.py +11 -9
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +8 -6
- ultralytics/models/yolo/detect/val.py +22 -20
- ultralytics/models/yolo/model.py +14 -14
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +11 -9
- ultralytics/models/yolo/pose/train.py +7 -5
- ultralytics/models/yolo/pose/val.py +12 -10
- ultralytics/models/yolo/segment/train.py +4 -5
- ultralytics/models/yolo/segment/val.py +13 -11
- ultralytics/models/yolo/world/train.py +10 -8
- ultralytics/models/yolo/yoloe/train.py +10 -10
- ultralytics/models/yolo/yoloe/val.py +11 -9
- ultralytics/nn/autobackend.py +17 -19
- ultralytics/nn/modules/block.py +12 -12
- ultralytics/nn/modules/conv.py +4 -3
- ultralytics/nn/modules/head.py +41 -37
- ultralytics/nn/modules/transformer.py +22 -21
- ultralytics/nn/tasks.py +2 -2
- ultralytics/nn/text_model.py +6 -5
- ultralytics/solutions/analytics.py +7 -5
- ultralytics/solutions/config.py +12 -10
- ultralytics/solutions/distance_calculation.py +3 -3
- ultralytics/solutions/heatmap.py +4 -2
- ultralytics/solutions/object_counter.py +5 -3
- ultralytics/solutions/parking_management.py +4 -2
- ultralytics/solutions/region_counter.py +7 -5
- ultralytics/solutions/similarity_search.py +5 -3
- ultralytics/solutions/solutions.py +38 -36
- ultralytics/solutions/streamlit_inference.py +8 -7
- ultralytics/trackers/bot_sort.py +11 -9
- ultralytics/trackers/byte_tracker.py +17 -15
- ultralytics/trackers/utils/gmc.py +4 -3
- ultralytics/utils/__init__.py +16 -88
- ultralytics/utils/autobatch.py +3 -2
- ultralytics/utils/autodevice.py +10 -10
- ultralytics/utils/benchmarks.py +11 -10
- ultralytics/utils/callbacks/comet.py +9 -9
- ultralytics/utils/checks.py +17 -26
- ultralytics/utils/export.py +12 -11
- ultralytics/utils/files.py +8 -7
- ultralytics/utils/git.py +139 -0
- ultralytics/utils/instance.py +8 -7
- ultralytics/utils/loss.py +15 -13
- ultralytics/utils/metrics.py +62 -62
- ultralytics/utils/ops.py +3 -2
- ultralytics/utils/patches.py +6 -4
- ultralytics/utils/plotting.py +20 -18
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tqdm.py +18 -14
- ultralytics/utils/triton.py +3 -2
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/amg.py
CHANGED
@@ -1,15 +1,18 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 import math
+from collections.abc import Generator
 from itertools import product
-from typing import Any, Generator, List, Tuple
+from typing import Any

 import numpy as np
 import torch


 def is_box_near_crop_edge(
-    boxes: torch.Tensor, crop_box: List[int], orig_box: List[int], atol: float = 20.0
+    boxes: torch.Tensor, crop_box: list[int], orig_box: list[int], atol: float = 20.0
 ) -> torch.Tensor:
     """
     Determine if bounding boxes are near the edge of a cropped image region using a specified tolerance.
@@ -38,7 +41,7 @@ def is_box_near_crop_edge(
     return torch.any(near_crop_edge, dim=1)


-def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
+def batch_iterator(batch_size: int, *args) -> Generator[list[Any]]:
     """
     Yield batches of data from input arguments with specified batch size for efficient processing.

@@ -106,14 +109,14 @@ def build_point_grid(n_per_side: int) -> np.ndarray:
     return np.stack([points_x, points_y], axis=-1).reshape(-1, 2)


-def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer: int) -> List[np.ndarray]:
+def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer: int) -> list[np.ndarray]:
     """Generate point grids for multiple crop layers with varying scales and densities."""
     return [build_point_grid(int(n_per_side / (scale_per_layer**i))) for i in range(n_layers + 1)]


 def generate_crop_boxes(
-    im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
-) -> Tuple[List[List[int]], List[int]]:
+    im_size: tuple[int, ...], n_layers: int, overlap_ratio: float
+) -> tuple[list[list[int]], list[int]]:
     """
     Generate crop boxes of varying sizes for multiscale image processing, with layered overlapping regions.

@@ -163,7 +166,7 @@ def generate_crop_boxes(
     return crop_boxes, layer_idxs


-def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
+def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: list[int]) -> torch.Tensor:
     """Uncrop bounding boxes by adding the crop box offset to their coordinates."""
     x0, y0, _, _ = crop_box
     offset = torch.tensor([[x0, y0, x0, y0]], device=boxes.device)
@@ -173,7 +176,7 @@ def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
     return boxes + offset


-def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
+def uncrop_points(points: torch.Tensor, crop_box: list[int]) -> torch.Tensor:
     """Uncrop points by adding the crop box offset to their coordinates."""
     x0, y0, _, _ = crop_box
     offset = torch.tensor([[x0, y0]], device=points.device)
@@ -183,7 +186,7 @@ def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
     return points + offset


-def uncrop_masks(masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w: int) -> torch.Tensor:
+def uncrop_masks(masks: torch.Tensor, crop_box: list[int], orig_h: int, orig_w: int) -> torch.Tensor:
     """Uncrop masks by padding them to the original image size, handling coordinate transformations."""
     x0, y0, x1, y1 = crop_box
     if x0 == 0 and y0 == 0 and x1 == orig_w and y1 == orig_h:
@@ -194,7 +197,7 @@ def uncrop_masks(masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w:
     return torch.nn.functional.pad(masks, pad, value=0)


-def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tuple[np.ndarray, bool]:
+def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> tuple[np.ndarray, bool]:
     """
     Remove small disconnected regions or holes in a mask based on area threshold and mode.
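The change repeated across every module in this diff is a typing modernization: each file gains `from __future__ import annotations`, drops its `typing.List`/`Tuple`/`Optional`/`Type` imports, and rewrites annotations with built-in generics (PEP 585) and `X | None` unions (PEP 604). A minimal sketch of the pattern; the function below is illustrative only and not part of the package:

```python
# With the __future__ import, annotations are stored as strings and never evaluated
# at runtime, so list[int], tuple[int, int] and float | None are safe even on Python
# versions without native support for these forms.
from __future__ import annotations


def crop_offset(crop_box: list[int], scale: float | None = None) -> tuple[int, int]:
    """Return the (x0, y0) corner of a crop box, optionally scaled (hypothetical helper)."""
    x0, y0 = crop_box[0], crop_box[1]
    s = 1.0 if scale is None else scale
    return int(x0 * s), int(y0 * s)


print(crop_offset([10, 20, 50, 60]))  # (10, 20)
```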
ultralytics/models/sam/model.py
CHANGED
@@ -14,8 +14,9 @@ Key Features:
     - Trained on SA-1B dataset
 """

+from __future__ import annotations
+
 from pathlib import Path
-from typing import Dict, Type

 from ultralytics.engine.model import Model
 from ultralytics.utils.torch_utils import model_info
@@ -154,7 +155,7 @@ class SAM(Model):
         return model_info(self.model, detailed=detailed, verbose=verbose)

     @property
-    def task_map(self) -> Dict[str, Dict[str, Type[Predictor]]]:
+    def task_map(self) -> dict[str, dict[str, type[Predictor]]]:
         """
         Provide a mapping from the 'segment' task to its corresponding 'Predictor'.
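The reworked `task_map` annotation above maps a task name to a nested dict whose values are predictor classes, not instances. A hedged, self-contained sketch of that shape using a placeholder class; the real property lives on `SAM` and returns the package's own `Predictor` types:

```python
from __future__ import annotations


class Predictor:  # placeholder standing in for ultralytics.models.sam.predict.Predictor
    pass


def task_map() -> dict[str, dict[str, type[Predictor]]]:
    """Map the 'segment' task to its predictor class (the class object itself)."""
    return {"segment": {"predictor": Predictor}}


assert task_map()["segment"]["predictor"] is Predictor
```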
ultralytics/models/sam/modules/blocks.py
CHANGED
@@ -1,9 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+from __future__ import annotations

 import copy
 import math
 from functools import partial
-from typing import Optional, Tuple, Type, Union

 import numpy as np
 import torch
@@ -81,7 +81,7 @@ class MaskDownSampler(nn.Module):
         stride: int = 4,
         padding: int = 0,
         total_stride: int = 16,
-        activation: Type[nn.Module] = nn.GELU,
+        activation: type[nn.Module] = nn.GELU,
     ):
         """Initialize a mask downsampler module for progressive downsampling and channel expansion."""
         super().__init__()
@@ -227,7 +227,7 @@ class Fuser(nn.Module):
         torch.Size([1, 256, 32, 32])
     """

-    def __init__(self, layer: nn.Module, num_layers: int, dim: Optional[int] = None, input_projection: bool = False):
+    def __init__(self, layer: nn.Module, num_layers: int, dim: int | None = None, input_projection: bool = False):
         """
         Initialize the Fuser module for feature fusion through multiple layers.

@@ -295,7 +295,7 @@ class SAM2TwoWayAttentionBlock(TwoWayAttentionBlock):
         embedding_dim: int,
         num_heads: int,
         mlp_dim: int = 2048,
-        activation: Type[nn.Module] = nn.ReLU,
+        activation: type[nn.Module] = nn.ReLU,
         attention_downsample_rate: int = 2,
         skip_first_layer_pe: bool = False,
     ) -> None:
@@ -359,7 +359,7 @@ class SAM2TwoWayTransformer(TwoWayTransformer):
         embedding_dim: int,
         num_heads: int,
         mlp_dim: int,
-        activation: Type[nn.Module] = nn.ReLU,
+        activation: type[nn.Module] = nn.ReLU,
         attention_downsample_rate: int = 2,
     ) -> None:
         """
@@ -432,7 +432,7 @@ class RoPEAttention(Attention):
         *args,
         rope_theta: float = 10000.0,
         rope_k_repeat: bool = False,
-        feat_sizes: Tuple[int, int] = (32, 32),  # [w, h] for stride 16 feats at 512 resolution
+        feat_sizes: tuple[int, int] = (32, 32),  # [w, h] for stride 16 feats at 512 resolution
         **kwargs,
     ):
         """Initialize RoPEAttention with rotary position encoding for enhanced positional awareness."""
@@ -618,9 +618,9 @@ class MultiScaleBlock(nn.Module):
         num_heads: int,
         mlp_ratio: float = 4.0,
         drop_path: float = 0.0,
-        norm_layer: Union[nn.Module, str] = "LayerNorm",
-        q_stride: Tuple[int, int] = None,
-        act_layer: Type[nn.Module] = nn.GELU,
+        norm_layer: nn.Module | str = "LayerNorm",
+        q_stride: tuple[int, int] = None,
+        act_layer: type[nn.Module] = nn.GELU,
         window_size: int = 0,
     ):
         """Initialize a multiscale attention block with window partitioning and optional query pooling."""
@@ -728,7 +728,7 @@ class PositionEmbeddingSine(nn.Module):
         num_pos_feats: int,
         temperature: int = 10000,
         normalize: bool = True,
-        scale: Optional[float] = None,
+        scale: float | None = None,
     ):
         """Initialize sinusoidal position embeddings for 2D image inputs."""
         super().__init__()
@@ -744,7 +744,7 @@ class PositionEmbeddingSine(nn.Module):

         self.cache = {}

-    def _encode_xy(self, x: torch.Tensor, y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    def _encode_xy(self, x: torch.Tensor, y: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
         """Encode 2D positions using sine/cosine functions for transformer positional embeddings."""
         assert len(x) == len(y) and x.ndim == y.ndim == 1
         x_embed = x * self.scale
@@ -833,7 +833,7 @@ class PositionEmbeddingRandom(nn.Module):
         torch.Size([128, 32, 32])
     """

-    def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
+    def __init__(self, num_pos_feats: int = 64, scale: float | None = None) -> None:
         """Initialize random spatial frequency position embedding for transformers."""
         super().__init__()
         if scale is None or scale <= 0.0:
@@ -853,7 +853,7 @@ class PositionEmbeddingRandom(nn.Module):
         # Outputs d_1 x ... x d_n x C shape
         return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)

-    def forward(self, size: Tuple[int, int]) -> torch.Tensor:
+    def forward(self, size: tuple[int, int]) -> torch.Tensor:
         """Generate positional encoding for a grid using random spatial frequencies."""
         h, w = size
         grid = torch.ones(
@@ -869,7 +869,7 @@ class PositionEmbeddingRandom(nn.Module):
         pe = self._pe_encoding(torch.stack([x_embed, y_embed], dim=-1))
         return pe.permute(2, 0, 1)  # C x H x W

-    def forward_with_coords(self, coords_input: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor:
+    def forward_with_coords(self, coords_input: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor:
         """Positionally encode input coordinates, normalizing them to [0,1] based on the given image size."""
         coords = coords_input.clone()
         coords[:, :, 0] = coords[:, :, 0] / image_size[1]
@@ -910,12 +910,12 @@ class Block(nn.Module):
         num_heads: int,
         mlp_ratio: float = 4.0,
         qkv_bias: bool = True,
-        norm_layer: Type[nn.Module] = nn.LayerNorm,
-        act_layer: Type[nn.Module] = nn.GELU,
+        norm_layer: type[nn.Module] = nn.LayerNorm,
+        act_layer: type[nn.Module] = nn.GELU,
         use_rel_pos: bool = False,
         rel_pos_zero_init: bool = True,
         window_size: int = 0,
-        input_size: Optional[Tuple[int, int]] = None,
+        input_size: tuple[int, int] | None = None,
     ) -> None:
         """
         Initialize a transformer block with optional window attention and relative positional embeddings.
@@ -1012,7 +1012,7 @@ class REAttention(nn.Module):
         qkv_bias: bool = True,
         use_rel_pos: bool = False,
         rel_pos_zero_init: bool = True,
-        input_size: Optional[Tuple[int, int]] = None,
+        input_size: tuple[int, int] | None = None,
     ) -> None:
         """
         Initialize a Relative Position Attention module for transformer-based architectures.
@@ -1093,9 +1093,9 @@ class PatchEmbed(nn.Module):

     def __init__(
         self,
-        kernel_size: Tuple[int, int] = (16, 16),
-        stride: Tuple[int, int] = (16, 16),
-        padding: Tuple[int, int] = (0, 0),
+        kernel_size: tuple[int, int] = (16, 16),
+        stride: tuple[int, int] = (16, 16),
+        padding: tuple[int, int] = (0, 0),
         in_chans: int = 3,
         embed_dim: int = 768,
     ) -> None:
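Several of the rewritten signatures above take the activation or normalization layer as a class annotated `type[nn.Module]` (for example `activation: type[nn.Module] = nn.GELU`) and instantiate it inside the module. A small illustrative sketch of that convention; the `TinyBlock` class below is hypothetical and not part of ultralytics:

```python
from __future__ import annotations

import torch
from torch import nn


class TinyBlock(nn.Module):  # hypothetical example, not part of the package
    def __init__(self, dim: int, activation: type[nn.Module] = nn.GELU) -> None:
        super().__init__()
        self.fc = nn.Linear(dim, dim)
        self.act = activation()  # the class is called here to create the instance

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.act(self.fc(x))


x = torch.randn(2, 8)
print(TinyBlock(8)(x).shape)  # torch.Size([2, 8])
```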
ultralytics/models/sam/modules/decoders.py
CHANGED
@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

-from typing import List, Optional, Tuple, Type
+from __future__ import annotations

 import torch
 from torch import nn
@@ -43,7 +43,7 @@ class MaskDecoder(nn.Module):
         transformer_dim: int,
         transformer: nn.Module,
         num_multimask_outputs: int = 3,
-        activation: Type[nn.Module] = nn.GELU,
+        activation: type[nn.Module] = nn.GELU,
         iou_head_depth: int = 3,
         iou_head_hidden_dim: int = 256,
     ) -> None:
@@ -93,7 +93,7 @@ class MaskDecoder(nn.Module):
         sparse_prompt_embeddings: torch.Tensor,
         dense_prompt_embeddings: torch.Tensor,
         multimask_output: bool,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Predict masks given image and prompt embeddings.

@@ -137,7 +137,7 @@ class MaskDecoder(nn.Module):
         image_pe: torch.Tensor,
         sparse_prompt_embeddings: torch.Tensor,
         dense_prompt_embeddings: torch.Tensor,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         """Predict masks and quality scores using image and prompt embeddings via transformer architecture."""
         # Concatenate output tokens
         output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
@@ -158,7 +158,7 @@ class MaskDecoder(nn.Module):
         # Upscale mask embeddings and predict masks using the mask tokens
         src = src.transpose(1, 2).view(b, c, h, w)
         upscaled_embedding = self.output_upscaling(src)
-        hyper_in_list: List[torch.Tensor] = [
+        hyper_in_list: list[torch.Tensor] = [
             self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :]) for i in range(self.num_mask_tokens)
         ]
         hyper_in = torch.stack(hyper_in_list, dim=1)
@@ -221,7 +221,7 @@ class SAM2MaskDecoder(nn.Module):
         transformer_dim: int,
         transformer: nn.Module,
         num_multimask_outputs: int = 3,
-        activation: Type[nn.Module] = nn.GELU,
+        activation: type[nn.Module] = nn.GELU,
         iou_head_depth: int = 3,
         iou_head_hidden_dim: int = 256,
         use_high_res_features: bool = False,
@@ -317,8 +317,8 @@ class SAM2MaskDecoder(nn.Module):
         dense_prompt_embeddings: torch.Tensor,
         multimask_output: bool,
         repeat_image: bool,
-        high_res_features: Optional[List[torch.Tensor]] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        high_res_features: list[torch.Tensor] | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Predict masks given image and prompt embeddings.

@@ -385,8 +385,8 @@ class SAM2MaskDecoder(nn.Module):
         sparse_prompt_embeddings: torch.Tensor,
         dense_prompt_embeddings: torch.Tensor,
         repeat_image: bool,
-        high_res_features: Optional[List[torch.Tensor]] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        high_res_features: list[torch.Tensor] | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """Predict instance segmentation masks from image and prompt embeddings using a transformer."""
         # Concatenate output tokens
         s = 0
@@ -431,7 +431,7 @@ class SAM2MaskDecoder(nn.Module):
             upscaled_embedding = act1(ln1(dc1(src) + feat_s1))
             upscaled_embedding = act2(dc2(upscaled_embedding) + feat_s0)

-        hyper_in_list: List[torch.Tensor] = [
+        hyper_in_list: list[torch.Tensor] = [
             self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :]) for i in range(self.num_mask_tokens)
         ]
         hyper_in = torch.stack(hyper_in_list, dim=1)
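The `hyper_in_list: list[torch.Tensor]` lines above apply one hypernetwork MLP per mask token and stack the results along a new dimension. A minimal, self-contained sketch of that pattern with toy shapes and plain `nn.Linear` layers standing in for the decoder's MLPs:

```python
from __future__ import annotations

import torch
from torch import nn

num_mask_tokens, b, c = 4, 2, 8
mlps = nn.ModuleList(nn.Linear(c, c) for _ in range(num_mask_tokens))
mask_tokens_out = torch.randn(b, num_mask_tokens, c)

# One MLP per token embedding, then stack along dim=1 to get (b, num_mask_tokens, c).
hyper_in_list: list[torch.Tensor] = [mlps[i](mask_tokens_out[:, i, :]) for i in range(num_mask_tokens)]
hyper_in = torch.stack(hyper_in_list, dim=1)
print(hyper_in.shape)  # torch.Size([2, 4, 8])
```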
ultralytics/models/sam/modules/encoders.py
CHANGED
@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

-from typing import List, Optional, Tuple, Type
+from __future__ import annotations

 import torch
 import torch.nn as nn
@@ -56,13 +56,13 @@ class ImageEncoderViT(nn.Module):
         mlp_ratio: float = 4.0,
         out_chans: int = 256,
         qkv_bias: bool = True,
-        norm_layer: Type[nn.Module] = nn.LayerNorm,
-        act_layer: Type[nn.Module] = nn.GELU,
+        norm_layer: type[nn.Module] = nn.LayerNorm,
+        act_layer: type[nn.Module] = nn.GELU,
         use_abs_pos: bool = True,
         use_rel_pos: bool = False,
         rel_pos_zero_init: bool = True,
         window_size: int = 0,
-        global_attn_indexes: Tuple[int, ...] = (),
+        global_attn_indexes: tuple[int, ...] = (),
     ) -> None:
         """
         Initialize an ImageEncoderViT instance for encoding images using Vision Transformer architecture.
@@ -101,7 +101,7 @@ class ImageEncoderViT(nn.Module):
             embed_dim=embed_dim,
         )

-        self.pos_embed: Optional[nn.Parameter] = None
+        self.pos_embed: nn.Parameter | None = None
         if use_abs_pos:
             # Initialize absolute positional embedding with pretrain image size
             self.pos_embed = nn.Parameter(torch.zeros(1, img_size // patch_size, img_size // patch_size, embed_dim))
@@ -188,10 +188,10 @@ class PromptEncoder(nn.Module):
     def __init__(
         self,
         embed_dim: int,
-        image_embedding_size: Tuple[int, int],
-        input_image_size: Tuple[int, int],
+        image_embedding_size: tuple[int, int],
+        input_image_size: tuple[int, int],
         mask_in_chans: int,
-        activation: Type[nn.Module] = nn.GELU,
+        activation: type[nn.Module] = nn.GELU,
     ) -> None:
         """
         Initialize the PromptEncoder module for encoding various types of prompts.
@@ -286,9 +286,9 @@ class PromptEncoder(nn.Module):

     @staticmethod
     def _get_batch_size(
-        points: Optional[Tuple[torch.Tensor, torch.Tensor]],
-        boxes: Optional[torch.Tensor],
-        masks: Optional[torch.Tensor],
+        points: tuple[torch.Tensor, torch.Tensor] | None,
+        boxes: torch.Tensor | None,
+        masks: torch.Tensor | None,
     ) -> int:
         """Get the batch size of the output given the batch size of the input prompts."""
         if points is not None:
@@ -302,10 +302,10 @@ class PromptEncoder(nn.Module):

     def forward(
         self,
-        points: Optional[Tuple[torch.Tensor, torch.Tensor]],
-        boxes: Optional[torch.Tensor],
-        masks: Optional[torch.Tensor],
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        points: tuple[torch.Tensor, torch.Tensor] | None,
+        boxes: torch.Tensor | None,
+        masks: torch.Tensor | None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Embed different types of prompts, returning both sparse and dense embeddings.

@@ -542,13 +542,13 @@ class FpnNeck(nn.Module):
     def __init__(
         self,
         d_model: int,
-        backbone_channel_list: List[int],
+        backbone_channel_list: list[int],
         kernel_size: int = 1,
         stride: int = 1,
         padding: int = 0,
         fpn_interp_model: str = "bilinear",
         fuse_type: str = "sum",
-        fpn_top_down_levels: Optional[List[int]] = None,
+        fpn_top_down_levels: list[int] | None = None,
     ):
         """
         Initialize a modified Feature Pyramid Network (FPN) neck.
@@ -602,7 +602,7 @@ class FpnNeck(nn.Module):
             fpn_top_down_levels = range(len(self.convs))
         self.fpn_top_down_levels = list(fpn_top_down_levels)

-    def forward(self, xs: List[torch.Tensor]):
+    def forward(self, xs: list[torch.Tensor]):
         """
         Perform forward pass through the Feature Pyramid Network (FPN) neck.

@@ -695,20 +695,20 @@ class Hiera(nn.Module):
         num_heads: int = 1,  # initial number of heads
         drop_path_rate: float = 0.0,  # stochastic depth
         q_pool: int = 3,  # number of q_pool stages
-        q_stride: Tuple[int, int] = (2, 2),  # downsample stride bet. stages
-        stages: Tuple[int, ...] = (2, 3, 16, 3),  # blocks per stage
+        q_stride: tuple[int, int] = (2, 2),  # downsample stride bet. stages
+        stages: tuple[int, ...] = (2, 3, 16, 3),  # blocks per stage
         dim_mul: float = 2.0,  # dim_mul factor at stage shift
         head_mul: float = 2.0,  # head_mul factor at stage shift
-        window_pos_embed_bkg_spatial_size: Tuple[int, int] = (14, 14),
+        window_pos_embed_bkg_spatial_size: tuple[int, int] = (14, 14),
         # window size per stage, when not using global att.
-        window_spec: Tuple[int, ...] = (
+        window_spec: tuple[int, ...] = (
             8,
             4,
             14,
             7,
         ),
         # global attn in these blocks
-        global_att_blocks: Tuple[int, ...] = (
+        global_att_blocks: tuple[int, ...] = (
             12,
             16,
             20,
@@ -806,7 +806,7 @@ class Hiera(nn.Module):
             else [self.blocks[-1].dim_out]
         )

-    def _get_pos_embed(self, hw: Tuple[int, int]) -> torch.Tensor:
+    def _get_pos_embed(self, hw: tuple[int, int]) -> torch.Tensor:
         """Generate positional embeddings by interpolating and combining window and background embeddings."""
         h, w = hw
         window_embed = self.pos_embed_window
@@ -815,7 +815,7 @@ class Hiera(nn.Module):
         pos_embed = pos_embed.permute(0, 2, 3, 1)
         return pos_embed

-    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
+    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
         """
         Perform forward pass through Hiera model, extracting multiscale features from input images.
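The `_get_batch_size` signature above now spells out that each prompt type may be absent. A hedged sketch of the contract it implies, written as a standalone function and simplified from the method body that the `if points is not None:` context line hints at:

```python
from __future__ import annotations

import torch


def get_batch_size(
    points: tuple[torch.Tensor, torch.Tensor] | None,
    boxes: torch.Tensor | None,
    masks: torch.Tensor | None,
) -> int:
    """Infer batch size from whichever optional prompt is provided, defaulting to 1."""
    if points is not None:
        return points[0].shape[0]
    if boxes is not None:
        return boxes.shape[0]
    if masks is not None:
        return masks.shape[0]
    return 1


print(get_batch_size(None, torch.zeros(3, 4), None))  # 3
```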
ultralytics/models/sam/modules/memory_attention.py
CHANGED
@@ -1,7 +1,8 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 import copy
-from typing import Optional

 import torch
 from torch import nn
@@ -103,7 +104,7 @@ class MemoryAttentionLayer(nn.Module):
         self.pos_enc_at_cross_attn_queries = pos_enc_at_cross_attn_queries
         self.pos_enc_at_cross_attn_keys = pos_enc_at_cross_attn_keys

-    def _forward_sa(self, tgt: torch.Tensor, query_pos: Optional[torch.Tensor]) -> torch.Tensor:
+    def _forward_sa(self, tgt: torch.Tensor, query_pos: torch.Tensor | None) -> torch.Tensor:
         """Perform self-attention on input tensor using positional encoding and RoPE attention mechanism."""
         tgt2 = self.norm1(tgt)
         q = k = tgt2 + query_pos if self.pos_enc_at_attn else tgt2
@@ -115,8 +116,8 @@ class MemoryAttentionLayer(nn.Module):
         self,
         tgt: torch.Tensor,
         memory: torch.Tensor,
-        query_pos: Optional[torch.Tensor],
-        pos: Optional[torch.Tensor],
+        query_pos: torch.Tensor | None,
+        pos: torch.Tensor | None,
         num_k_exclude_rope: int = 0,
     ) -> torch.Tensor:
         """Perform cross-attention between target and memory tensors using RoPEAttention mechanism."""
@@ -140,8 +141,8 @@ class MemoryAttentionLayer(nn.Module):
         self,
         tgt: torch.Tensor,
         memory: torch.Tensor,
-        pos: Optional[torch.Tensor] = None,
-        query_pos: Optional[torch.Tensor] = None,
+        pos: torch.Tensor | None = None,
+        query_pos: torch.Tensor | None = None,
         num_k_exclude_rope: int = 0,
     ) -> torch.Tensor:
         """
@@ -242,8 +243,8 @@ class MemoryAttention(nn.Module):
         self,
         curr: torch.Tensor,  # self-attention inputs
         memory: torch.Tensor,  # cross-attention inputs
-        curr_pos: Optional[torch.Tensor] = None,  # pos_enc for self-attention inputs
-        memory_pos: Optional[torch.Tensor] = None,  # pos_enc for cross-attention inputs
+        curr_pos: torch.Tensor | None = None,  # pos_enc for self-attention inputs
+        memory_pos: torch.Tensor | None = None,  # pos_enc for cross-attention inputs
         num_obj_ptr_tokens: int = 0,  # number of object pointer *tokens*
     ) -> torch.Tensor:
         """
ultralytics/models/sam/modules/sam.py
CHANGED
@@ -3,10 +3,7 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.

-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-from typing import List
+from __future__ import annotations

 import torch
 import torch.nn.functional as F
@@ -61,8 +58,8 @@ class SAMModel(nn.Module):
         image_encoder: ImageEncoderViT,
         prompt_encoder: PromptEncoder,
         mask_decoder: MaskDecoder,
-        pixel_mean: List[float] = (123.675, 116.28, 103.53),
-        pixel_std: List[float] = (58.395, 57.12, 57.375),
+        pixel_mean: list[float] = (123.675, 116.28, 103.53),
+        pixel_std: list[float] = (58.395, 57.12, 57.375),
     ) -> None:
         """
         Initialize the SAMModel class to predict object masks from an image and input prompts.
@@ -959,7 +956,6 @@ class SAM2Model(torch.nn.Module):
         prev_sam_mask_logits=None,
     ):
         """Perform a single tracking step, updating object masks and memory features based on current frame inputs."""
-        current_out = {}
         sam_outputs, _, _ = self._track_step(
             frame_idx,
             is_init_cond_frame,
@@ -975,9 +971,11 @@ class SAM2Model(torch.nn.Module):
         )
         _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = sam_outputs

-        current_out["pred_masks"] = low_res_masks
-        current_out["pred_masks_high_res"] = high_res_masks
-        current_out["obj_ptr"] = obj_ptr
+        current_out = {
+            "pred_masks": low_res_masks,
+            "pred_masks_high_res": high_res_masks,
+            "obj_ptr": obj_ptr,
+        }
         if not self.training:
             # Only add this in inference (to avoid unused param in activation checkpointing;
             # it's mainly used in the demo to encode spatial memories w/ consolidated masks)