dgenerate-ultralytics-headless 8.3.190__py3-none-any.whl → 8.3.191__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/RECORD +102 -101
  3. tests/test_cuda.py +6 -5
  4. tests/test_exports.py +1 -6
  5. tests/test_python.py +1 -4
  6. tests/test_solutions.py +1 -1
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +16 -14
  9. ultralytics/cfg/datasets/VisDrone.yaml +4 -4
  10. ultralytics/data/annotator.py +6 -6
  11. ultralytics/data/augment.py +53 -51
  12. ultralytics/data/base.py +15 -13
  13. ultralytics/data/build.py +7 -4
  14. ultralytics/data/converter.py +9 -10
  15. ultralytics/data/dataset.py +24 -22
  16. ultralytics/data/loaders.py +13 -11
  17. ultralytics/data/split.py +4 -3
  18. ultralytics/data/split_dota.py +14 -12
  19. ultralytics/data/utils.py +29 -23
  20. ultralytics/engine/exporter.py +2 -2
  21. ultralytics/engine/model.py +16 -14
  22. ultralytics/engine/predictor.py +8 -6
  23. ultralytics/engine/results.py +54 -52
  24. ultralytics/engine/trainer.py +7 -2
  25. ultralytics/engine/tuner.py +4 -3
  26. ultralytics/hub/google/__init__.py +7 -6
  27. ultralytics/hub/session.py +8 -6
  28. ultralytics/hub/utils.py +3 -4
  29. ultralytics/models/fastsam/model.py +8 -6
  30. ultralytics/models/nas/model.py +5 -3
  31. ultralytics/models/rtdetr/train.py +4 -3
  32. ultralytics/models/rtdetr/val.py +6 -4
  33. ultralytics/models/sam/amg.py +13 -10
  34. ultralytics/models/sam/model.py +3 -2
  35. ultralytics/models/sam/modules/blocks.py +21 -21
  36. ultralytics/models/sam/modules/decoders.py +11 -11
  37. ultralytics/models/sam/modules/encoders.py +25 -25
  38. ultralytics/models/sam/modules/memory_attention.py +9 -8
  39. ultralytics/models/sam/modules/sam.py +8 -10
  40. ultralytics/models/sam/modules/tiny_encoder.py +21 -20
  41. ultralytics/models/sam/modules/transformer.py +6 -5
  42. ultralytics/models/sam/modules/utils.py +7 -5
  43. ultralytics/models/sam/predict.py +32 -31
  44. ultralytics/models/utils/loss.py +29 -27
  45. ultralytics/models/utils/ops.py +10 -8
  46. ultralytics/models/yolo/classify/train.py +7 -5
  47. ultralytics/models/yolo/classify/val.py +10 -8
  48. ultralytics/models/yolo/detect/predict.py +1 -1
  49. ultralytics/models/yolo/detect/train.py +8 -6
  50. ultralytics/models/yolo/detect/val.py +21 -19
  51. ultralytics/models/yolo/model.py +14 -14
  52. ultralytics/models/yolo/obb/train.py +5 -3
  53. ultralytics/models/yolo/obb/val.py +11 -9
  54. ultralytics/models/yolo/pose/train.py +7 -5
  55. ultralytics/models/yolo/pose/val.py +11 -9
  56. ultralytics/models/yolo/segment/train.py +4 -5
  57. ultralytics/models/yolo/segment/val.py +12 -10
  58. ultralytics/models/yolo/world/train.py +9 -7
  59. ultralytics/models/yolo/yoloe/train.py +7 -6
  60. ultralytics/models/yolo/yoloe/val.py +10 -8
  61. ultralytics/nn/autobackend.py +17 -19
  62. ultralytics/nn/modules/block.py +12 -12
  63. ultralytics/nn/modules/conv.py +4 -3
  64. ultralytics/nn/modules/head.py +41 -37
  65. ultralytics/nn/modules/transformer.py +22 -21
  66. ultralytics/nn/tasks.py +2 -2
  67. ultralytics/nn/text_model.py +6 -5
  68. ultralytics/solutions/analytics.py +7 -5
  69. ultralytics/solutions/config.py +12 -10
  70. ultralytics/solutions/distance_calculation.py +3 -3
  71. ultralytics/solutions/heatmap.py +4 -2
  72. ultralytics/solutions/object_counter.py +5 -3
  73. ultralytics/solutions/parking_management.py +4 -2
  74. ultralytics/solutions/region_counter.py +7 -5
  75. ultralytics/solutions/similarity_search.py +5 -3
  76. ultralytics/solutions/solutions.py +38 -36
  77. ultralytics/solutions/streamlit_inference.py +8 -7
  78. ultralytics/trackers/bot_sort.py +11 -9
  79. ultralytics/trackers/byte_tracker.py +17 -15
  80. ultralytics/trackers/utils/gmc.py +4 -3
  81. ultralytics/utils/__init__.py +16 -88
  82. ultralytics/utils/autobatch.py +3 -2
  83. ultralytics/utils/autodevice.py +10 -10
  84. ultralytics/utils/benchmarks.py +11 -10
  85. ultralytics/utils/callbacks/comet.py +9 -9
  86. ultralytics/utils/checks.py +17 -26
  87. ultralytics/utils/export.py +12 -11
  88. ultralytics/utils/files.py +8 -7
  89. ultralytics/utils/git.py +139 -0
  90. ultralytics/utils/instance.py +8 -7
  91. ultralytics/utils/loss.py +15 -13
  92. ultralytics/utils/metrics.py +62 -62
  93. ultralytics/utils/ops.py +3 -2
  94. ultralytics/utils/patches.py +6 -4
  95. ultralytics/utils/plotting.py +18 -16
  96. ultralytics/utils/torch_utils.py +4 -2
  97. ultralytics/utils/tqdm.py +15 -12
  98. ultralytics/utils/triton.py +3 -2
  99. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/WHEEL +0 -0
  100. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/entry_points.txt +0 -0
  101. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/licenses/LICENSE +0 -0
  102. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/top_level.txt +0 -0
ultralytics/hub/session.py CHANGED
@@ -1,11 +1,13 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+ from __future__ import annotations
+
  import shutil
  import threading
  import time
  from http import HTTPStatus
  from pathlib import Path
- from typing import Any, Dict, Optional
+ from typing import Any
  from urllib.parse import parse_qs, urlparse
 
  from ultralytics import __version__
@@ -90,7 +92,7 @@ class HUBTrainingSession:
  )
 
  @classmethod
- def create_session(cls, identifier: str, args: Optional[Dict[str, Any]] = None):
+ def create_session(cls, identifier: str, args: dict[str, Any] | None = None):
  """
  Create an authenticated HUBTrainingSession or return None.
 
@@ -137,7 +139,7 @@ class HUBTrainingSession:
  self.model.start_heartbeat(self.rate_limits["heartbeat"])
  LOGGER.info(f"{PREFIX}View model at {self.model_url} 🚀")
 
- def create_model(self, model_args: Dict[str, Any]):
+ def create_model(self, model_args: dict[str, Any]):
  """
  Initialize a HUB training session with the specified model arguments.
 
@@ -204,7 +206,7 @@ class HUBTrainingSession:
  HUBModelError: If the identifier format is not recognized.
  """
  api_key, model_id, filename = None, None, None
- if str(identifier).endswith((".pt", ".yaml")):
+ if identifier.endswith((".pt", ".yaml")):
  filename = identifier
  elif identifier.startswith(f"{HUB_WEB_ROOT}/models/"):
  parsed_url = urlparse(identifier)
@@ -254,8 +256,8 @@ class HUBTrainingSession:
  timeout: int = 30,
  thread: bool = True,
  verbose: bool = True,
- progress_total: Optional[int] = None,
- stream_response: Optional[bool] = None,
+ progress_total: int | None = None,
+ stream_response: bool | None = None,
  *args,
  **kwargs,
  ):
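Nearly every file touched in this release follows the same pattern seen above: add `from __future__ import annotations`, drop `Dict`, `List`, `Optional`, `Tuple`, `Type`, and `Union` imports, and write annotations with built-in generics (PEP 585) and `X | Y` unions (PEP 604) instead. Because the future import defers annotation evaluation, the new syntax stays importable on Python versions that do not support it at runtime. A minimal standalone sketch of the pattern follows; the names are illustrative, not code from the package:

# Sketch of the annotation style adopted across this release (illustrative names only).
from __future__ import annotations  # annotations become strings, so new syntax merely needs to parse

from typing import Any


def configure(name: str, args: dict[str, Any] | None = None) -> dict[str, Any]:
    # dict[...] and "| None" replace typing.Dict and typing.Optional in annotations
    return {"name": name, **(args or {})}

Since the annotations are never evaluated at import time, `dict[str, Any] | None` is accepted even on interpreters where the `|` union operator on types is not yet supported at runtime.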
ultralytics/hub/utils.py CHANGED
@@ -11,8 +11,8 @@ from ultralytics import __version__
  from ultralytics.utils import (
  ARGV,
  ENVIRONMENT,
+ GIT,
  IS_COLAB,
- IS_GIT_DIR,
  IS_PIP_PACKAGE,
  LOGGER,
  ONLINE,
@@ -23,7 +23,6 @@ from ultralytics.utils import (
  TQDM,
  TryExcept,
  colorstr,
- get_git_origin_url,
  )
  from ultralytics.utils.downloads import GITHUB_ASSETS_NAMES
  from ultralytics.utils.torch_utils import get_cpu_info
@@ -205,7 +204,7 @@ class Events:
  self.t = 0.0 # rate limit timer (seconds)
  self.metadata = {
  "cli": Path(ARGV[0]).name == "yolo",
- "install": "git" if IS_GIT_DIR else "pip" if IS_PIP_PACKAGE else "other",
+ "install": "git" if GIT.is_repo else "pip" if IS_PIP_PACKAGE else "other",
  "python": PYTHON_VERSION.rsplit(".", 1)[0], # i.e. 3.13
  "CPU": get_cpu_info(),
  # "GPU": get_gpu_info(index=0) if cuda else None,
@@ -219,7 +218,7 @@
  and RANK in {-1, 0}
  and not TESTS_RUNNING
  and ONLINE
- and (IS_PIP_PACKAGE or get_git_origin_url() == "https://github.com/ultralytics/ultralytics.git")
+ and (IS_PIP_PACKAGE or GIT.origin == "https://github.com/ultralytics/ultralytics.git")
  )
 
  def __call__(self, cfg, device=None):
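The `GIT` name imported above comes from the new `ultralytics/utils/git.py` module (+139 lines in the file table) and replaces the module-level `IS_GIT_DIR` flag and `get_git_origin_url()` helper with attributes on a single object. That module is not shown in this diff; the sketch below only illustrates the shape implied by the two attributes used here (`is_repo` and `origin`) and is not the package's actual implementation:

# Hedged sketch: the real ultralytics/utils/git.py is not part of this diff.
from __future__ import annotations

import subprocess
from functools import cached_property


class Git:
    """Minimal stand-in exposing the two attributes used in the hunk above."""

    @cached_property
    def is_repo(self) -> bool:
        # True when the current working directory sits inside a git work tree.
        try:
            out = subprocess.run(["git", "rev-parse", "--is-inside-work-tree"], capture_output=True, text=True)
            return out.returncode == 0 and out.stdout.strip() == "true"
        except FileNotFoundError:  # git not installed
            return False

    @cached_property
    def origin(self) -> str:
        # URL of the "origin" remote, or an empty string outside a repo.
        if not self.is_repo:
            return ""
        out = subprocess.run(["git", "config", "--get", "remote.origin.url"], capture_output=True, text=True)
        return out.stdout.strip()


GIT = Git()  # a module-level singleton would allow the `from ultralytics.utils import GIT` seen above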
ultralytics/models/fastsam/model.py CHANGED
@@ -1,7 +1,9 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+ from __future__ import annotations
+
  from pathlib import Path
- from typing import Any, Dict, List, Optional
+ from typing import Any
 
  from ultralytics.engine.model import Model
 
@@ -45,10 +47,10 @@ class FastSAM(Model):
  self,
  source,
  stream: bool = False,
- bboxes: Optional[List] = None,
- points: Optional[List] = None,
- labels: Optional[List] = None,
- texts: Optional[List] = None,
+ bboxes: list | None = None,
+ points: list | None = None,
+ labels: list | None = None,
+ texts: list | None = None,
  **kwargs: Any,
  ):
  """
@@ -74,6 +76,6 @@ class FastSAM(Model):
  return super().predict(source, stream, prompts=prompts, **kwargs)
 
  @property
- def task_map(self) -> Dict[str, Dict[str, Any]]:
+ def task_map(self) -> dict[str, dict[str, Any]]:
  """Returns a dictionary mapping segment task to corresponding predictor and validator classes."""
  return {"segment": {"predictor": FastSAMPredictor, "validator": FastSAMValidator}}
ultralytics/models/nas/model.py CHANGED
@@ -1,7 +1,9 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+ from __future__ import annotations
+
  from pathlib import Path
- from typing import Any, Dict
+ from typing import Any
 
  import torch
 
@@ -80,7 +82,7 @@ class NAS(Model):
  self.model.args = {**DEFAULT_CFG_DICT, **self.overrides} # for export()
  self.model.eval()
 
- def info(self, detailed: bool = False, verbose: bool = True) -> Dict[str, Any]:
+ def info(self, detailed: bool = False, verbose: bool = True) -> dict[str, Any]:
  """
  Log model information.
 
@@ -94,6 +96,6 @@ class NAS(Model):
  return model_info(self.model, detailed=detailed, verbose=verbose, imgsz=640)
 
  @property
- def task_map(self) -> Dict[str, Dict[str, Any]]:
+ def task_map(self) -> dict[str, dict[str, Any]]:
  """Return a dictionary mapping tasks to respective predictor and validator classes."""
  return {"detect": {"predictor": NASPredictor, "validator": NASValidator}}
ultralytics/models/rtdetr/train.py CHANGED
@@ -1,7 +1,8 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+ from __future__ import annotations
+
  from copy import copy
- from typing import Optional
 
  from ultralytics.models.yolo.detect import DetectionTrainer
  from ultralytics.nn.tasks import RTDETRDetectionModel
@@ -41,7 +42,7 @@ class RTDETRTrainer(DetectionTrainer):
  >>> trainer.train()
  """
 
- def get_model(self, cfg: Optional[dict] = None, weights: Optional[str] = None, verbose: bool = True):
+ def get_model(self, cfg: dict | None = None, weights: str | None = None, verbose: bool = True):
  """
  Initialize and return an RT-DETR model for object detection tasks.
 
@@ -58,7 +59,7 @@ class RTDETRTrainer(DetectionTrainer):
  model.load(weights)
  return model
 
- def build_dataset(self, img_path: str, mode: str = "val", batch: Optional[int] = None):
+ def build_dataset(self, img_path: str, mode: str = "val", batch: int | None = None):
  """
  Build and return an RT-DETR dataset for training or validation.
 
ultralytics/models/rtdetr/val.py CHANGED
@@ -1,7 +1,9 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+ from __future__ import annotations
+
  from pathlib import Path
- from typing import Any, Dict, List, Tuple, Union
+ from typing import Any
 
  import torch
 
@@ -155,8 +157,8 @@ class RTDETRValidator(DetectionValidator):
  )
 
  def postprocess(
- self, preds: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]]
- ) -> List[Dict[str, torch.Tensor]]:
+ self, preds: torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor]
+ ) -> list[dict[str, torch.Tensor]]:
  """
  Apply Non-maximum suppression to prediction outputs.
 
@@ -187,7 +189,7 @@
 
  return [{"bboxes": x[:, :4], "conf": x[:, 4], "cls": x[:, 5]} for x in outputs]
 
- def pred_to_json(self, predn: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> None:
+ def pred_to_json(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> None:
  """
  Serialize YOLO predictions to COCO json format.
 
ultralytics/models/sam/amg.py CHANGED
@@ -1,15 +1,18 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+ from __future__ import annotations
+
  import math
+ from collections.abc import Generator
  from itertools import product
- from typing import Any, Generator, List, Tuple
+ from typing import Any
 
  import numpy as np
  import torch
 
 
  def is_box_near_crop_edge(
- boxes: torch.Tensor, crop_box: List[int], orig_box: List[int], atol: float = 20.0
+ boxes: torch.Tensor, crop_box: list[int], orig_box: list[int], atol: float = 20.0
  ) -> torch.Tensor:
  """
  Determine if bounding boxes are near the edge of a cropped image region using a specified tolerance.
@@ -38,7 +41,7 @@ def is_box_near_crop_edge(
  return torch.any(near_crop_edge, dim=1)
 
 
- def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
+ def batch_iterator(batch_size: int, *args) -> Generator[list[Any]]:
  """
  Yield batches of data from input arguments with specified batch size for efficient processing.
 
@@ -106,14 +109,14 @@ def build_point_grid(n_per_side: int) -> np.ndarray:
  return np.stack([points_x, points_y], axis=-1).reshape(-1, 2)
 
 
- def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer: int) -> List[np.ndarray]:
+ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer: int) -> list[np.ndarray]:
  """Generate point grids for multiple crop layers with varying scales and densities."""
  return [build_point_grid(int(n_per_side / (scale_per_layer**i))) for i in range(n_layers + 1)]
 
 
  def generate_crop_boxes(
- im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
- ) -> Tuple[List[List[int]], List[int]]:
+ im_size: tuple[int, ...], n_layers: int, overlap_ratio: float
+ ) -> tuple[list[list[int]], list[int]]:
  """
  Generate crop boxes of varying sizes for multiscale image processing, with layered overlapping regions.
 
@@ -163,7 +166,7 @@ def generate_crop_boxes(
  return crop_boxes, layer_idxs
 
 
- def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
+ def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: list[int]) -> torch.Tensor:
  """Uncrop bounding boxes by adding the crop box offset to their coordinates."""
  x0, y0, _, _ = crop_box
  offset = torch.tensor([[x0, y0, x0, y0]], device=boxes.device)
@@ -173,7 +176,7 @@ def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
  return boxes + offset
 
 
- def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
+ def uncrop_points(points: torch.Tensor, crop_box: list[int]) -> torch.Tensor:
  """Uncrop points by adding the crop box offset to their coordinates."""
  x0, y0, _, _ = crop_box
  offset = torch.tensor([[x0, y0]], device=points.device)
@@ -183,7 +186,7 @@ def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
  return points + offset
 
 
- def uncrop_masks(masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w: int) -> torch.Tensor:
+ def uncrop_masks(masks: torch.Tensor, crop_box: list[int], orig_h: int, orig_w: int) -> torch.Tensor:
  """Uncrop masks by padding them to the original image size, handling coordinate transformations."""
  x0, y0, x1, y1 = crop_box
  if x0 == 0 and y0 == 0 and x1 == orig_w and y1 == orig_h:
@@ -194,7 +197,7 @@ def uncrop_masks(masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w:
  return torch.nn.functional.pad(masks, pad, value=0)
 
 
- def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tuple[np.ndarray, bool]:
+ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> tuple[np.ndarray, bool]:
  """
  Remove small disconnected regions or holes in a mask based on area threshold and mode.
 
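One hunk above is more than a mechanical import swap: `batch_iterator` now takes `Generator` from `collections.abc` and annotates it with a single type argument, relying on the newer typing rules under which a generator's send and return types default to `None`, so `Generator[list[Any]]` reads the same as `Generator[list[Any], None, None]`. A small runnable sketch of that pattern, with illustrative data and assuming nothing about the package beyond the annotation style:

# Illustrative sketch of the single-argument Generator annotation (not package code).
from __future__ import annotations

from collections.abc import Generator
from typing import Any


def batches(batch_size: int, items: list[Any]) -> Generator[list[Any]]:
    # Equivalent to Generator[list[Any], None, None]; send and return types default to None.
    for i in range(0, len(items), batch_size):
        yield items[i : i + batch_size]


print(list(batches(2, [1, 2, 3, 4, 5])))  # [[1, 2], [3, 4], [5]]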
ultralytics/models/sam/model.py CHANGED
@@ -14,8 +14,9 @@ Key Features:
  - Trained on SA-1B dataset
  """
 
+ from __future__ import annotations
+
  from pathlib import Path
- from typing import Dict, Type
 
  from ultralytics.engine.model import Model
  from ultralytics.utils.torch_utils import model_info
@@ -154,7 +155,7 @@ class SAM(Model):
  return model_info(self.model, detailed=detailed, verbose=verbose)
 
  @property
- def task_map(self) -> Dict[str, Dict[str, Type[Predictor]]]:
+ def task_map(self) -> dict[str, dict[str, type[Predictor]]]:
  """
  Provide a mapping from the 'segment' task to its corresponding 'Predictor'.
 
ultralytics/models/sam/modules/blocks.py CHANGED
@@ -1,9 +1,9 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+ from __future__ import annotations
 
  import copy
  import math
  from functools import partial
- from typing import Optional, Tuple, Type, Union
 
  import numpy as np
  import torch
@@ -81,7 +81,7 @@ class MaskDownSampler(nn.Module):
  stride: int = 4,
  padding: int = 0,
  total_stride: int = 16,
- activation: Type[nn.Module] = nn.GELU,
+ activation: type[nn.Module] = nn.GELU,
  ):
  """Initialize a mask downsampler module for progressive downsampling and channel expansion."""
  super().__init__()
@@ -227,7 +227,7 @@ class Fuser(nn.Module):
  torch.Size([1, 256, 32, 32])
  """
 
- def __init__(self, layer: nn.Module, num_layers: int, dim: Optional[int] = None, input_projection: bool = False):
+ def __init__(self, layer: nn.Module, num_layers: int, dim: int | None = None, input_projection: bool = False):
  """
  Initialize the Fuser module for feature fusion through multiple layers.
 
@@ -295,7 +295,7 @@ class SAM2TwoWayAttentionBlock(TwoWayAttentionBlock):
  embedding_dim: int,
  num_heads: int,
  mlp_dim: int = 2048,
- activation: Type[nn.Module] = nn.ReLU,
+ activation: type[nn.Module] = nn.ReLU,
  attention_downsample_rate: int = 2,
  skip_first_layer_pe: bool = False,
  ) -> None:
@@ -359,7 +359,7 @@ class SAM2TwoWayTransformer(TwoWayTransformer):
  embedding_dim: int,
  num_heads: int,
  mlp_dim: int,
- activation: Type[nn.Module] = nn.ReLU,
+ activation: type[nn.Module] = nn.ReLU,
  attention_downsample_rate: int = 2,
  ) -> None:
  """
@@ -432,7 +432,7 @@ class RoPEAttention(Attention):
  *args,
  rope_theta: float = 10000.0,
  rope_k_repeat: bool = False,
- feat_sizes: Tuple[int, int] = (32, 32), # [w, h] for stride 16 feats at 512 resolution
+ feat_sizes: tuple[int, int] = (32, 32), # [w, h] for stride 16 feats at 512 resolution
  **kwargs,
  ):
  """Initialize RoPEAttention with rotary position encoding for enhanced positional awareness."""
@@ -618,9 +618,9 @@ class MultiScaleBlock(nn.Module):
  num_heads: int,
  mlp_ratio: float = 4.0,
  drop_path: float = 0.0,
- norm_layer: Union[nn.Module, str] = "LayerNorm",
- q_stride: Tuple[int, int] = None,
- act_layer: Type[nn.Module] = nn.GELU,
+ norm_layer: nn.Module | str = "LayerNorm",
+ q_stride: tuple[int, int] = None,
+ act_layer: type[nn.Module] = nn.GELU,
  window_size: int = 0,
  ):
  """Initialize a multiscale attention block with window partitioning and optional query pooling."""
@@ -728,7 +728,7 @@ class PositionEmbeddingSine(nn.Module):
  num_pos_feats: int,
  temperature: int = 10000,
  normalize: bool = True,
- scale: Optional[float] = None,
+ scale: float | None = None,
  ):
  """Initialize sinusoidal position embeddings for 2D image inputs."""
  super().__init__()
@@ -744,7 +744,7 @@ class PositionEmbeddingSine(nn.Module):
 
  self.cache = {}
 
- def _encode_xy(self, x: torch.Tensor, y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+ def _encode_xy(self, x: torch.Tensor, y: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
  """Encode 2D positions using sine/cosine functions for transformer positional embeddings."""
  assert len(x) == len(y) and x.ndim == y.ndim == 1
  x_embed = x * self.scale
@@ -833,7 +833,7 @@ class PositionEmbeddingRandom(nn.Module):
  torch.Size([128, 32, 32])
  """
 
- def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
+ def __init__(self, num_pos_feats: int = 64, scale: float | None = None) -> None:
  """Initialize random spatial frequency position embedding for transformers."""
  super().__init__()
  if scale is None or scale <= 0.0:
@@ -853,7 +853,7 @@ class PositionEmbeddingRandom(nn.Module):
  # Outputs d_1 x ... x d_n x C shape
  return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)
 
- def forward(self, size: Tuple[int, int]) -> torch.Tensor:
+ def forward(self, size: tuple[int, int]) -> torch.Tensor:
  """Generate positional encoding for a grid using random spatial frequencies."""
  h, w = size
  grid = torch.ones(
@@ -869,7 +869,7 @@ class PositionEmbeddingRandom(nn.Module):
  pe = self._pe_encoding(torch.stack([x_embed, y_embed], dim=-1))
  return pe.permute(2, 0, 1) # C x H x W
 
- def forward_with_coords(self, coords_input: torch.Tensor, image_size: Tuple[int, int]) -> torch.Tensor:
+ def forward_with_coords(self, coords_input: torch.Tensor, image_size: tuple[int, int]) -> torch.Tensor:
  """Positionally encode input coordinates, normalizing them to [0,1] based on the given image size."""
  coords = coords_input.clone()
  coords[:, :, 0] = coords[:, :, 0] / image_size[1]
@@ -910,12 +910,12 @@ class Block(nn.Module):
  num_heads: int,
  mlp_ratio: float = 4.0,
  qkv_bias: bool = True,
- norm_layer: Type[nn.Module] = nn.LayerNorm,
- act_layer: Type[nn.Module] = nn.GELU,
+ norm_layer: type[nn.Module] = nn.LayerNorm,
+ act_layer: type[nn.Module] = nn.GELU,
  use_rel_pos: bool = False,
  rel_pos_zero_init: bool = True,
  window_size: int = 0,
- input_size: Optional[Tuple[int, int]] = None,
+ input_size: tuple[int, int] | None = None,
  ) -> None:
  """
  Initialize a transformer block with optional window attention and relative positional embeddings.
@@ -1012,7 +1012,7 @@ class REAttention(nn.Module):
  qkv_bias: bool = True,
  use_rel_pos: bool = False,
  rel_pos_zero_init: bool = True,
- input_size: Optional[Tuple[int, int]] = None,
+ input_size: tuple[int, int] | None = None,
  ) -> None:
  """
  Initialize a Relative Position Attention module for transformer-based architectures.
@@ -1093,9 +1093,9 @@ class PatchEmbed(nn.Module):
 
  def __init__(
  self,
- kernel_size: Tuple[int, int] = (16, 16),
- stride: Tuple[int, int] = (16, 16),
- padding: Tuple[int, int] = (0, 0),
+ kernel_size: tuple[int, int] = (16, 16),
+ stride: tuple[int, int] = (16, 16),
+ padding: tuple[int, int] = (0, 0),
  in_chans: int = 3,
  embed_dim: int = 768,
  ) -> None:
ultralytics/models/sam/modules/decoders.py CHANGED
@@ -1,6 +1,6 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
- from typing import List, Optional, Tuple, Type
+ from __future__ import annotations
 
  import torch
  from torch import nn
@@ -43,7 +43,7 @@ class MaskDecoder(nn.Module):
  transformer_dim: int,
  transformer: nn.Module,
  num_multimask_outputs: int = 3,
- activation: Type[nn.Module] = nn.GELU,
+ activation: type[nn.Module] = nn.GELU,
  iou_head_depth: int = 3,
  iou_head_hidden_dim: int = 256,
  ) -> None:
@@ -93,7 +93,7 @@ class MaskDecoder(nn.Module):
  sparse_prompt_embeddings: torch.Tensor,
  dense_prompt_embeddings: torch.Tensor,
  multimask_output: bool,
- ) -> Tuple[torch.Tensor, torch.Tensor]:
+ ) -> tuple[torch.Tensor, torch.Tensor]:
  """
  Predict masks given image and prompt embeddings.
 
@@ -137,7 +137,7 @@ class MaskDecoder(nn.Module):
  image_pe: torch.Tensor,
  sparse_prompt_embeddings: torch.Tensor,
  dense_prompt_embeddings: torch.Tensor,
- ) -> Tuple[torch.Tensor, torch.Tensor]:
+ ) -> tuple[torch.Tensor, torch.Tensor]:
  """Predict masks and quality scores using image and prompt embeddings via transformer architecture."""
  # Concatenate output tokens
  output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
@@ -158,7 +158,7 @@ class MaskDecoder(nn.Module):
  # Upscale mask embeddings and predict masks using the mask tokens
  src = src.transpose(1, 2).view(b, c, h, w)
  upscaled_embedding = self.output_upscaling(src)
- hyper_in_list: List[torch.Tensor] = [
+ hyper_in_list: list[torch.Tensor] = [
  self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :]) for i in range(self.num_mask_tokens)
  ]
  hyper_in = torch.stack(hyper_in_list, dim=1)
@@ -221,7 +221,7 @@ class SAM2MaskDecoder(nn.Module):
  transformer_dim: int,
  transformer: nn.Module,
  num_multimask_outputs: int = 3,
- activation: Type[nn.Module] = nn.GELU,
+ activation: type[nn.Module] = nn.GELU,
  iou_head_depth: int = 3,
  iou_head_hidden_dim: int = 256,
  use_high_res_features: bool = False,
@@ -317,8 +317,8 @@ class SAM2MaskDecoder(nn.Module):
  dense_prompt_embeddings: torch.Tensor,
  multimask_output: bool,
  repeat_image: bool,
- high_res_features: Optional[List[torch.Tensor]] = None,
- ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+ high_res_features: list[torch.Tensor] | None = None,
+ ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
  """
  Predict masks given image and prompt embeddings.
 
@@ -385,8 +385,8 @@ class SAM2MaskDecoder(nn.Module):
  sparse_prompt_embeddings: torch.Tensor,
  dense_prompt_embeddings: torch.Tensor,
  repeat_image: bool,
- high_res_features: Optional[List[torch.Tensor]] = None,
- ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+ high_res_features: list[torch.Tensor] | None = None,
+ ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
  """Predict instance segmentation masks from image and prompt embeddings using a transformer."""
  # Concatenate output tokens
  s = 0
@@ -431,7 +431,7 @@ class SAM2MaskDecoder(nn.Module):
  upscaled_embedding = act1(ln1(dc1(src) + feat_s1))
  upscaled_embedding = act2(dc2(upscaled_embedding) + feat_s0)
 
- hyper_in_list: List[torch.Tensor] = [
+ hyper_in_list: list[torch.Tensor] = [
  self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :]) for i in range(self.num_mask_tokens)
  ]
  hyper_in = torch.stack(hyper_in_list, dim=1)