dgenerate-ultralytics-headless 8.3.190__py3-none-any.whl → 8.3.192__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (103)
  1. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/RECORD +103 -102
  3. tests/test_cuda.py +6 -5
  4. tests/test_exports.py +1 -6
  5. tests/test_python.py +1 -4
  6. tests/test_solutions.py +1 -1
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +16 -14
  9. ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
  10. ultralytics/cfg/datasets/VisDrone.yaml +4 -4
  11. ultralytics/data/annotator.py +6 -6
  12. ultralytics/data/augment.py +53 -51
  13. ultralytics/data/base.py +15 -13
  14. ultralytics/data/build.py +7 -4
  15. ultralytics/data/converter.py +9 -10
  16. ultralytics/data/dataset.py +24 -22
  17. ultralytics/data/loaders.py +13 -11
  18. ultralytics/data/split.py +4 -3
  19. ultralytics/data/split_dota.py +14 -12
  20. ultralytics/data/utils.py +29 -23
  21. ultralytics/engine/exporter.py +2 -2
  22. ultralytics/engine/model.py +16 -14
  23. ultralytics/engine/predictor.py +8 -6
  24. ultralytics/engine/results.py +54 -52
  25. ultralytics/engine/trainer.py +8 -3
  26. ultralytics/engine/tuner.py +230 -42
  27. ultralytics/hub/google/__init__.py +7 -6
  28. ultralytics/hub/session.py +8 -6
  29. ultralytics/hub/utils.py +3 -4
  30. ultralytics/models/fastsam/model.py +8 -6
  31. ultralytics/models/nas/model.py +5 -3
  32. ultralytics/models/rtdetr/train.py +4 -3
  33. ultralytics/models/rtdetr/val.py +6 -4
  34. ultralytics/models/sam/amg.py +13 -10
  35. ultralytics/models/sam/model.py +3 -2
  36. ultralytics/models/sam/modules/blocks.py +21 -21
  37. ultralytics/models/sam/modules/decoders.py +11 -11
  38. ultralytics/models/sam/modules/encoders.py +25 -25
  39. ultralytics/models/sam/modules/memory_attention.py +9 -8
  40. ultralytics/models/sam/modules/sam.py +8 -10
  41. ultralytics/models/sam/modules/tiny_encoder.py +21 -20
  42. ultralytics/models/sam/modules/transformer.py +6 -5
  43. ultralytics/models/sam/modules/utils.py +7 -5
  44. ultralytics/models/sam/predict.py +32 -31
  45. ultralytics/models/utils/loss.py +29 -27
  46. ultralytics/models/utils/ops.py +10 -8
  47. ultralytics/models/yolo/classify/train.py +9 -7
  48. ultralytics/models/yolo/classify/val.py +11 -9
  49. ultralytics/models/yolo/detect/predict.py +1 -1
  50. ultralytics/models/yolo/detect/train.py +8 -6
  51. ultralytics/models/yolo/detect/val.py +22 -20
  52. ultralytics/models/yolo/model.py +14 -14
  53. ultralytics/models/yolo/obb/train.py +5 -3
  54. ultralytics/models/yolo/obb/val.py +11 -9
  55. ultralytics/models/yolo/pose/train.py +7 -5
  56. ultralytics/models/yolo/pose/val.py +12 -10
  57. ultralytics/models/yolo/segment/train.py +4 -5
  58. ultralytics/models/yolo/segment/val.py +13 -11
  59. ultralytics/models/yolo/world/train.py +10 -8
  60. ultralytics/models/yolo/yoloe/train.py +10 -10
  61. ultralytics/models/yolo/yoloe/val.py +11 -9
  62. ultralytics/nn/autobackend.py +17 -19
  63. ultralytics/nn/modules/block.py +12 -12
  64. ultralytics/nn/modules/conv.py +4 -3
  65. ultralytics/nn/modules/head.py +41 -37
  66. ultralytics/nn/modules/transformer.py +22 -21
  67. ultralytics/nn/tasks.py +2 -2
  68. ultralytics/nn/text_model.py +6 -5
  69. ultralytics/solutions/analytics.py +7 -5
  70. ultralytics/solutions/config.py +12 -10
  71. ultralytics/solutions/distance_calculation.py +3 -3
  72. ultralytics/solutions/heatmap.py +4 -2
  73. ultralytics/solutions/object_counter.py +5 -3
  74. ultralytics/solutions/parking_management.py +4 -2
  75. ultralytics/solutions/region_counter.py +7 -5
  76. ultralytics/solutions/similarity_search.py +5 -3
  77. ultralytics/solutions/solutions.py +38 -36
  78. ultralytics/solutions/streamlit_inference.py +8 -7
  79. ultralytics/trackers/bot_sort.py +11 -9
  80. ultralytics/trackers/byte_tracker.py +17 -15
  81. ultralytics/trackers/utils/gmc.py +4 -3
  82. ultralytics/utils/__init__.py +16 -88
  83. ultralytics/utils/autobatch.py +3 -2
  84. ultralytics/utils/autodevice.py +10 -10
  85. ultralytics/utils/benchmarks.py +11 -10
  86. ultralytics/utils/callbacks/comet.py +9 -9
  87. ultralytics/utils/checks.py +17 -26
  88. ultralytics/utils/export.py +12 -11
  89. ultralytics/utils/files.py +8 -7
  90. ultralytics/utils/git.py +139 -0
  91. ultralytics/utils/instance.py +8 -7
  92. ultralytics/utils/loss.py +15 -13
  93. ultralytics/utils/metrics.py +62 -62
  94. ultralytics/utils/ops.py +3 -2
  95. ultralytics/utils/patches.py +6 -4
  96. ultralytics/utils/plotting.py +20 -18
  97. ultralytics/utils/torch_utils.py +4 -2
  98. ultralytics/utils/tqdm.py +18 -14
  99. ultralytics/utils/triton.py +3 -2
  100. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/WHEEL +0 -0
  101. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/entry_points.txt +0 -0
  102. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/licenses/LICENSE +0 -0
  103. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/top_level.txt +0 -0

ultralytics/models/sam/modules/tiny_encoder.py
@@ -9,8 +9,9 @@
  # Build the TinyViT Model
  # --------------------------------------------------------

+ from __future__ import annotations
+
  import itertools
- from typing import List, Optional, Tuple, Union

  import torch
  import torch.nn as nn
@@ -106,7 +107,7 @@ class PatchEmbed(nn.Module):
  activation (nn.Module): Activation function to use between convolutions.
  """
  super().__init__()
- img_size: Tuple[int, int] = to_2tuple(resolution)
+ img_size: tuple[int, int] = to_2tuple(resolution)
  self.patches_resolution = (img_size[0] // 4, img_size[1] // 4)
  self.num_patches = self.patches_resolution[0] * self.patches_resolution[1]
  self.in_chans = in_chans
@@ -219,7 +220,7 @@ class PatchMerging(nn.Module):
  torch.Size([4, 3136, 128])
  """

- def __init__(self, input_resolution: Tuple[int, int], dim: int, out_dim: int, activation):
+ def __init__(self, input_resolution: tuple[int, int], dim: int, out_dim: int, activation):
  """
  Initialize the PatchMerging module for merging and projecting neighboring patches in feature maps.

@@ -283,13 +284,13 @@ class ConvLayer(nn.Module):
  def __init__(
  self,
  dim: int,
- input_resolution: Tuple[int, int],
+ input_resolution: tuple[int, int],
  depth: int,
  activation,
- drop_path: Union[float, List[float]] = 0.0,
- downsample: Optional[nn.Module] = None,
+ drop_path: float | list[float] = 0.0,
+ downsample: nn.Module | None = None,
  use_checkpoint: bool = False,
- out_dim: Optional[int] = None,
+ out_dim: int | None = None,
  conv_expand_ratio: float = 4.0,
  ):
  """
@@ -370,8 +371,8 @@ class MLP(nn.Module):
  def __init__(
  self,
  in_features: int,
- hidden_features: Optional[int] = None,
- out_features: Optional[int] = None,
+ hidden_features: int | None = None,
+ out_features: int | None = None,
  activation=nn.GELU,
  drop: float = 0.0,
  ):
@@ -441,7 +442,7 @@ class Attention(torch.nn.Module):
  key_dim: int,
  num_heads: int = 8,
  attn_ratio: float = 4,
- resolution: Tuple[int, int] = (14, 14),
+ resolution: tuple[int, int] = (14, 14),
  ):
  """
  Initialize the Attention module for multi-head attention with spatial awareness.
@@ -549,7 +550,7 @@ class TinyViTBlock(nn.Module):
  def __init__(
  self,
  dim: int,
- input_resolution: Tuple[int, int],
+ input_resolution: tuple[int, int],
  num_heads: int,
  window_size: int = 7,
  mlp_ratio: float = 4.0,
@@ -690,18 +691,18 @@ class BasicLayer(nn.Module):
  def __init__(
  self,
  dim: int,
- input_resolution: Tuple[int, int],
+ input_resolution: tuple[int, int],
  depth: int,
  num_heads: int,
  window_size: int,
  mlp_ratio: float = 4.0,
  drop: float = 0.0,
- drop_path: Union[float, List[float]] = 0.0,
- downsample: Optional[nn.Module] = None,
+ drop_path: float | list[float] = 0.0,
+ downsample: nn.Module | None = None,
  use_checkpoint: bool = False,
  local_conv_size: int = 3,
  activation=nn.GELU,
- out_dim: Optional[int] = None,
+ out_dim: int | None = None,
  ):
  """
  Initialize a BasicLayer in the TinyViT architecture.
@@ -800,10 +801,10 @@ class TinyViT(nn.Module):
  img_size: int = 224,
  in_chans: int = 3,
  num_classes: int = 1000,
- embed_dims: Tuple[int, int, int, int] = (96, 192, 384, 768),
- depths: Tuple[int, int, int, int] = (2, 2, 6, 2),
- num_heads: Tuple[int, int, int, int] = (3, 6, 12, 24),
- window_sizes: Tuple[int, int, int, int] = (7, 7, 14, 7),
+ embed_dims: tuple[int, int, int, int] = (96, 192, 384, 768),
+ depths: tuple[int, int, int, int] = (2, 2, 6, 2),
+ num_heads: tuple[int, int, int, int] = (3, 6, 12, 24),
+ window_sizes: tuple[int, int, int, int] = (7, 7, 14, 7),
  mlp_ratio: float = 4.0,
  drop_rate: float = 0.0,
  drop_path_rate: float = 0.1,
@@ -980,7 +981,7 @@ class TinyViT(nn.Module):
  """Perform the forward pass through the TinyViT model, extracting features from the input image."""
  return self.forward_features(x)

- def set_imgsz(self, imgsz: List[int] = [1024, 1024]):
+ def set_imgsz(self, imgsz: list[int] = [1024, 1024]):
  """Set image size to make model compatible with different image sizes."""
  imgsz = [s // 4 for s in imgsz]
  self.patches_resolution = imgsz
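
The change that repeats across this file and the ones below is the replacement of `typing.Tuple`/`List`/`Optional`/`Union`/`Type` aliases with PEP 585 built-in generics and PEP 604 unions, enabled by adding `from __future__ import annotations` (which defers annotation evaluation, so the new syntax also parses on the older Python versions the package supports). A minimal sketch of the before/after pattern, using a hypothetical `make_grid` helper rather than code from the package:

    from __future__ import annotations  # annotations are stored lazily, so tuple[...] and X | None work as hints on older Pythons

    import torch


    # Old style (pre-8.3.192): from typing import List, Optional, Tuple
    # def make_grid(size: Tuple[int, int], scale: Optional[float] = None) -> List[torch.Tensor]: ...

    # New style, matching this release:
    def make_grid(size: tuple[int, int], scale: float | None = None) -> list[torch.Tensor]:
        """Return one 1-D tensor per grid dimension, optionally scaled."""
        scale = 1.0 if scale is None else scale
        return [torch.arange(n, dtype=torch.float32) * scale for n in size]


    print(make_grid((3, 4), scale=0.5))  # two scaled ranges: [0, 0.5, 1.0] and [0, 0.5, 1.0, 1.5]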

ultralytics/models/sam/modules/transformer.py
@@ -1,7 +1,8 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+ from __future__ import annotations
+
  import math
- from typing import Tuple, Type

  import torch
  from torch import Tensor, nn
@@ -44,7 +45,7 @@ class TwoWayTransformer(nn.Module):
  embedding_dim: int,
  num_heads: int,
  mlp_dim: int,
- activation: Type[nn.Module] = nn.ReLU,
+ activation: type[nn.Module] = nn.ReLU,
  attention_downsample_rate: int = 2,
  ) -> None:
  """
@@ -85,7 +86,7 @@ class TwoWayTransformer(nn.Module):
  image_embedding: torch.Tensor,
  image_pe: torch.Tensor,
  point_embedding: torch.Tensor,
- ) -> Tuple[torch.Tensor, torch.Tensor]:
+ ) -> tuple[torch.Tensor, torch.Tensor]:
  """
  Process image and point embeddings through the Two-Way Transformer.

@@ -162,7 +163,7 @@ class TwoWayAttentionBlock(nn.Module):
  embedding_dim: int,
  num_heads: int,
  mlp_dim: int = 2048,
- activation: Type[nn.Module] = nn.ReLU,
+ activation: type[nn.Module] = nn.ReLU,
  attention_downsample_rate: int = 2,
  skip_first_layer_pe: bool = False,
  ) -> None:
@@ -198,7 +199,7 @@ class TwoWayAttentionBlock(nn.Module):

  def forward(
  self, queries: torch.Tensor, keys: torch.Tensor, query_pe: torch.Tensor, key_pe: torch.Tensor
- ) -> Tuple[torch.Tensor, torch.Tensor]:
+ ) -> tuple[torch.Tensor, torch.Tensor]:
  """
  Apply two-way attention to process query and key embeddings in a transformer block.


ultralytics/models/sam/modules/utils.py
@@ -1,12 +1,14 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

- from typing import Any, Dict, Tuple
+ from __future__ import annotations
+
+ from typing import Any

  import torch
  import torch.nn.functional as F


- def select_closest_cond_frames(frame_idx: int, cond_frame_outputs: Dict[int, Any], max_cond_frame_num: int):
+ def select_closest_cond_frames(frame_idx: int, cond_frame_outputs: dict[int, Any], max_cond_frame_num: int):
  """
  Select the closest conditioning frames to a given frame index.

@@ -248,7 +250,7 @@ def window_partition(x: torch.Tensor, window_size: int):
  return windows, (Hp, Wp)


- def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: Tuple[int, int], hw: Tuple[int, int]):
+ def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: tuple[int, int], hw: tuple[int, int]):
  """
  Unpartition windowed sequences into original sequences and remove padding.

@@ -333,8 +335,8 @@ def add_decomposed_rel_pos(
  q: torch.Tensor,
  rel_pos_h: torch.Tensor,
  rel_pos_w: torch.Tensor,
- q_size: Tuple[int, int],
- k_size: Tuple[int, int],
+ q_size: tuple[int, int],
+ k_size: tuple[int, int],
  ) -> torch.Tensor:
  """
  Add decomposed Relative Positional Embeddings to the attention map.

ultralytics/models/sam/predict.py
@@ -8,8 +8,10 @@ using SAM. It forms an integral part of the Ultralytics framework and is designe
  segmentation tasks.
  """

+ from __future__ import annotations
+
  from collections import OrderedDict
- from typing import Any, Dict, List, Optional, Tuple, Union
+ from typing import Any

  import cv2
  import numpy as np
@@ -1717,9 +1719,9 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  def __init__(
  self,
  cfg: Any = DEFAULT_CFG,
- overrides: Optional[Dict[str, Any]] = None,
+ overrides: dict[str, Any] | None = None,
  max_obj_num: int = 3,
- _callbacks: Optional[Dict[str, Any]] = None,
+ _callbacks: dict[str, Any] | None = None,
  ) -> None:
  """
  Initialize the predictor with configuration and optional overrides.
@@ -1759,14 +1761,14 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  @smart_inference_mode()
  def inference(
  self,
- img: Union[torch.Tensor, np.ndarray],
- bboxes: Optional[List[List[float]]] = None,
- masks: Optional[Union[torch.Tensor, np.ndarray]] = None,
- points: Optional[List[List[float]]] = None,
- labels: Optional[List[int]] = None,
- obj_ids: Optional[List[int]] = None,
+ img: torch.Tensor | np.ndarray,
+ bboxes: list[list[float]] | None = None,
+ masks: torch.Tensor | np.ndarray | None = None,
+ points: list[list[float]] | None = None,
+ labels: list[int] | None = None,
+ obj_ids: list[int] | None = None,
  update_memory: bool = False,
- ) -> Tuple[torch.Tensor, torch.Tensor]:
+ ) -> tuple[torch.Tensor, torch.Tensor]:
  """
  Perform inference on a single image with optional bounding boxes, masks, points and object IDs.
  It has two modes: one is to run inference on a single image without updating the memory,
@@ -1824,7 +1826,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  pred_scores = torch.clamp_(pred_scores / 32, min=0)
  return pred_masks.flatten(0, 1), pred_scores.flatten(0, 1)

- def get_im_features(self, img: Union[torch.Tensor, np.ndarray]) -> None:
+ def get_im_features(self, img: torch.Tensor | np.ndarray) -> None:
  """
  Initialize the image state by processing the input image and extracting features.

@@ -1844,10 +1846,10 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  @smart_inference_mode()
  def update_memory(
  self,
- obj_ids: List[int] = None,
- points: Optional[torch.Tensor] = None,
- labels: Optional[torch.Tensor] = None,
- masks: Optional[torch.Tensor] = None,
+ obj_ids: list[int] = None,
+ points: torch.Tensor | None = None,
+ labels: torch.Tensor | None = None,
+ masks: torch.Tensor | None = None,
  ) -> None:
  """
  Append the imgState to the memory_bank and update the memory for the model.
@@ -1923,7 +1925,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  consolidated_out["maskmem_pos_enc"] = maskmem_pos_enc
  self.memory_bank.append(consolidated_out)

- def _prepare_memory_conditioned_features(self, obj_idx: Optional[int]) -> torch.Tensor:
+ def _prepare_memory_conditioned_features(self, obj_idx: int | None) -> torch.Tensor:
  """
  Prepare the memory-conditioned features for the current image state. If obj_idx is provided, it supposes to
  prepare features for a specific prompted object in the image. If obj_idx is None, it prepares features for all
@@ -1958,7 +1960,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  *self.feat_sizes[-1],
  )

- def get_maskmem_enc(self) -> Tuple[torch.Tensor, torch.Tensor]:
+ def get_maskmem_enc(self) -> tuple[torch.Tensor, torch.Tensor]:
  """Get the memory and positional encoding from the memory, which is used to condition the current image
  features.
  """
@@ -1973,7 +1975,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  memory_pos_embed = torch.cat(to_cat_memory_pos_embed, dim=0)
  return memory, memory_pos_embed

- def _obj_id_to_idx(self, obj_id: int) -> Optional[int]:
+ def _obj_id_to_idx(self, obj_id: int) -> int | None:
  """
  Map client-side object id to model-side object index.

@@ -1987,11 +1989,11 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):

  def track_step(
  self,
- obj_idx: Optional[int] = None,
- point: Optional[torch.Tensor] = None,
- label: Optional[torch.Tensor] = None,
- mask: Optional[torch.Tensor] = None,
- ) -> Dict[str, Any]:
+ obj_idx: int | None = None,
+ point: torch.Tensor | None = None,
+ label: torch.Tensor | None = None,
+ mask: torch.Tensor | None = None,
+ ) -> dict[str, Any]:
  """
  Tracking step for the current image state to predict masks.

@@ -2010,7 +2012,6 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  current_out (Dict[str, Any]): A dictionary containing the current output with mask predictions and object pointers.
  Keys include 'point_inputs', 'mask_inputs', 'pred_masks', 'pred_masks_high_res', 'obj_ptr', 'object_score_logits'.
  """
- current_out = {}
  if mask is not None and self.model.use_mask_input_as_output_without_sam:
  # When use_mask_input_as_output_without_sam=True, we directly output the mask input
  # (see it as a GT mask) without using a SAM prompt encoder + mask decoder.
@@ -2021,7 +2022,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  # fused the visual feature with previous memory features in the memory bank
  pix_feat_with_mem = self._prepare_memory_conditioned_features(obj_idx)
  # calculate the first feature if adding obj_idx exists(means adding prompts)
- pix_feat_with_mem = pix_feat_with_mem[0:1] if obj_idx is not None else pix_feat_with_mem
+ pix_feat_with_mem = pix_feat_with_mem[:1] if obj_idx is not None else pix_feat_with_mem
  _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = self.model._forward_sam_heads(
  backbone_features=pix_feat_with_mem,
  point_inputs={"point_coords": point, "point_labels": label} if obj_idx is not None else None,
@@ -2029,9 +2030,9 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  multimask_output=False,
  high_res_features=[feat[: pix_feat_with_mem.size(0)] for feat in self.high_res_features],
  )
- current_out["pred_masks"] = low_res_masks
- current_out["pred_masks_high_res"] = high_res_masks
- current_out["obj_ptr"] = obj_ptr
- current_out["object_score_logits"] = object_score_logits
-
- return current_out
+ return {
+ "pred_masks": low_res_masks,
+ "pred_masks_high_res": high_res_masks,
+ "obj_ptr": obj_ptr,
+ "object_score_logits": object_score_logits,
+ }

ultralytics/models/utils/loss.py
@@ -1,6 +1,8 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

- from typing import Any, Dict, List, Optional, Tuple
+ from __future__ import annotations
+
+ from typing import Any

  import torch
  import torch.nn as nn
@@ -36,7 +38,7 @@ class DETRLoss(nn.Module):
  def __init__(
  self,
  nc: int = 80,
- loss_gain: Optional[Dict[str, float]] = None,
+ loss_gain: dict[str, float] | None = None,
  aux_loss: bool = True,
  use_fl: bool = True,
  use_vfl: bool = False,
@@ -79,7 +81,7 @@ class DETRLoss(nn.Module):

  def _get_loss_class(
  self, pred_scores: torch.Tensor, targets: torch.Tensor, gt_scores: torch.Tensor, num_gts: int, postfix: str = ""
- ) -> Dict[str, torch.Tensor]:
+ ) -> dict[str, torch.Tensor]:
  """
  Compute classification loss based on predictions, target values, and ground truth scores.

@@ -121,7 +123,7 @@ class DETRLoss(nn.Module):

  def _get_loss_bbox(
  self, pred_bboxes: torch.Tensor, gt_bboxes: torch.Tensor, postfix: str = ""
- ) -> Dict[str, torch.Tensor]:
+ ) -> dict[str, torch.Tensor]:
  """
  Compute bounding box and GIoU losses for predicted and ground truth bounding boxes.

@@ -191,12 +193,12 @@ class DETRLoss(nn.Module):
  pred_scores: torch.Tensor,
  gt_bboxes: torch.Tensor,
  gt_cls: torch.Tensor,
- gt_groups: List[int],
- match_indices: Optional[List[Tuple]] = None,
+ gt_groups: list[int],
+ match_indices: list[tuple] | None = None,
  postfix: str = "",
- masks: Optional[torch.Tensor] = None,
- gt_mask: Optional[torch.Tensor] = None,
- ) -> Dict[str, torch.Tensor]:
+ masks: torch.Tensor | None = None,
+ gt_mask: torch.Tensor | None = None,
+ ) -> dict[str, torch.Tensor]:
  """
  Get auxiliary losses for intermediate decoder layers.

@@ -258,7 +260,7 @@ class DETRLoss(nn.Module):
  return loss

  @staticmethod
- def _get_index(match_indices: List[Tuple]) -> Tuple[Tuple[torch.Tensor, torch.Tensor], torch.Tensor]:
+ def _get_index(match_indices: list[tuple]) -> tuple[tuple[torch.Tensor, torch.Tensor], torch.Tensor]:
  """
  Extract batch indices, source indices, and destination indices from match indices.

@@ -275,8 +277,8 @@ class DETRLoss(nn.Module):
  return (batch_idx, src_idx), dst_idx

  def _get_assigned_bboxes(
- self, pred_bboxes: torch.Tensor, gt_bboxes: torch.Tensor, match_indices: List[Tuple]
- ) -> Tuple[torch.Tensor, torch.Tensor]:
+ self, pred_bboxes: torch.Tensor, gt_bboxes: torch.Tensor, match_indices: list[tuple]
+ ) -> tuple[torch.Tensor, torch.Tensor]:
  """
  Assign predicted bounding boxes to ground truth bounding boxes based on match indices.

@@ -309,12 +311,12 @@ class DETRLoss(nn.Module):
  pred_scores: torch.Tensor,
  gt_bboxes: torch.Tensor,
  gt_cls: torch.Tensor,
- gt_groups: List[int],
- masks: Optional[torch.Tensor] = None,
- gt_mask: Optional[torch.Tensor] = None,
+ gt_groups: list[int],
+ masks: torch.Tensor | None = None,
+ gt_mask: torch.Tensor | None = None,
  postfix: str = "",
- match_indices: Optional[List[Tuple]] = None,
- ) -> Dict[str, torch.Tensor]:
+ match_indices: list[tuple] | None = None,
+ ) -> dict[str, torch.Tensor]:
  """
  Calculate losses for a single prediction layer.

@@ -358,10 +360,10 @@ class DETRLoss(nn.Module):
  self,
  pred_bboxes: torch.Tensor,
  pred_scores: torch.Tensor,
- batch: Dict[str, Any],
+ batch: dict[str, Any],
  postfix: str = "",
  **kwargs: Any,
- ) -> Dict[str, torch.Tensor]:
+ ) -> dict[str, torch.Tensor]:
  """
  Calculate loss for predicted bounding boxes and scores.

@@ -407,12 +409,12 @@ class RTDETRDetectionLoss(DETRLoss):

  def forward(
  self,
- preds: Tuple[torch.Tensor, torch.Tensor],
- batch: Dict[str, Any],
- dn_bboxes: Optional[torch.Tensor] = None,
- dn_scores: Optional[torch.Tensor] = None,
- dn_meta: Optional[Dict[str, Any]] = None,
- ) -> Dict[str, torch.Tensor]:
+ preds: tuple[torch.Tensor, torch.Tensor],
+ batch: dict[str, Any],
+ dn_bboxes: torch.Tensor | None = None,
+ dn_scores: torch.Tensor | None = None,
+ dn_meta: dict[str, Any] | None = None,
+ ) -> dict[str, torch.Tensor]:
  """
  Forward pass to compute detection loss with optional denoising loss.

@@ -448,8 +450,8 @@ class RTDETRDetectionLoss(DETRLoss):

  @staticmethod
  def get_dn_match_indices(
- dn_pos_idx: List[torch.Tensor], dn_num_group: int, gt_groups: List[int]
- ) -> List[Tuple[torch.Tensor, torch.Tensor]]:
+ dn_pos_idx: list[torch.Tensor], dn_num_group: int, gt_groups: list[int]
+ ) -> list[tuple[torch.Tensor, torch.Tensor]]:
  """
  Get match indices for denoising.


ultralytics/models/utils/ops.py
@@ -1,6 +1,8 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

- from typing import Any, Dict, List, Optional, Tuple
+ from __future__ import annotations
+
+ from typing import Any

  import torch
  import torch.nn as nn
@@ -47,7 +49,7 @@ class HungarianMatcher(nn.Module):

  def __init__(
  self,
- cost_gain: Optional[Dict[str, float]] = None,
+ cost_gain: dict[str, float] | None = None,
  use_fl: bool = True,
  with_mask: bool = False,
  num_sample_points: int = 12544,
@@ -82,10 +84,10 @@ class HungarianMatcher(nn.Module):
  pred_scores: torch.Tensor,
  gt_bboxes: torch.Tensor,
  gt_cls: torch.Tensor,
- gt_groups: List[int],
- masks: Optional[torch.Tensor] = None,
- gt_mask: Optional[List[torch.Tensor]] = None,
- ) -> List[Tuple[torch.Tensor, torch.Tensor]]:
+ gt_groups: list[int],
+ masks: torch.Tensor | None = None,
+ gt_mask: list[torch.Tensor] | None = None,
+ ) -> list[tuple[torch.Tensor, torch.Tensor]]:
  """
  Compute optimal assignment between predictions and ground truth using Hungarian algorithm.

@@ -187,7 +189,7 @@ class HungarianMatcher(nn.Module):


  def get_cdn_group(
- batch: Dict[str, Any],
+ batch: dict[str, Any],
  num_classes: int,
  num_queries: int,
  class_embed: torch.Tensor,
@@ -195,7 +197,7 @@ def get_cdn_group(
  cls_noise_ratio: float = 0.5,
  box_noise_scale: float = 1.0,
  training: bool = False,
- ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor], Optional[Dict[str, Any]]]:
+ ) -> tuple[torch.Tensor | None, torch.Tensor | None, torch.Tensor | None, dict[str, Any] | None]:
  """
  Generate contrastive denoising training group with positive and negative samples from ground truths.


ultralytics/models/yolo/classify/train.py
@@ -1,7 +1,9 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+ from __future__ import annotations
+
  from copy import copy
- from typing import Any, Dict, Optional
+ from typing import Any

  import torch

@@ -49,7 +51,7 @@ class ClassificationTrainer(BaseTrainer):
  >>> trainer.train()
  """

- def __init__(self, cfg=DEFAULT_CFG, overrides: Optional[Dict[str, Any]] = None, _callbacks=None):
+ def __init__(self, cfg=DEFAULT_CFG, overrides: dict[str, Any] | None = None, _callbacks=None):
  """
  Initialize a ClassificationTrainer object.

@@ -162,10 +164,10 @@ class ClassificationTrainer(BaseTrainer):
  self.model.transforms = loader.dataset.torch_transforms
  return loader

- def preprocess_batch(self, batch: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+ def preprocess_batch(self, batch: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
  """Preprocess a batch of images and classes."""
- batch["img"] = batch["img"].to(self.device)
- batch["cls"] = batch["cls"].to(self.device)
+ batch["img"] = batch["img"].to(self.device, non_blocking=True)
+ batch["cls"] = batch["cls"].to(self.device, non_blocking=True)
  return batch

  def progress_string(self) -> str:
@@ -185,7 +187,7 @@ class ClassificationTrainer(BaseTrainer):
  self.test_loader, self.save_dir, args=copy(self.args), _callbacks=self.callbacks
  )

- def label_loss_items(self, loss_items: Optional[torch.Tensor] = None, prefix: str = "train"):
+ def label_loss_items(self, loss_items: torch.Tensor | None = None, prefix: str = "train"):
  """
  Return a loss dict with labelled training loss items tensor.

@@ -220,7 +222,7 @@ class ClassificationTrainer(BaseTrainer):
  self.metrics.pop("fitness", None)
  self.run_callbacks("on_fit_epoch_end")

- def plot_training_samples(self, batch: Dict[str, torch.Tensor], ni: int):
+ def plot_training_samples(self, batch: dict[str, torch.Tensor], ni: int):
  """
  Plot training samples with their annotations.

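Besides the annotation cleanup, `preprocess_batch` now moves images and labels to the device with `non_blocking=True`. In PyTorch such a copy can overlap with host-side work only when the source tensor sits in pinned (page-locked) memory, typically arranged by the dataloader's `pin_memory` option; otherwise it silently degrades to a normal synchronous copy. A self-contained sketch of the pattern (hypothetical loader settings, not the trainer's actual configuration):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    device = "cuda" if torch.cuda.is_available() else "cpu"
    data = TensorDataset(torch.randn(64, 3, 32, 32), torch.randint(0, 10, (64,)))
    # pin_memory keeps host batches page-locked so the copies below can run asynchronously on a CUDA stream
    loader = DataLoader(data, batch_size=16, pin_memory=(device == "cuda"))

    for imgs, labels in loader:
        imgs = imgs.to(device, non_blocking=True)      # queue the host-to-device copy and return immediately
        labels = labels.to(device, non_blocking=True)
        # forward/backward would follow; CUDA kernels are stream-ordered, so they see the finished copies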

ultralytics/models/yolo/classify/val.py
@@ -1,7 +1,9 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+ from __future__ import annotations
+
  from pathlib import Path
- from typing import Any, Dict, List, Tuple, Union
+ from typing import Any

  import torch

@@ -85,14 +87,14 @@ class ClassificationValidator(BaseValidator):
  self.targets = []
  self.confusion_matrix = ConfusionMatrix(names=model.names)

- def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
+ def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
  """Preprocess input batch by moving data to device and converting to appropriate dtype."""
  batch["img"] = batch["img"].to(self.device, non_blocking=True)
  batch["img"] = batch["img"].half() if self.args.half else batch["img"].float()
- batch["cls"] = batch["cls"].to(self.device)
+ batch["cls"] = batch["cls"].to(self.device, non_blocking=True)
  return batch

- def update_metrics(self, preds: torch.Tensor, batch: Dict[str, Any]) -> None:
+ def update_metrics(self, preds: torch.Tensor, batch: dict[str, Any]) -> None:
  """
  Update running metrics with model predictions and batch targets.

@@ -131,11 +133,11 @@ class ClassificationValidator(BaseValidator):
  self.metrics.save_dir = self.save_dir
  self.metrics.confusion_matrix = self.confusion_matrix

- def postprocess(self, preds: Union[torch.Tensor, List[torch.Tensor], Tuple[torch.Tensor]]) -> torch.Tensor:
+ def postprocess(self, preds: torch.Tensor | list[torch.Tensor] | tuple[torch.Tensor]) -> torch.Tensor:
  """Extract the primary prediction from model output if it's in a list or tuple format."""
  return preds[0] if isinstance(preds, (list, tuple)) else preds

- def get_stats(self) -> Dict[str, float]:
+ def get_stats(self) -> dict[str, float]:
  """Calculate and return a dictionary of metrics by processing targets and predictions."""
  self.metrics.process(self.targets, self.pred)
  return self.metrics.results_dict
@@ -144,7 +146,7 @@ class ClassificationValidator(BaseValidator):
  """Create a ClassificationDataset instance for validation."""
  return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)

- def get_dataloader(self, dataset_path: Union[Path, str], batch_size: int) -> torch.utils.data.DataLoader:
+ def get_dataloader(self, dataset_path: Path | str, batch_size: int) -> torch.utils.data.DataLoader:
  """
  Build and return a data loader for classification validation.

@@ -163,7 +165,7 @@ class ClassificationValidator(BaseValidator):
  pf = "%22s" + "%11.3g" * len(self.metrics.keys) # print format
  LOGGER.info(pf % ("all", self.metrics.top1, self.metrics.top5))

- def plot_val_samples(self, batch: Dict[str, Any], ni: int) -> None:
+ def plot_val_samples(self, batch: dict[str, Any], ni: int) -> None:
  """
  Plot validation image samples with their ground truth labels.

@@ -184,7 +186,7 @@ class ClassificationValidator(BaseValidator):
  on_plot=self.on_plot,
  )

- def plot_predictions(self, batch: Dict[str, Any], preds: torch.Tensor, ni: int) -> None:
+ def plot_predictions(self, batch: dict[str, Any], preds: torch.Tensor, ni: int) -> None:
  """
  Plot images with their predicted class labels and save the visualization.


ultralytics/models/yolo/detect/predict.py
@@ -85,7 +85,7 @@ class DetectionPredictor(BasePredictor):
  """Extract object features from the feature maps."""
  import torch

- s = min([x.shape[1] for x in feat_maps]) # find smallest vector length
+ s = min(x.shape[1] for x in feat_maps) # find shortest vector length
  obj_feats = torch.cat(
  [x.permute(0, 2, 3, 1).reshape(x.shape[0], -1, s, x.shape[1] // s).mean(dim=-1) for x in feat_maps], dim=1
  ) # mean reduce all vectors to same length
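
The final hunk replaces a list comprehension inside `min()` with a generator expression. The result is identical; the generator simply avoids building a throwaway list of channel counts before taking the minimum. A tiny illustration with made-up feature-map shapes:

    import torch

    # Three hypothetical multi-scale feature maps with different channel counts
    feat_maps = [torch.zeros(1, c, 16, 16) for c in (64, 128, 256)]

    s = min(x.shape[1] for x in feat_maps)  # no intermediate list is materialized
    assert s == 64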