dgenerate-ultralytics-headless 8.3.190__py3-none-any.whl → 8.3.191__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
Files changed (102)
  1. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/METADATA +1 -1
  2. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/RECORD +102 -101
  3. tests/test_cuda.py +6 -5
  4. tests/test_exports.py +1 -6
  5. tests/test_python.py +1 -4
  6. tests/test_solutions.py +1 -1
  7. ultralytics/__init__.py +1 -1
  8. ultralytics/cfg/__init__.py +16 -14
  9. ultralytics/cfg/datasets/VisDrone.yaml +4 -4
  10. ultralytics/data/annotator.py +6 -6
  11. ultralytics/data/augment.py +53 -51
  12. ultralytics/data/base.py +15 -13
  13. ultralytics/data/build.py +7 -4
  14. ultralytics/data/converter.py +9 -10
  15. ultralytics/data/dataset.py +24 -22
  16. ultralytics/data/loaders.py +13 -11
  17. ultralytics/data/split.py +4 -3
  18. ultralytics/data/split_dota.py +14 -12
  19. ultralytics/data/utils.py +29 -23
  20. ultralytics/engine/exporter.py +2 -2
  21. ultralytics/engine/model.py +16 -14
  22. ultralytics/engine/predictor.py +8 -6
  23. ultralytics/engine/results.py +54 -52
  24. ultralytics/engine/trainer.py +7 -2
  25. ultralytics/engine/tuner.py +4 -3
  26. ultralytics/hub/google/__init__.py +7 -6
  27. ultralytics/hub/session.py +8 -6
  28. ultralytics/hub/utils.py +3 -4
  29. ultralytics/models/fastsam/model.py +8 -6
  30. ultralytics/models/nas/model.py +5 -3
  31. ultralytics/models/rtdetr/train.py +4 -3
  32. ultralytics/models/rtdetr/val.py +6 -4
  33. ultralytics/models/sam/amg.py +13 -10
  34. ultralytics/models/sam/model.py +3 -2
  35. ultralytics/models/sam/modules/blocks.py +21 -21
  36. ultralytics/models/sam/modules/decoders.py +11 -11
  37. ultralytics/models/sam/modules/encoders.py +25 -25
  38. ultralytics/models/sam/modules/memory_attention.py +9 -8
  39. ultralytics/models/sam/modules/sam.py +8 -10
  40. ultralytics/models/sam/modules/tiny_encoder.py +21 -20
  41. ultralytics/models/sam/modules/transformer.py +6 -5
  42. ultralytics/models/sam/modules/utils.py +7 -5
  43. ultralytics/models/sam/predict.py +32 -31
  44. ultralytics/models/utils/loss.py +29 -27
  45. ultralytics/models/utils/ops.py +10 -8
  46. ultralytics/models/yolo/classify/train.py +7 -5
  47. ultralytics/models/yolo/classify/val.py +10 -8
  48. ultralytics/models/yolo/detect/predict.py +1 -1
  49. ultralytics/models/yolo/detect/train.py +8 -6
  50. ultralytics/models/yolo/detect/val.py +21 -19
  51. ultralytics/models/yolo/model.py +14 -14
  52. ultralytics/models/yolo/obb/train.py +5 -3
  53. ultralytics/models/yolo/obb/val.py +11 -9
  54. ultralytics/models/yolo/pose/train.py +7 -5
  55. ultralytics/models/yolo/pose/val.py +11 -9
  56. ultralytics/models/yolo/segment/train.py +4 -5
  57. ultralytics/models/yolo/segment/val.py +12 -10
  58. ultralytics/models/yolo/world/train.py +9 -7
  59. ultralytics/models/yolo/yoloe/train.py +7 -6
  60. ultralytics/models/yolo/yoloe/val.py +10 -8
  61. ultralytics/nn/autobackend.py +17 -19
  62. ultralytics/nn/modules/block.py +12 -12
  63. ultralytics/nn/modules/conv.py +4 -3
  64. ultralytics/nn/modules/head.py +41 -37
  65. ultralytics/nn/modules/transformer.py +22 -21
  66. ultralytics/nn/tasks.py +2 -2
  67. ultralytics/nn/text_model.py +6 -5
  68. ultralytics/solutions/analytics.py +7 -5
  69. ultralytics/solutions/config.py +12 -10
  70. ultralytics/solutions/distance_calculation.py +3 -3
  71. ultralytics/solutions/heatmap.py +4 -2
  72. ultralytics/solutions/object_counter.py +5 -3
  73. ultralytics/solutions/parking_management.py +4 -2
  74. ultralytics/solutions/region_counter.py +7 -5
  75. ultralytics/solutions/similarity_search.py +5 -3
  76. ultralytics/solutions/solutions.py +38 -36
  77. ultralytics/solutions/streamlit_inference.py +8 -7
  78. ultralytics/trackers/bot_sort.py +11 -9
  79. ultralytics/trackers/byte_tracker.py +17 -15
  80. ultralytics/trackers/utils/gmc.py +4 -3
  81. ultralytics/utils/__init__.py +16 -88
  82. ultralytics/utils/autobatch.py +3 -2
  83. ultralytics/utils/autodevice.py +10 -10
  84. ultralytics/utils/benchmarks.py +11 -10
  85. ultralytics/utils/callbacks/comet.py +9 -9
  86. ultralytics/utils/checks.py +17 -26
  87. ultralytics/utils/export.py +12 -11
  88. ultralytics/utils/files.py +8 -7
  89. ultralytics/utils/git.py +139 -0
  90. ultralytics/utils/instance.py +8 -7
  91. ultralytics/utils/loss.py +15 -13
  92. ultralytics/utils/metrics.py +62 -62
  93. ultralytics/utils/ops.py +3 -2
  94. ultralytics/utils/patches.py +6 -4
  95. ultralytics/utils/plotting.py +18 -16
  96. ultralytics/utils/torch_utils.py +4 -2
  97. ultralytics/utils/tqdm.py +15 -12
  98. ultralytics/utils/triton.py +3 -2
  99. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/WHEEL +0 -0
  100. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/entry_points.txt +0 -0
  101. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/licenses/LICENSE +0 -0
  102. {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/block.py CHANGED
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Block modules."""
 
-from typing import List, Optional, Tuple
+from __future__ import annotations
 
 import torch
 import torch.nn as nn
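
This `typing` → builtin-generics migration (PEP 585 `list`/`tuple`, PEP 604 `int | None`) repeats across the conv, head, transformer, text_model, and analytics hunks below. It is safe on older interpreters only because of the added `from __future__ import annotations`, which stores annotations as strings instead of evaluating them at import time. A minimal sketch, with an illustrative function not taken from the package:

from __future__ import annotations


def make_head(ch: tuple[int, ...] = (), pad: int | None = None) -> list[str]:
    # Under lazy annotations these hints parse even on Python 3.8, where
    # tuple[int, ...] and int | None would otherwise raise at runtime.
    return [f"P{i}" for i in range(len(ch))]


print(make_head((256, 512, 1024)))  # ['P0', 'P1', 'P2']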
@@ -192,7 +192,7 @@ class HGBlock(nn.Module):
 class SPP(nn.Module):
     """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
 
-    def __init__(self, c1: int, c2: int, k: Tuple[int, ...] = (5, 9, 13)):
+    def __init__(self, c1: int, c2: int, k: tuple[int, ...] = (5, 9, 13)):
         """
         Initialize the SPP layer with input/output channels and pooling kernel sizes.
 
@@ -471,7 +471,7 @@ class Bottleneck(nn.Module):
     """Standard bottleneck."""
 
     def __init__(
-        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
+        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
     ):
         """
         Initialize a standard bottleneck module.
@@ -711,7 +711,7 @@ class ImagePoolingAttn(nn.Module):
     """ImagePoolingAttn: Enhance the text embeddings with image-aware information."""
 
     def __init__(
-        self, ec: int = 256, ch: Tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
+        self, ec: int = 256, ch: tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
     ):
         """
         Initialize ImagePoolingAttn module.
@@ -740,7 +740,7 @@ class ImagePoolingAttn(nn.Module):
         self.hc = ec // nh
         self.k = k
 
-    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
+    def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
         """
         Forward pass of ImagePoolingAttn.
 
@@ -856,7 +856,7 @@ class RepBottleneck(Bottleneck):
     """Rep bottleneck."""
 
     def __init__(
-        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
+        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
     ):
         """
         Initialize RepBottleneck.
@@ -1026,7 +1026,7 @@ class SPPELAN(nn.Module):
 class CBLinear(nn.Module):
     """CBLinear."""
 
-    def __init__(self, c1: int, c2s: List[int], k: int = 1, s: int = 1, p: Optional[int] = None, g: int = 1):
+    def __init__(self, c1: int, c2s: list[int], k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
         """
         Initialize CBLinear module.
 
@@ -1042,7 +1042,7 @@ class CBLinear(nn.Module):
         self.c2s = c2s
         self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True)
 
-    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
+    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
         """Forward pass through CBLinear layer."""
         return self.conv(x).split(self.c2s, dim=1)
 
@@ -1050,7 +1050,7 @@ class CBLinear(nn.Module):
 class CBFuse(nn.Module):
     """CBFuse."""
 
-    def __init__(self, idx: List[int]):
+    def __init__(self, idx: list[int]):
         """
         Initialize CBFuse module.
 
@@ -1060,7 +1060,7 @@ class CBFuse(nn.Module):
         super().__init__()
         self.idx = idx
 
-    def forward(self, xs: List[torch.Tensor]) -> torch.Tensor:
+    def forward(self, xs: list[torch.Tensor]) -> torch.Tensor:
         """
         Forward pass through CBFuse layer.
 
@@ -1974,7 +1974,7 @@ class Residual(nn.Module):
 class SAVPE(nn.Module):
     """Spatial-Aware Visual Prompt Embedding module for feature enhancement."""
 
-    def __init__(self, ch: List[int], c3: int, embed: int):
+    def __init__(self, ch: list[int], c3: int, embed: int):
         """
         Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
 
@@ -2002,7 +2002,7 @@ class SAVPE(nn.Module):
         self.cv5 = nn.Conv2d(1, self.c, 3, padding=1)
         self.cv6 = nn.Sequential(Conv(2 * self.c, self.c, 3), nn.Conv2d(self.c, self.c, 3, padding=1))
 
-    def forward(self, x: List[torch.Tensor], vp: torch.Tensor) -> torch.Tensor:
+    def forward(self, x: list[torch.Tensor], vp: torch.Tensor) -> torch.Tensor:
         """Process input features and visual prompts to generate enhanced embeddings."""
         y = [self.cv2[i](xi) for i, xi in enumerate(x)]
         y = self.cv4(torch.cat(y, dim=1))
ultralytics/nn/modules/conv.py CHANGED
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Convolution modules."""
 
+from __future__ import annotations
+
 import math
-from typing import List
 
 import numpy as np
 import torch
@@ -669,7 +670,7 @@ class Concat(nn.Module):
         super().__init__()
         self.d = dimension
 
-    def forward(self, x: List[torch.Tensor]):
+    def forward(self, x: list[torch.Tensor]):
         """
         Concatenate input tensors along specified dimension.
 
@@ -700,7 +701,7 @@ class Index(nn.Module):
         super().__init__()
         self.index = index
 
-    def forward(self, x: List[torch.Tensor]):
+    def forward(self, x: list[torch.Tensor]):
         """
         Select and return a particular index from input.
 
ultralytics/nn/modules/head.py CHANGED
@@ -1,9 +1,10 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Model head modules."""
 
+from __future__ import annotations
+
 import copy
 import math
-from typing import List, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
@@ -76,7 +77,7 @@ class Detect(nn.Module):
     legacy = False  # backward compatibility for v3/v5/v8/v9 models
     xyxy = False  # xyxy or xywh output
 
-    def __init__(self, nc: int = 80, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, ch: tuple = ()):
         """
         Initialize the YOLO detection layer with specified number of classes and channels.
 
@@ -112,7 +113,7 @@ class Detect(nn.Module):
             self.one2one_cv2 = copy.deepcopy(self.cv2)
             self.one2one_cv3 = copy.deepcopy(self.cv3)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[List[torch.Tensor], Tuple]:
+    def forward(self, x: list[torch.Tensor]) -> list[torch.Tensor] | tuple:
         """Concatenate and return predicted bounding boxes and class probabilities."""
         if self.end2end:
             return self.forward_end2end(x)
@@ -124,7 +125,7 @@ class Detect(nn.Module):
         y = self._inference(x)
         return y if self.export else (y, x)
 
-    def forward_end2end(self, x: List[torch.Tensor]) -> Union[dict, Tuple]:
+    def forward_end2end(self, x: list[torch.Tensor]) -> dict | tuple:
         """
         Perform forward pass of the v10Detect module.
 
@@ -148,7 +149,7 @@ class Detect(nn.Module):
         y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})
 
-    def _inference(self, x: List[torch.Tensor]) -> torch.Tensor:
+    def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
         """
         Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.
 
@@ -200,7 +201,12 @@ class Detect(nn.Module):
 
     def decode_bboxes(self, bboxes: torch.Tensor, anchors: torch.Tensor, xywh: bool = True) -> torch.Tensor:
         """Decode bounding boxes from predictions."""
-        return dist2bbox(bboxes, anchors, xywh=xywh and not (self.end2end or self.xyxy), dim=1)
+        return dist2bbox(
+            bboxes,
+            anchors,
+            xywh=xywh and not self.end2end and not self.xyxy,
+            dim=1,
+        )
 
     @staticmethod
     def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80) -> torch.Tensor:
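
The reflowed decode_bboxes swaps `not (self.end2end or self.xyxy)` for `not self.end2end and not self.xyxy`, which is the same predicate by De Morgan's law, so decoding behavior is unchanged. A quick self-contained check over all eight boolean combinations:

from itertools import product

for xywh, end2end, xyxy in product((False, True), repeat=3):
    old = xywh and not (end2end or xyxy)
    new = xywh and not end2end and not xyxy
    assert old == new  # identical truth tables
print("equivalent for all 8 combinations")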
@@ -249,7 +255,7 @@ class Segment(Detect):
         >>> outputs = segment(x)
     """
 
-    def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: tuple = ()):
         """
         Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.
 
@@ -267,7 +273,7 @@ class Segment(Detect):
         c4 = max(ch[0] // 4, self.nm)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[Tuple, List[torch.Tensor]]:
+    def forward(self, x: list[torch.Tensor]) -> tuple | list[torch.Tensor]:
         """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
         p = self.proto(x[0])  # mask protos
         bs = p.shape[0]  # batch size
@@ -301,7 +307,7 @@ class OBB(Detect):
         >>> outputs = obb(x)
     """
 
-    def __init__(self, nc: int = 80, ne: int = 1, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, ne: int = 1, ch: tuple = ()):
         """
         Initialize OBB with number of classes `nc` and layer channels `ch`.
 
@@ -316,7 +322,7 @@ class OBB(Detect):
         c4 = max(ch[0] // 4, self.ne)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor]) -> torch.Tensor | tuple:
         """Concatenate and return predicted bounding boxes and class probabilities."""
         bs = x[0].shape[0]  # batch size
         angle = torch.cat([self.cv4[i](x[i]).view(bs, self.ne, -1) for i in range(self.nl)], 2)  # OBB theta logits
@@ -357,7 +363,7 @@ class Pose(Detect):
         >>> outputs = pose(x)
     """
 
-    def __init__(self, nc: int = 80, kpt_shape: Tuple = (17, 3), ch: Tuple = ()):
+    def __init__(self, nc: int = 80, kpt_shape: tuple = (17, 3), ch: tuple = ()):
         """
         Initialize YOLO network with default parameters and Convolutional Layers.
 
@@ -373,7 +379,7 @@ class Pose(Detect):
         c4 = max(ch[0] // 4, self.nk)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor]) -> torch.Tensor | tuple:
         """Perform forward pass through YOLO model and return predictions."""
         bs = x[0].shape[0]  # batch size
         kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
@@ -443,7 +449,7 @@ class Classify(nn.Module):
 
     export = False  # export mode
 
-    def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: Optional[int] = None, g: int = 1):
+    def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
         """
         Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.
 
@@ -462,7 +468,7 @@ class Classify(nn.Module):
         self.drop = nn.Dropout(p=0.0, inplace=True)
         self.linear = nn.Linear(c_, c2)  # to x(b,c2)
 
-    def forward(self, x: Union[List[torch.Tensor], torch.Tensor]) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor] | torch.Tensor) -> torch.Tensor | tuple:
         """Perform forward pass of the YOLO model on input image data."""
         if isinstance(x, list):
             x = torch.cat(x, 1)
@@ -496,7 +502,7 @@ class WorldDetect(Detect):
         >>> outputs = world_detect(x, text)
     """
 
-    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
         """
         Initialize YOLO detection layer with nc classes and layer channels ch.
 
@@ -511,7 +517,7 @@ class WorldDetect(Detect):
         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
         self.cv4 = nn.ModuleList(BNContrastiveHead(embed) if with_bn else ContrastiveHead() for _ in ch)
 
-    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> Union[List[torch.Tensor], Tuple]:
+    def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> list[torch.Tensor] | tuple:
         """Concatenate and return predicted bounding boxes and class probabilities."""
         for i in range(self.nl):
             x[i] = torch.cat((self.cv2[i](x[i]), self.cv4[i](self.cv3[i](x[i]), text)), 1)
@@ -580,7 +586,7 @@ class LRPCHead(nn.Module):
             linear.bias.data = conv.bias.data
         return linear
 
-    def forward(self, cls_feat: torch.Tensor, loc_feat: torch.Tensor, conf: float) -> Tuple[Tuple, torch.Tensor]:
+    def forward(self, cls_feat: torch.Tensor, loc_feat: torch.Tensor, conf: float) -> tuple[tuple, torch.Tensor]:
         """Process classification and localization features to generate detection proposals."""
         if self.enabled:
             pf_score = self.pf(cls_feat)[0, 0].flatten(0)
@@ -629,7 +635,7 @@ class YOLOEDetect(Detect):
 
     is_fused = False
 
-    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
         """
         Initialize YOLO detection layer with nc classes and layer channels ch.
 
@@ -642,7 +648,7 @@ class YOLOEDetect(Detect):
         super().__init__(nc, ch)
         c3 = max(ch[0], min(self.nc, 100))
         assert c3 <= embed
-        assert with_bn is True
+        assert with_bn
         self.cv3 = (
             nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
             if self.legacy
@@ -709,11 +715,11 @@ class YOLOEDetect(Detect):
             self.reprta = nn.Identity()
         self.is_fused = True
 
-    def get_tpe(self, tpe: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
+    def get_tpe(self, tpe: torch.Tensor | None) -> torch.Tensor | None:
         """Get text prompt embeddings with normalization."""
         return None if tpe is None else F.normalize(self.reprta(tpe), dim=-1, p=2)
 
-    def get_vpe(self, x: List[torch.Tensor], vpe: torch.Tensor) -> torch.Tensor:
+    def get_vpe(self, x: list[torch.Tensor], vpe: torch.Tensor) -> torch.Tensor:
         """Get visual prompt embeddings with spatial awareness."""
         if vpe.shape[1] == 0:  # no visual prompt embeddings
             return torch.zeros(x[0].shape[0], 0, self.embed, device=x[0].device)
@@ -722,7 +728,7 @@ class YOLOEDetect(Detect):
         assert vpe.ndim == 3  # (B, N, D)
         return vpe
 
-    def forward_lrpc(self, x: List[torch.Tensor], return_mask: bool = False) -> Union[torch.Tensor, Tuple]:
+    def forward_lrpc(self, x: list[torch.Tensor], return_mask: bool = False) -> torch.Tensor | tuple:
         """Process features with fused text embeddings to generate detections for prompt-free model."""
         masks = []
         assert self.is_fused, "Prompt-free inference requires model to be fused!"
@@ -760,9 +766,7 @@ class YOLOEDetect(Detect):
         else:
             return y if self.export else (y, x)
 
-    def forward(
-        self, x: List[torch.Tensor], cls_pe: torch.Tensor, return_mask: bool = False
-    ) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor], cls_pe: torch.Tensor, return_mask: bool = False) -> torch.Tensor | tuple:
         """Process features with class prompt embeddings to generate detections."""
         if hasattr(self, "lrpc"):  # for prompt-free inference
             return self.forward_lrpc(x, return_mask)
@@ -811,7 +815,7 @@ class YOLOESegment(YOLOEDetect):
     """
 
     def __init__(
-        self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: Tuple = ()
+        self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: tuple = ()
    ):
         """
         Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.
@@ -832,7 +836,7 @@ class YOLOESegment(YOLOEDetect):
         c5 = max(ch[0] // 4, self.nm)
         self.cv5 = nn.ModuleList(nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nm, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> Union[Tuple, torch.Tensor]:
+    def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> tuple | torch.Tensor:
         """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
         p = self.proto(x[0])  # mask protos
         bs = p.shape[0]  # batch size
@@ -900,7 +904,7 @@ class RTDETRDecoder(nn.Module):
     def __init__(
         self,
         nc: int = 80,
-        ch: Tuple = (512, 1024, 2048),
+        ch: tuple = (512, 1024, 2048),
         hd: int = 256,  # hidden dim
         nq: int = 300,  # num queries
         ndp: int = 4,  # num decoder points
@@ -976,7 +980,7 @@ class RTDETRDecoder(nn.Module):
 
         self._reset_parameters()
 
-    def forward(self, x: List[torch.Tensor], batch: Optional[dict] = None) -> Union[Tuple, torch.Tensor]:
+    def forward(self, x: list[torch.Tensor], batch: dict | None = None) -> tuple | torch.Tensor:
         """
         Run the forward pass of the module, returning bounding box and classification scores for the input.
 
@@ -1028,12 +1032,12 @@ class RTDETRDecoder(nn.Module):
 
     def _generate_anchors(
         self,
-        shapes: List[List[int]],
+        shapes: list[list[int]],
         grid_size: float = 0.05,
         dtype: torch.dtype = torch.float32,
         device: str = "cpu",
         eps: float = 1e-2,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Generate anchor bounding boxes for given shapes with specific grid size and validate them.
 
@@ -1066,7 +1070,7 @@ class RTDETRDecoder(nn.Module):
         anchors = anchors.masked_fill(~valid_mask, float("inf"))
         return anchors, valid_mask
 
-    def _get_encoder_input(self, x: List[torch.Tensor]) -> Tuple[torch.Tensor, List[List[int]]]:
+    def _get_encoder_input(self, x: list[torch.Tensor]) -> tuple[torch.Tensor, list[list[int]]]:
         """
         Process and return encoder inputs by getting projection features from input and concatenating them.
 
@@ -1096,10 +1100,10 @@ class RTDETRDecoder(nn.Module):
     def _get_decoder_input(
         self,
         feats: torch.Tensor,
-        shapes: List[List[int]],
-        dn_embed: Optional[torch.Tensor] = None,
-        dn_bbox: Optional[torch.Tensor] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        shapes: list[list[int]],
+        dn_embed: torch.Tensor | None = None,
+        dn_bbox: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Generate and prepare the input required for the decoder from the provided features and shapes.
 
@@ -1204,7 +1208,7 @@ class v10Detect(Detect):
 
     end2end = True
 
-    def __init__(self, nc: int = 80, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, ch: tuple = ()):
         """
         Initialize the v10Detect object with the specified number of classes and input channels.
 
ultralytics/nn/modules/transformer.py CHANGED
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Transformer modules."""
 
+from __future__ import annotations
+
 import math
-from typing import List, Optional
 
 import torch
 import torch.nn as nn
@@ -88,16 +89,16 @@ class TransformerEncoderLayer(nn.Module):
         self.normalize_before = normalize_before
 
     @staticmethod
-    def with_pos_embed(tensor: torch.Tensor, pos: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None = None) -> torch.Tensor:
         """Add position embeddings to the tensor if provided."""
         return tensor if pos is None else tensor + pos
 
     def forward_post(
         self,
         src: torch.Tensor,
-        src_mask: Optional[torch.Tensor] = None,
-        src_key_padding_mask: Optional[torch.Tensor] = None,
-        pos: Optional[torch.Tensor] = None,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform forward pass with post-normalization.
@@ -122,9 +123,9 @@ class TransformerEncoderLayer(nn.Module):
     def forward_pre(
         self,
         src: torch.Tensor,
-        src_mask: Optional[torch.Tensor] = None,
-        src_key_padding_mask: Optional[torch.Tensor] = None,
-        pos: Optional[torch.Tensor] = None,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform forward pass with pre-normalization.
@@ -149,9 +150,9 @@ class TransformerEncoderLayer(nn.Module):
     def forward(
         self,
         src: torch.Tensor,
-        src_mask: Optional[torch.Tensor] = None,
-        src_key_padding_mask: Optional[torch.Tensor] = None,
-        pos: Optional[torch.Tensor] = None,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Forward propagate the input through the encoder module.
@@ -533,8 +534,8 @@ class MSDeformAttn(nn.Module):
         query: torch.Tensor,
         refer_bbox: torch.Tensor,
         value: torch.Tensor,
-        value_shapes: List,
-        value_mask: Optional[torch.Tensor] = None,
+        value_shapes: list,
+        value_mask: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform forward pass for multiscale deformable attention.
@@ -649,7 +650,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
         self.norm3 = nn.LayerNorm(d_model)
 
     @staticmethod
-    def with_pos_embed(tensor: torch.Tensor, pos: Optional[torch.Tensor]) -> torch.Tensor:
+    def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None) -> torch.Tensor:
         """Add positional embeddings to the input tensor, if provided."""
         return tensor if pos is None else tensor + pos
 
@@ -672,10 +673,10 @@ class DeformableTransformerDecoderLayer(nn.Module):
         embed: torch.Tensor,
         refer_bbox: torch.Tensor,
         feats: torch.Tensor,
-        shapes: List,
-        padding_mask: Optional[torch.Tensor] = None,
-        attn_mask: Optional[torch.Tensor] = None,
-        query_pos: Optional[torch.Tensor] = None,
+        shapes: list,
+        padding_mask: torch.Tensor | None = None,
+        attn_mask: torch.Tensor | None = None,
+        query_pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform the forward pass through the entire decoder layer.
@@ -749,12 +750,12 @@ class DeformableTransformerDecoder(nn.Module):
         embed: torch.Tensor,  # decoder embeddings
         refer_bbox: torch.Tensor,  # anchor
         feats: torch.Tensor,  # image features
-        shapes: List,  # feature shapes
+        shapes: list,  # feature shapes
         bbox_head: nn.Module,
         score_head: nn.Module,
         pos_mlp: nn.Module,
-        attn_mask: Optional[torch.Tensor] = None,
-        padding_mask: Optional[torch.Tensor] = None,
+        attn_mask: torch.Tensor | None = None,
+        padding_mask: torch.Tensor | None = None,
     ):
         """
         Perform the forward pass through the entire decoder.
ultralytics/nn/tasks.py CHANGED
@@ -1548,7 +1548,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
     """
     ckpt, weight = torch_safe_load(weight)  # load ckpt
     args = {**DEFAULT_CFG_DICT, **(ckpt.get("train_args", {}))}  # combine model and default args, preferring model args
-    model = (ckpt.get("ema") or ckpt["model"]).to(device).float()  # FP32 model
+    model = (ckpt.get("ema") or ckpt["model"]).float()  # FP32 model
 
     # Model compatibility updates
     model.args = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS}  # attach args to model
@@ -1557,7 +1557,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
     if not hasattr(model, "stride"):
         model.stride = torch.tensor([32.0])
 
-    model = model.fuse().eval() if fuse and hasattr(model, "fuse") else model.eval()  # model in eval mode
+    model = (model.fuse() if fuse and hasattr(model, "fuse") else model).eval().to(device)  # model in eval mode
 
     # Module updates
     for m in model.modules():
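
Taken together, the two attempt_load_one_weight hunks reorder device placement: the checkpoint now stays on its load device through the FP32 cast, compatibility updates, and optional layer fusion, and only the finished eval-mode module is transferred to `device`, in a single move. Condensed before/after, using the same calls as the hunks above:

# Before: transfer to device first, then fuse and switch to eval
model = (ckpt.get("ema") or ckpt["model"]).to(device).float()
model = model.fuse().eval() if fuse and hasattr(model, "fuse") else model.eval()

# After: fuse and switch to eval first, then one transfer at the end
model = (ckpt.get("ema") or ckpt["model"]).float()
model = (model.fuse() if fuse and hasattr(model, "fuse") else model).eval().to(device)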
ultralytics/nn/text_model.py CHANGED
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from abc import abstractmethod
 from pathlib import Path
-from typing import List, Union
 
 import torch
 import torch.nn as nn
@@ -91,7 +92,7 @@ class CLIP(TextModel):
         self.device = device
         self.eval()
 
-    def tokenize(self, texts: Union[str, List[str]]) -> torch.Tensor:
+    def tokenize(self, texts: str | list[str]) -> torch.Tensor:
         """
         Convert input texts to CLIP tokens.
 
@@ -135,7 +136,7 @@ class CLIP(TextModel):
         return txt_feats
 
     @smart_inference_mode()
-    def encode_image(self, image: Union[Image.Image, torch.Tensor], dtype: torch.dtype = torch.float32) -> torch.Tensor:
+    def encode_image(self, image: Image.Image | torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
         """
         Encode preprocessed images into normalized feature vectors.
 
@@ -234,7 +235,7 @@ class MobileCLIP(TextModel):
         self.device = device
         self.eval()
 
-    def tokenize(self, texts: List[str]) -> torch.Tensor:
+    def tokenize(self, texts: list[str]) -> torch.Tensor:
         """
         Convert input texts to MobileCLIP tokens.
 
@@ -319,7 +320,7 @@ class MobileCLIPTS(TextModel):
         self.tokenizer = clip.clip.tokenize
         self.device = device
 
-    def tokenize(self, texts: List[str]) -> torch.Tensor:
+    def tokenize(self, texts: list[str]) -> torch.Tensor:
         """
         Convert input texts to MobileCLIP tokens.
 
ultralytics/solutions/analytics.py CHANGED
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from itertools import cycle
-from typing import Any, Dict, Optional
+from typing import Any
 
 import cv2
 import numpy as np
@@ -135,7 +137,7 @@ class Analytics(BaseSolution):
         return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)
 
     def update_graph(
-        self, frame_number: int, count_dict: Optional[Dict[str, int]] = None, plot: str = "line"
+        self, frame_number: int, count_dict: dict[str, int] | None = None, plot: str = "line"
     ) -> np.ndarray:
         """
         Update the graph with new data for single or multiple classes.
@@ -204,7 +206,7 @@ class Analytics(BaseSolution):
                     markersize=self.line_width * 5,
                     label=f"{key} Data Points",
                 )
-        if plot == "bar":
+        elif plot == "bar":
             self.ax.clear()  # clear bar data
             for label in labels:  # Map labels to colors
                 if label not in self.color_mapping:
@@ -224,12 +226,12 @@ class Analytics(BaseSolution):
             for bar, label in zip(bars, labels):
                 bar.set_label(label)  # Assign label to each bar
             self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color)
-        if plot == "pie":
+        elif plot == "pie":
            total = sum(counts)
            percentages = [size / total * 100 for size in counts]
-            start_angle = 90
            self.ax.clear()
 
+            start_angle = 90
            # Create pie chart and create legend labels with percentages
            wedges, _ = self.ax.pie(
                counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None
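
The two `if` → `elif` edits in update_graph turn the line/bar/pie handling into an explicitly mutually exclusive chain. Results are unchanged for any single `plot` value, since the string comparisons could never both be true, but later comparisons are now skipped once a branch matches and the dispatch intent is clearer. A minimal sketch of the shape (illustrative, not the Solutions API):

def describe(plot: str = "line") -> str:
    kind = "unknown"
    if plot == "line":
        kind = "line chart"
    elif plot == "bar":  # no longer tested once "line" has matched
        kind = "bar chart"
    elif plot == "pie":
        kind = "pie chart"
    return kind


assert describe("pie") == "pie chart"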