dgenerate-ultralytics-headless 8.3.196__py3-none-any.whl → 8.3.248__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +33 -34
  2. dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
  3. tests/__init__.py +5 -7
  4. tests/conftest.py +8 -15
  5. tests/test_cli.py +8 -10
  6. tests/test_cuda.py +9 -10
  7. tests/test_engine.py +29 -2
  8. tests/test_exports.py +69 -21
  9. tests/test_integrations.py +8 -11
  10. tests/test_python.py +109 -71
  11. tests/test_solutions.py +170 -159
  12. ultralytics/__init__.py +27 -9
  13. ultralytics/cfg/__init__.py +57 -64
  14. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  15. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  16. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  17. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  18. ultralytics/cfg/datasets/Objects365.yaml +19 -15
  19. ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
  20. ultralytics/cfg/datasets/VOC.yaml +19 -21
  21. ultralytics/cfg/datasets/VisDrone.yaml +5 -5
  22. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  23. ultralytics/cfg/datasets/coco-pose.yaml +24 -2
  24. ultralytics/cfg/datasets/coco.yaml +2 -2
  25. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  26. ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
  27. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  28. ultralytics/cfg/datasets/dog-pose.yaml +28 -0
  29. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  30. ultralytics/cfg/datasets/dota8.yaml +2 -2
  31. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
  32. ultralytics/cfg/datasets/kitti.yaml +27 -0
  33. ultralytics/cfg/datasets/lvis.yaml +7 -7
  34. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  35. ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
  36. ultralytics/cfg/datasets/xView.yaml +16 -16
  37. ultralytics/cfg/default.yaml +96 -94
  38. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  39. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  40. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  41. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
  42. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
  43. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
  44. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
  45. ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
  46. ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
  47. ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
  48. ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
  49. ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
  50. ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
  51. ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
  52. ultralytics/cfg/models/v6/yolov6.yaml +1 -1
  53. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  54. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  55. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  56. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  57. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  58. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  59. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  60. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  61. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  62. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  63. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  64. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
  65. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  66. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  67. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  68. ultralytics/cfg/trackers/botsort.yaml +16 -17
  69. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  70. ultralytics/data/__init__.py +4 -4
  71. ultralytics/data/annotator.py +3 -4
  72. ultralytics/data/augment.py +286 -476
  73. ultralytics/data/base.py +18 -26
  74. ultralytics/data/build.py +151 -26
  75. ultralytics/data/converter.py +38 -50
  76. ultralytics/data/dataset.py +47 -75
  77. ultralytics/data/loaders.py +42 -49
  78. ultralytics/data/split.py +5 -6
  79. ultralytics/data/split_dota.py +8 -15
  80. ultralytics/data/utils.py +41 -45
  81. ultralytics/engine/exporter.py +462 -462
  82. ultralytics/engine/model.py +150 -191
  83. ultralytics/engine/predictor.py +30 -40
  84. ultralytics/engine/results.py +177 -311
  85. ultralytics/engine/trainer.py +193 -120
  86. ultralytics/engine/tuner.py +77 -63
  87. ultralytics/engine/validator.py +39 -22
  88. ultralytics/hub/__init__.py +16 -19
  89. ultralytics/hub/auth.py +6 -12
  90. ultralytics/hub/google/__init__.py +7 -10
  91. ultralytics/hub/session.py +15 -25
  92. ultralytics/hub/utils.py +5 -8
  93. ultralytics/models/__init__.py +1 -1
  94. ultralytics/models/fastsam/__init__.py +1 -1
  95. ultralytics/models/fastsam/model.py +8 -10
  96. ultralytics/models/fastsam/predict.py +19 -30
  97. ultralytics/models/fastsam/utils.py +1 -2
  98. ultralytics/models/fastsam/val.py +5 -7
  99. ultralytics/models/nas/__init__.py +1 -1
  100. ultralytics/models/nas/model.py +5 -8
  101. ultralytics/models/nas/predict.py +7 -9
  102. ultralytics/models/nas/val.py +1 -2
  103. ultralytics/models/rtdetr/__init__.py +1 -1
  104. ultralytics/models/rtdetr/model.py +7 -8
  105. ultralytics/models/rtdetr/predict.py +15 -19
  106. ultralytics/models/rtdetr/train.py +10 -13
  107. ultralytics/models/rtdetr/val.py +21 -23
  108. ultralytics/models/sam/__init__.py +15 -2
  109. ultralytics/models/sam/amg.py +14 -20
  110. ultralytics/models/sam/build.py +26 -19
  111. ultralytics/models/sam/build_sam3.py +377 -0
  112. ultralytics/models/sam/model.py +29 -32
  113. ultralytics/models/sam/modules/blocks.py +83 -144
  114. ultralytics/models/sam/modules/decoders.py +22 -40
  115. ultralytics/models/sam/modules/encoders.py +44 -101
  116. ultralytics/models/sam/modules/memory_attention.py +16 -30
  117. ultralytics/models/sam/modules/sam.py +206 -79
  118. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  119. ultralytics/models/sam/modules/transformer.py +18 -28
  120. ultralytics/models/sam/modules/utils.py +174 -50
  121. ultralytics/models/sam/predict.py +2268 -366
  122. ultralytics/models/sam/sam3/__init__.py +3 -0
  123. ultralytics/models/sam/sam3/decoder.py +546 -0
  124. ultralytics/models/sam/sam3/encoder.py +529 -0
  125. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  126. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  127. ultralytics/models/sam/sam3/model_misc.py +199 -0
  128. ultralytics/models/sam/sam3/necks.py +129 -0
  129. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  130. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  131. ultralytics/models/sam/sam3/vitdet.py +547 -0
  132. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  133. ultralytics/models/utils/loss.py +14 -26
  134. ultralytics/models/utils/ops.py +13 -17
  135. ultralytics/models/yolo/__init__.py +1 -1
  136. ultralytics/models/yolo/classify/predict.py +9 -12
  137. ultralytics/models/yolo/classify/train.py +15 -41
  138. ultralytics/models/yolo/classify/val.py +34 -32
  139. ultralytics/models/yolo/detect/predict.py +8 -11
  140. ultralytics/models/yolo/detect/train.py +13 -32
  141. ultralytics/models/yolo/detect/val.py +75 -63
  142. ultralytics/models/yolo/model.py +37 -53
  143. ultralytics/models/yolo/obb/predict.py +5 -14
  144. ultralytics/models/yolo/obb/train.py +11 -14
  145. ultralytics/models/yolo/obb/val.py +42 -39
  146. ultralytics/models/yolo/pose/__init__.py +1 -1
  147. ultralytics/models/yolo/pose/predict.py +7 -22
  148. ultralytics/models/yolo/pose/train.py +10 -22
  149. ultralytics/models/yolo/pose/val.py +40 -59
  150. ultralytics/models/yolo/segment/predict.py +16 -20
  151. ultralytics/models/yolo/segment/train.py +3 -12
  152. ultralytics/models/yolo/segment/val.py +106 -56
  153. ultralytics/models/yolo/world/train.py +12 -16
  154. ultralytics/models/yolo/world/train_world.py +11 -34
  155. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  156. ultralytics/models/yolo/yoloe/predict.py +16 -23
  157. ultralytics/models/yolo/yoloe/train.py +31 -56
  158. ultralytics/models/yolo/yoloe/train_seg.py +5 -10
  159. ultralytics/models/yolo/yoloe/val.py +16 -21
  160. ultralytics/nn/__init__.py +7 -7
  161. ultralytics/nn/autobackend.py +152 -80
  162. ultralytics/nn/modules/__init__.py +60 -60
  163. ultralytics/nn/modules/activation.py +4 -6
  164. ultralytics/nn/modules/block.py +133 -217
  165. ultralytics/nn/modules/conv.py +52 -97
  166. ultralytics/nn/modules/head.py +64 -116
  167. ultralytics/nn/modules/transformer.py +79 -89
  168. ultralytics/nn/modules/utils.py +16 -21
  169. ultralytics/nn/tasks.py +111 -156
  170. ultralytics/nn/text_model.py +40 -67
  171. ultralytics/solutions/__init__.py +12 -12
  172. ultralytics/solutions/ai_gym.py +11 -17
  173. ultralytics/solutions/analytics.py +15 -16
  174. ultralytics/solutions/config.py +5 -6
  175. ultralytics/solutions/distance_calculation.py +10 -13
  176. ultralytics/solutions/heatmap.py +7 -13
  177. ultralytics/solutions/instance_segmentation.py +5 -8
  178. ultralytics/solutions/object_blurrer.py +7 -10
  179. ultralytics/solutions/object_counter.py +12 -19
  180. ultralytics/solutions/object_cropper.py +8 -14
  181. ultralytics/solutions/parking_management.py +33 -31
  182. ultralytics/solutions/queue_management.py +10 -12
  183. ultralytics/solutions/region_counter.py +9 -12
  184. ultralytics/solutions/security_alarm.py +15 -20
  185. ultralytics/solutions/similarity_search.py +13 -17
  186. ultralytics/solutions/solutions.py +75 -74
  187. ultralytics/solutions/speed_estimation.py +7 -10
  188. ultralytics/solutions/streamlit_inference.py +4 -7
  189. ultralytics/solutions/templates/similarity-search.html +7 -18
  190. ultralytics/solutions/trackzone.py +7 -10
  191. ultralytics/solutions/vision_eye.py +5 -8
  192. ultralytics/trackers/__init__.py +1 -1
  193. ultralytics/trackers/basetrack.py +3 -5
  194. ultralytics/trackers/bot_sort.py +10 -27
  195. ultralytics/trackers/byte_tracker.py +14 -30
  196. ultralytics/trackers/track.py +3 -6
  197. ultralytics/trackers/utils/gmc.py +11 -22
  198. ultralytics/trackers/utils/kalman_filter.py +37 -48
  199. ultralytics/trackers/utils/matching.py +12 -15
  200. ultralytics/utils/__init__.py +116 -116
  201. ultralytics/utils/autobatch.py +2 -4
  202. ultralytics/utils/autodevice.py +17 -18
  203. ultralytics/utils/benchmarks.py +70 -70
  204. ultralytics/utils/callbacks/base.py +8 -10
  205. ultralytics/utils/callbacks/clearml.py +5 -13
  206. ultralytics/utils/callbacks/comet.py +32 -46
  207. ultralytics/utils/callbacks/dvc.py +13 -18
  208. ultralytics/utils/callbacks/mlflow.py +4 -5
  209. ultralytics/utils/callbacks/neptune.py +7 -15
  210. ultralytics/utils/callbacks/platform.py +314 -38
  211. ultralytics/utils/callbacks/raytune.py +3 -4
  212. ultralytics/utils/callbacks/tensorboard.py +23 -31
  213. ultralytics/utils/callbacks/wb.py +10 -13
  214. ultralytics/utils/checks.py +151 -87
  215. ultralytics/utils/cpu.py +3 -8
  216. ultralytics/utils/dist.py +19 -15
  217. ultralytics/utils/downloads.py +29 -41
  218. ultralytics/utils/errors.py +6 -14
  219. ultralytics/utils/events.py +2 -4
  220. ultralytics/utils/export/__init__.py +7 -0
  221. ultralytics/utils/{export.py → export/engine.py} +16 -16
  222. ultralytics/utils/export/imx.py +325 -0
  223. ultralytics/utils/export/tensorflow.py +231 -0
  224. ultralytics/utils/files.py +24 -28
  225. ultralytics/utils/git.py +9 -11
  226. ultralytics/utils/instance.py +30 -51
  227. ultralytics/utils/logger.py +212 -114
  228. ultralytics/utils/loss.py +15 -24
  229. ultralytics/utils/metrics.py +131 -160
  230. ultralytics/utils/nms.py +21 -30
  231. ultralytics/utils/ops.py +107 -165
  232. ultralytics/utils/patches.py +33 -21
  233. ultralytics/utils/plotting.py +122 -119
  234. ultralytics/utils/tal.py +28 -44
  235. ultralytics/utils/torch_utils.py +70 -187
  236. ultralytics/utils/tqdm.py +20 -20
  237. ultralytics/utils/triton.py +13 -19
  238. ultralytics/utils/tuner.py +17 -5
  239. dgenerate_ultralytics_headless-8.3.196.dist-info/RECORD +0 -281
  240. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
  241. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
  242. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
  243. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/head.py

@@ -12,20 +12,19 @@ import torch.nn.functional as F
 from torch.nn.init import constant_, xavier_uniform_
 
 from ultralytics.utils import NOT_MACOS14
-from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
-from ultralytics.utils.torch_utils import disable_dynamo, fuse_conv_and_bn, smart_inference_mode
+from ultralytics.utils.tal import dist2bbox, dist2rbox, make_anchors
+from ultralytics.utils.torch_utils import TORCH_1_11, fuse_conv_and_bn, smart_inference_mode
 
 from .block import DFL, SAVPE, BNContrastiveHead, ContrastiveHead, Proto, Residual, SwiGLUFFN
 from .conv import Conv, DWConv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init
 
-__all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder", "v10Detect", "YOLOEDetect", "YOLOESegment"
+__all__ = "OBB", "Classify", "Detect", "Pose", "RTDETRDecoder", "Segment", "YOLOEDetect", "YOLOESegment", "v10Detect"
 
 
 class Detect(nn.Module):
-    """
-    YOLO Detect head for object detection models.
+    """YOLO Detect head for object detection models.
 
     This class implements the detection head used in YOLO models for predicting bounding boxes and class probabilities.
     It supports both training and inference modes, with optional end-to-end detection capabilities.
@@ -78,8 +77,7 @@ class Detect(nn.Module):
     xyxy = False  # xyxy or xywh output
 
     def __init__(self, nc: int = 80, ch: tuple = ()):
-        """
-        Initialize the YOLO detection layer with specified number of classes and channels.
+        """Initialize the YOLO detection layer with specified number of classes and channels.
 
         Args:
             nc (int): Number of classes.
@@ -126,15 +124,14 @@ class Detect(nn.Module):
         return y if self.export else (y, x)
 
     def forward_end2end(self, x: list[torch.Tensor]) -> dict | tuple:
-        """
-        Perform forward pass of the v10Detect module.
+        """Perform forward pass of the v10Detect module.
 
         Args:
             x (list[torch.Tensor]): Input feature maps from different levels.
 
         Returns:
-            outputs (dict | tuple): Training mode returns dict with one2many and one2one outputs.
-                Inference mode returns processed detections or tuple with detections and raw outputs.
+            outputs (dict | tuple): Training mode returns dict with one2many and one2one outputs. Inference mode returns
+                processed detections or tuple with detections and raw outputs.
         """
         x_detach = [xi.detach() for xi in x]
         one2one = [
@@ -149,10 +146,8 @@ class Detect(nn.Module):
         y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})
 
-    @disable_dynamo
     def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
-        """
-        Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.
+        """Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.
 
         Args:
             x (list[torch.Tensor]): List of feature maps from different detection layers.
@@ -163,28 +158,12 @@ class Detect(nn.Module):
         # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.format != "imx" and (self.dynamic or self.shape != shape):
+        if self.dynamic or self.shape != shape:
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape
 
-        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
-            box = x_cat[:, : self.reg_max * 4]
-            cls = x_cat[:, self.reg_max * 4 :]
-        else:
-            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
-
-        if self.export and self.format in {"tflite", "edgetpu"}:
-            # Precompute normalization factor to increase numerical stability
-            # See https://github.com/ultralytics/ultralytics/issues/7371
-            grid_h = shape[2]
-            grid_w = shape[3]
-            grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
-            norm = self.strides / (self.stride[0] * grid_size)
-            dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
-        else:
-            dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
-        if self.export and self.format == "imx":
-            return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
+        box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
+        dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
         return torch.cat((dbox, cls.sigmoid()), 1)
 
     def bias_init(self):
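
Note on the `_inference` hunk above: the TF saved_model/TFLite/EdgeTPU split workaround, the TFLite normalization trick, and the IMX early return are removed, leaving one uniform decode path; per the file list above, new `ultralytics/utils/export/imx.py` and `ultralytics/utils/export/tensorflow.py` modules appear in this release, which is presumably where the format-specific handling moved. As a minimal standalone sketch of what the unified path computes (illustrative names only, not the package's code):

    import torch

    reg_max = 16                                 # DFL bins per box side
    logits = torch.randn(4, reg_max)             # one anchor: l, t, r, b bin logits
    bins = torch.arange(reg_max, dtype=torch.float32)
    ltrb = (logits.softmax(-1) * bins).sum(-1)   # expected distance per side
    # dist2bbox then converts these anchor-relative distances into a box, and
    # multiplying by the stride maps it back to input-image scale.
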
@@ -211,8 +190,7 @@ class Detect(nn.Module):
 
     @staticmethod
     def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80) -> torch.Tensor:
-        """
-        Post-process YOLO model predictions.
+        """Post-process YOLO model predictions.
 
         Args:
             preds (torch.Tensor): Raw predictions with shape (batch_size, num_anchors, 4 + nc) with last dimension
@@ -235,8 +213,7 @@ class Detect(nn.Module):
 
 
 class Segment(Detect):
-    """
-    YOLO Segment head for segmentation models.
+    """YOLO Segment head for segmentation models.
 
     This class extends the Detect head to include mask prediction capabilities for instance segmentation tasks.
 
@@ -257,8 +234,7 @@ class Segment(Detect):
     """
 
     def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: tuple = ()):
-        """
-        Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.
+        """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.
 
         Args:
             nc (int): Number of classes.
@@ -287,8 +263,7 @@ class Segment(Detect):
 
 
 class OBB(Detect):
-    """
-    YOLO OBB detection head for detection with rotation models.
+    """YOLO OBB detection head for detection with rotation models.
 
     This class extends the Detect head to include oriented bounding box prediction with rotation angles.
 
@@ -309,8 +284,7 @@ class OBB(Detect):
     """
 
     def __init__(self, nc: int = 80, ne: int = 1, ch: tuple = ()):
-        """
-        Initialize OBB with number of classes `nc` and layer channels `ch`.
+        """Initialize OBB with number of classes `nc` and layer channels `ch`.
 
         Args:
             nc (int): Number of classes.
@@ -343,8 +317,7 @@ class OBB(Detect):
 
 
 class Pose(Detect):
-    """
-    YOLO Pose head for keypoints models.
+    """YOLO Pose head for keypoints models.
 
     This class extends the Detect head to include keypoint prediction capabilities for pose estimation tasks.
 
@@ -365,8 +338,7 @@ class Pose(Detect):
     """
 
     def __init__(self, nc: int = 80, kpt_shape: tuple = (17, 3), ch: tuple = ()):
-        """
-        Initialize YOLO network with default parameters and Convolutional Layers.
+        """Initialize YOLO network with default parameters and Convolutional Layers.
 
         Args:
             nc (int): Number of classes.
@@ -388,28 +360,15 @@ class Pose(Detect):
         if self.training:
             return x, kpt
         pred_kpt = self.kpts_decode(bs, kpt)
-        if self.export and self.format == "imx":
-            return (*x, pred_kpt.permute(0, 2, 1))
         return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
 
     def kpts_decode(self, bs: int, kpts: torch.Tensor) -> torch.Tensor:
         """Decode keypoints from predictions."""
         ndim = self.kpt_shape[1]
         if self.export:
-            if self.format in {
-                "tflite",
-                "edgetpu",
-            }:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
-                # Precompute normalization factor to increase numerical stability
-                y = kpts.view(bs, *self.kpt_shape, -1)
-                grid_h, grid_w = self.shape[2], self.shape[3]
-                grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
-                norm = self.strides / (self.stride[0] * grid_size)
-                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
-            else:
-                # NCNN fix
-                y = kpts.view(bs, *self.kpt_shape, -1)
-                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
+            # NCNN fix
+            y = kpts.view(bs, *self.kpt_shape, -1)
+            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
             if ndim == 3:
                 a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
             return a.view(bs, self.nk, -1)
@@ -426,8 +385,7 @@ class Pose(Detect):
 
 
 class Classify(nn.Module):
-    """
-    YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2).
+    """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2).
 
     This class implements a classification head that transforms feature maps into class predictions.
 
@@ -451,8 +409,7 @@ class Classify(nn.Module):
     export = False  # export mode
 
     def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
-        """
-        Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.
+        """Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.
 
         Args:
             c1 (int): Number of input channels.
@@ -481,11 +438,10 @@ class Classify(nn.Module):
 
 
 class WorldDetect(Detect):
-    """
-    Head for integrating YOLO detection models with semantic understanding from text embeddings.
+    """Head for integrating YOLO detection models with semantic understanding from text embeddings.
 
-    This class extends the standard Detect head to incorporate text embeddings for enhanced semantic understanding
-    in object detection tasks.
+    This class extends the standard Detect head to incorporate text embeddings for enhanced semantic understanding in
+    object detection tasks.
 
     Attributes:
         cv3 (nn.ModuleList): Convolution layers for embedding features.
@@ -504,8 +460,7 @@ class WorldDetect(Detect):
     """
 
     def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
-        """
-        Initialize YOLO detection layer with nc classes and layer channels ch.
+        """Initialize YOLO detection layer with nc classes and layer channels ch.
 
         Args:
             nc (int): Number of classes.
@@ -539,11 +494,10 @@ class WorldDetect(Detect):
 
 
 class LRPCHead(nn.Module):
-    """
-    Lightweight Region Proposal and Classification Head for efficient object detection.
+    """Lightweight Region Proposal and Classification Head for efficient object detection.
 
-    This head combines region proposal filtering with classification to enable efficient detection with
-    dynamic vocabulary support.
+    This head combines region proposal filtering with classification to enable efficient detection with dynamic
+    vocabulary support.
 
     Attributes:
         vocab (nn.Module): Vocabulary/classification layer.
@@ -564,8 +518,7 @@ class LRPCHead(nn.Module):
     """
 
     def __init__(self, vocab: nn.Module, pf: nn.Module, loc: nn.Module, enabled: bool = True):
-        """
-        Initialize LRPCHead with vocabulary, proposal filter, and localization components.
+        """Initialize LRPCHead with vocabulary, proposal filter, and localization components.
 
         Args:
             vocab (nn.Module): Vocabulary/classification module.
@@ -579,7 +532,8 @@ class LRPCHead(nn.Module):
         self.loc = loc
         self.enabled = enabled
 
-    def conv2linear(self, conv: nn.Conv2d) -> nn.Linear:
+    @staticmethod
+    def conv2linear(conv: nn.Conv2d) -> nn.Linear:
         """Convert a 1x1 convolutional layer to a linear layer."""
         assert isinstance(conv, nn.Conv2d) and conv.kernel_size == (1, 1)
         linear = nn.Linear(conv.in_channels, conv.out_channels)
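
A quick sanity check of the idea behind `conv2linear` above, now a `@staticmethod`: a 1x1 convolution is a per-position matrix multiply, so copying its weights into an `nn.Linear` reproduces its output exactly. A self-contained sketch with made-up sizes, not the package's code:

    import torch
    from torch import nn

    conv = nn.Conv2d(8, 4, kernel_size=1)
    linear = nn.Linear(8, 4)
    linear.weight.data.copy_(conv.weight.data.view(4, 8))  # (4, 8, 1, 1) -> (4, 8)
    linear.bias.data.copy_(conv.bias.data)

    x = torch.randn(2, 8, 5, 5)
    out_conv = conv(x).flatten(2).transpose(1, 2)   # (2, 25, 4)
    out_lin = linear(x.flatten(2).transpose(1, 2))  # (2, 25, 4)
    assert torch.allclose(out_conv, out_lin, atol=1e-6)
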
@@ -604,8 +558,7 @@ class LRPCHead(nn.Module):
 
 
 class YOLOEDetect(Detect):
-    """
-    Head for integrating YOLO detection models with semantic understanding from text embeddings.
+    """Head for integrating YOLO detection models with semantic understanding from text embeddings.
 
     This class extends the standard Detect head to support text-guided detection with enhanced semantic understanding
     through text embeddings and visual prompt embeddings.
@@ -637,8 +590,7 @@ class YOLOEDetect(Detect):
     is_fused = False
 
     def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
-        """
-        Initialize YOLO detection layer with nc classes and layer channels ch.
+        """Initialize YOLO detection layer with nc classes and layer channels ch.
 
         Args:
             nc (int): Number of classes.
@@ -792,11 +744,10 @@ class YOLOEDetect(Detect):
 
 
 class YOLOESegment(YOLOEDetect):
-    """
-    YOLO segmentation head with text embedding capabilities.
+    """YOLO segmentation head with text embedding capabilities.
 
-    This class extends YOLOEDetect to include mask prediction capabilities for instance segmentation tasks
-    with text-guided semantic understanding.
+    This class extends YOLOEDetect to include mask prediction capabilities for instance segmentation tasks with
+    text-guided semantic understanding.
 
     Attributes:
         nm (int): Number of masks.
@@ -818,8 +769,7 @@ class YOLOESegment(YOLOEDetect):
     def __init__(
         self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: tuple = ()
     ):
-        """
-        Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.
+        """Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.
 
         Args:
             nc (int): Number of classes.
@@ -860,8 +810,7 @@ class YOLOESegment(YOLOEDetect):
 
 
 class RTDETRDecoder(nn.Module):
-    """
-    Real-Time Deformable Transformer Decoder (RTDETRDecoder) module for object detection.
+    """Real-Time Deformable Transformer Decoder (RTDETRDecoder) module for object detection.
 
     This decoder module utilizes Transformer architecture along with deformable convolutions to predict bounding boxes
     and class labels for objects in an image. It integrates features from multiple layers and runs through a series of
@@ -901,6 +850,10 @@ class RTDETRDecoder(nn.Module):
     """
 
     export = False  # export mode
+    shapes = []
+    anchors = torch.empty(0)
+    valid_mask = torch.empty(0)
+    dynamic = False
 
     def __init__(
         self,
@@ -921,8 +874,7 @@ class RTDETRDecoder(nn.Module):
         box_noise_scale: float = 1.0,
         learnt_init_query: bool = False,
     ):
-        """
-        Initialize the RTDETRDecoder module with the given parameters.
+        """Initialize the RTDETRDecoder module with the given parameters.
 
         Args:
             nc (int): Number of classes.
@@ -982,8 +934,7 @@ class RTDETRDecoder(nn.Module):
         self._reset_parameters()
 
     def forward(self, x: list[torch.Tensor], batch: dict | None = None) -> tuple | torch.Tensor:
-        """
-        Run the forward pass of the module, returning bounding box and classification scores for the input.
+        """Run the forward pass of the module, returning bounding box and classification scores for the input.
 
         Args:
             x (list[torch.Tensor]): List of feature maps from the backbone.
@@ -1031,16 +982,15 @@ class RTDETRDecoder(nn.Module):
         y = torch.cat((dec_bboxes.squeeze(0), dec_scores.squeeze(0).sigmoid()), -1)
         return y if self.export else (y, x)
 
+    @staticmethod
     def _generate_anchors(
-        self,
         shapes: list[list[int]],
         grid_size: float = 0.05,
         dtype: torch.dtype = torch.float32,
         device: str = "cpu",
         eps: float = 1e-2,
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """
-        Generate anchor bounding boxes for given shapes with specific grid size and validate them.
+        """Generate anchor bounding boxes for given shapes with specific grid size and validate them.
 
         Args:
             shapes (list): List of feature map shapes.
@@ -1057,7 +1007,7 @@ class RTDETRDecoder(nn.Module):
         for i, (h, w) in enumerate(shapes):
             sy = torch.arange(end=h, dtype=dtype, device=device)
             sx = torch.arange(end=w, dtype=dtype, device=device)
-            grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx)
+            grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_11 else torch.meshgrid(sy, sx)
             grid_xy = torch.stack([grid_x, grid_y], -1)  # (h, w, 2)
 
             valid_WH = torch.tensor([w, h], dtype=dtype, device=device)
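
On the `TORCH_1_10` to `TORCH_1_11` guard change above (the flag also moved from `ultralytics.utils.tal` to `ultralytics.utils.torch_utils` in the import hunk): `torch.meshgrid` accepts an explicit `indexing` keyword on newer torch, and `indexing="ij"` reproduces the legacy default, so both branches yield identical grids. A standalone illustration, not package code:

    import torch

    sy, sx = torch.arange(3.0), torch.arange(4.0)
    grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij")
    assert grid_y.shape == grid_x.shape == (3, 4)  # rows follow sy, columns follow sx
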
@@ -1072,8 +1022,7 @@ class RTDETRDecoder(nn.Module):
         return anchors, valid_mask
 
     def _get_encoder_input(self, x: list[torch.Tensor]) -> tuple[torch.Tensor, list[list[int]]]:
-        """
-        Process and return encoder inputs by getting projection features from input and concatenating them.
+        """Process and return encoder inputs by getting projection features from input and concatenating them.
 
         Args:
             x (list[torch.Tensor]): List of feature maps from the backbone.
@@ -1105,8 +1054,7 @@ class RTDETRDecoder(nn.Module):
         dn_embed: torch.Tensor | None = None,
         dn_bbox: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        """
-        Generate and prepare the input required for the decoder from the provided features and shapes.
+        """Generate and prepare the input required for the decoder from the provided features and shapes.
 
         Args:
             feats (torch.Tensor): Processed features from encoder.
@@ -1121,22 +1069,24 @@ class RTDETRDecoder(nn.Module):
             enc_scores (torch.Tensor): Encoded scores.
         """
         bs = feats.shape[0]
-        # Prepare input for decoder
-        anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
-        features = self.enc_output(valid_mask * feats)  # bs, h*w, 256
+        if self.dynamic or self.shapes != shapes:
+            self.anchors, self.valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
+            self.shapes = shapes
 
+        # Prepare input for decoder
+        features = self.enc_output(self.valid_mask * feats)  # bs, h*w, 256
         enc_outputs_scores = self.enc_score_head(features)  # (bs, h*w, nc)
 
         # Query selection
-        # (bs, num_queries)
+        # (bs*num_queries,)
         topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1)
-        # (bs, num_queries)
+        # (bs*num_queries,)
         batch_ind = torch.arange(end=bs, dtype=topk_ind.dtype).unsqueeze(-1).repeat(1, self.num_queries).view(-1)
 
         # (bs, num_queries, 256)
         top_k_features = features[batch_ind, topk_ind].view(bs, self.num_queries, -1)
         # (bs, num_queries, 4)
-        top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1)
+        top_k_anchors = self.anchors[:, topk_ind].view(bs, self.num_queries, -1)
 
         # Dynamic anchors + static content
         refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors
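
The `_get_decoder_input` hunk above stops regenerating anchors on every forward pass: the anchors and valid mask are now cached on the module and rebuilt only when the feature-map shapes change (or `dynamic` is set), mirroring the shape check in `Detect._inference`. The pattern in isolation, as a hedged sketch with illustrative names rather than the package's code:

    class AnchorCache:
        """Rebuild anchors only when the input shapes change (or `dynamic` is set)."""

        def __init__(self, build, dynamic=False):
            self.build = build      # expensive anchor-generation callable
            self.dynamic = dynamic  # True forces regeneration on every call
            self.shapes, self.anchors = [], None

        def get(self, shapes):
            if self.dynamic or self.shapes != shapes:
                self.anchors = self.build(shapes)  # e.g. shapes is a list of (h, w)
                self.shapes = shapes
            return self.anchors
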
@@ -1182,11 +1132,10 @@ class RTDETRDecoder(nn.Module):
 
 
 class v10Detect(Detect):
-    """
-    v10 Detection head from https://arxiv.org/pdf/2405.14458.
+    """v10 Detection head from https://arxiv.org/pdf/2405.14458.
 
-    This class implements the YOLOv10 detection head with dual-assignment training and consistent dual predictions
-    for improved efficiency and performance.
+    This class implements the YOLOv10 detection head with dual-assignment training and consistent dual predictions for
+    improved efficiency and performance.
 
     Attributes:
         end2end (bool): End-to-end detection mode.
@@ -1210,8 +1159,7 @@ class v10Detect(Detect):
     end2end = True
 
     def __init__(self, nc: int = 80, ch: tuple = ()):
-        """
-        Initialize the v10Detect object with the specified number of classes and input channels.
+        """Initialize the v10Detect object with the specified number of classes and input channels.
 
         Args:
             nc (int): Number of classes.