dgenerate-ultralytics-headless 8.3.214__py3-none-any.whl → 8.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/METADATA +64 -74
  2. dgenerate_ultralytics_headless-8.4.7.dist-info/RECORD +311 -0
  3. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +7 -9
  5. tests/conftest.py +8 -15
  6. tests/test_cli.py +1 -1
  7. tests/test_cuda.py +13 -10
  8. tests/test_engine.py +9 -9
  9. tests/test_exports.py +65 -13
  10. tests/test_integrations.py +13 -13
  11. tests/test_python.py +125 -69
  12. tests/test_solutions.py +161 -152
  13. ultralytics/__init__.py +1 -1
  14. ultralytics/cfg/__init__.py +86 -92
  15. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  16. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  17. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  18. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  19. ultralytics/cfg/datasets/TT100K.yaml +346 -0
  20. ultralytics/cfg/datasets/VOC.yaml +15 -16
  21. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  22. ultralytics/cfg/datasets/coco-pose.yaml +21 -0
  23. ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
  24. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  25. ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
  26. ultralytics/cfg/datasets/dog-pose.yaml +28 -0
  27. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  28. ultralytics/cfg/datasets/dota8.yaml +2 -2
  29. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
  30. ultralytics/cfg/datasets/kitti.yaml +27 -0
  31. ultralytics/cfg/datasets/lvis.yaml +5 -5
  32. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  33. ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
  34. ultralytics/cfg/datasets/xView.yaml +16 -16
  35. ultralytics/cfg/default.yaml +4 -2
  36. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  37. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  38. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  39. ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
  40. ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
  41. ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
  42. ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
  43. ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
  44. ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
  45. ultralytics/cfg/models/26/yolo26.yaml +52 -0
  46. ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
  47. ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
  48. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
  49. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
  50. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
  51. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
  52. ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
  53. ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
  54. ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
  55. ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
  56. ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
  57. ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
  58. ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
  59. ultralytics/cfg/models/v6/yolov6.yaml +1 -1
  60. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  61. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  62. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  63. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  64. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  65. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  66. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  67. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  68. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  69. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  70. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  71. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
  72. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  73. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  74. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  75. ultralytics/data/__init__.py +4 -4
  76. ultralytics/data/annotator.py +5 -6
  77. ultralytics/data/augment.py +300 -475
  78. ultralytics/data/base.py +18 -26
  79. ultralytics/data/build.py +147 -25
  80. ultralytics/data/converter.py +108 -87
  81. ultralytics/data/dataset.py +47 -75
  82. ultralytics/data/loaders.py +42 -49
  83. ultralytics/data/split.py +5 -6
  84. ultralytics/data/split_dota.py +8 -15
  85. ultralytics/data/utils.py +36 -45
  86. ultralytics/engine/exporter.py +351 -263
  87. ultralytics/engine/model.py +186 -225
  88. ultralytics/engine/predictor.py +45 -54
  89. ultralytics/engine/results.py +198 -325
  90. ultralytics/engine/trainer.py +165 -106
  91. ultralytics/engine/tuner.py +41 -43
  92. ultralytics/engine/validator.py +55 -38
  93. ultralytics/hub/__init__.py +16 -19
  94. ultralytics/hub/auth.py +6 -12
  95. ultralytics/hub/google/__init__.py +7 -10
  96. ultralytics/hub/session.py +15 -25
  97. ultralytics/hub/utils.py +5 -8
  98. ultralytics/models/__init__.py +1 -1
  99. ultralytics/models/fastsam/__init__.py +1 -1
  100. ultralytics/models/fastsam/model.py +8 -10
  101. ultralytics/models/fastsam/predict.py +18 -30
  102. ultralytics/models/fastsam/utils.py +1 -2
  103. ultralytics/models/fastsam/val.py +5 -7
  104. ultralytics/models/nas/__init__.py +1 -1
  105. ultralytics/models/nas/model.py +5 -8
  106. ultralytics/models/nas/predict.py +7 -9
  107. ultralytics/models/nas/val.py +1 -2
  108. ultralytics/models/rtdetr/__init__.py +1 -1
  109. ultralytics/models/rtdetr/model.py +5 -8
  110. ultralytics/models/rtdetr/predict.py +15 -19
  111. ultralytics/models/rtdetr/train.py +10 -13
  112. ultralytics/models/rtdetr/val.py +21 -23
  113. ultralytics/models/sam/__init__.py +15 -2
  114. ultralytics/models/sam/amg.py +14 -20
  115. ultralytics/models/sam/build.py +26 -19
  116. ultralytics/models/sam/build_sam3.py +377 -0
  117. ultralytics/models/sam/model.py +29 -32
  118. ultralytics/models/sam/modules/blocks.py +83 -144
  119. ultralytics/models/sam/modules/decoders.py +19 -37
  120. ultralytics/models/sam/modules/encoders.py +44 -101
  121. ultralytics/models/sam/modules/memory_attention.py +16 -30
  122. ultralytics/models/sam/modules/sam.py +200 -73
  123. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  124. ultralytics/models/sam/modules/transformer.py +18 -28
  125. ultralytics/models/sam/modules/utils.py +174 -50
  126. ultralytics/models/sam/predict.py +2248 -350
  127. ultralytics/models/sam/sam3/__init__.py +3 -0
  128. ultralytics/models/sam/sam3/decoder.py +546 -0
  129. ultralytics/models/sam/sam3/encoder.py +529 -0
  130. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  131. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  132. ultralytics/models/sam/sam3/model_misc.py +199 -0
  133. ultralytics/models/sam/sam3/necks.py +129 -0
  134. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  135. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  136. ultralytics/models/sam/sam3/vitdet.py +547 -0
  137. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  138. ultralytics/models/utils/loss.py +14 -26
  139. ultralytics/models/utils/ops.py +13 -17
  140. ultralytics/models/yolo/__init__.py +1 -1
  141. ultralytics/models/yolo/classify/predict.py +10 -13
  142. ultralytics/models/yolo/classify/train.py +12 -33
  143. ultralytics/models/yolo/classify/val.py +30 -29
  144. ultralytics/models/yolo/detect/predict.py +9 -12
  145. ultralytics/models/yolo/detect/train.py +17 -23
  146. ultralytics/models/yolo/detect/val.py +77 -59
  147. ultralytics/models/yolo/model.py +43 -60
  148. ultralytics/models/yolo/obb/predict.py +7 -16
  149. ultralytics/models/yolo/obb/train.py +14 -17
  150. ultralytics/models/yolo/obb/val.py +40 -37
  151. ultralytics/models/yolo/pose/__init__.py +1 -1
  152. ultralytics/models/yolo/pose/predict.py +7 -22
  153. ultralytics/models/yolo/pose/train.py +13 -16
  154. ultralytics/models/yolo/pose/val.py +39 -58
  155. ultralytics/models/yolo/segment/predict.py +17 -21
  156. ultralytics/models/yolo/segment/train.py +7 -10
  157. ultralytics/models/yolo/segment/val.py +95 -47
  158. ultralytics/models/yolo/world/train.py +8 -14
  159. ultralytics/models/yolo/world/train_world.py +11 -34
  160. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  161. ultralytics/models/yolo/yoloe/predict.py +16 -23
  162. ultralytics/models/yolo/yoloe/train.py +36 -44
  163. ultralytics/models/yolo/yoloe/train_seg.py +11 -11
  164. ultralytics/models/yolo/yoloe/val.py +15 -20
  165. ultralytics/nn/__init__.py +7 -7
  166. ultralytics/nn/autobackend.py +159 -85
  167. ultralytics/nn/modules/__init__.py +68 -60
  168. ultralytics/nn/modules/activation.py +4 -6
  169. ultralytics/nn/modules/block.py +260 -224
  170. ultralytics/nn/modules/conv.py +52 -97
  171. ultralytics/nn/modules/head.py +831 -299
  172. ultralytics/nn/modules/transformer.py +76 -88
  173. ultralytics/nn/modules/utils.py +16 -21
  174. ultralytics/nn/tasks.py +180 -195
  175. ultralytics/nn/text_model.py +45 -69
  176. ultralytics/optim/__init__.py +5 -0
  177. ultralytics/optim/muon.py +338 -0
  178. ultralytics/solutions/__init__.py +12 -12
  179. ultralytics/solutions/ai_gym.py +13 -19
  180. ultralytics/solutions/analytics.py +15 -16
  181. ultralytics/solutions/config.py +6 -7
  182. ultralytics/solutions/distance_calculation.py +10 -13
  183. ultralytics/solutions/heatmap.py +8 -14
  184. ultralytics/solutions/instance_segmentation.py +6 -9
  185. ultralytics/solutions/object_blurrer.py +7 -10
  186. ultralytics/solutions/object_counter.py +12 -19
  187. ultralytics/solutions/object_cropper.py +8 -14
  188. ultralytics/solutions/parking_management.py +34 -32
  189. ultralytics/solutions/queue_management.py +10 -12
  190. ultralytics/solutions/region_counter.py +9 -12
  191. ultralytics/solutions/security_alarm.py +15 -20
  192. ultralytics/solutions/similarity_search.py +10 -15
  193. ultralytics/solutions/solutions.py +77 -76
  194. ultralytics/solutions/speed_estimation.py +7 -10
  195. ultralytics/solutions/streamlit_inference.py +2 -4
  196. ultralytics/solutions/templates/similarity-search.html +7 -18
  197. ultralytics/solutions/trackzone.py +7 -10
  198. ultralytics/solutions/vision_eye.py +5 -8
  199. ultralytics/trackers/__init__.py +1 -1
  200. ultralytics/trackers/basetrack.py +3 -5
  201. ultralytics/trackers/bot_sort.py +10 -27
  202. ultralytics/trackers/byte_tracker.py +21 -37
  203. ultralytics/trackers/track.py +4 -7
  204. ultralytics/trackers/utils/gmc.py +11 -22
  205. ultralytics/trackers/utils/kalman_filter.py +37 -48
  206. ultralytics/trackers/utils/matching.py +12 -15
  207. ultralytics/utils/__init__.py +124 -124
  208. ultralytics/utils/autobatch.py +2 -4
  209. ultralytics/utils/autodevice.py +17 -18
  210. ultralytics/utils/benchmarks.py +57 -71
  211. ultralytics/utils/callbacks/base.py +8 -10
  212. ultralytics/utils/callbacks/clearml.py +5 -13
  213. ultralytics/utils/callbacks/comet.py +32 -46
  214. ultralytics/utils/callbacks/dvc.py +13 -18
  215. ultralytics/utils/callbacks/mlflow.py +4 -5
  216. ultralytics/utils/callbacks/neptune.py +7 -15
  217. ultralytics/utils/callbacks/platform.py +423 -38
  218. ultralytics/utils/callbacks/raytune.py +3 -4
  219. ultralytics/utils/callbacks/tensorboard.py +25 -31
  220. ultralytics/utils/callbacks/wb.py +16 -14
  221. ultralytics/utils/checks.py +127 -85
  222. ultralytics/utils/cpu.py +3 -8
  223. ultralytics/utils/dist.py +9 -12
  224. ultralytics/utils/downloads.py +25 -33
  225. ultralytics/utils/errors.py +6 -14
  226. ultralytics/utils/events.py +2 -4
  227. ultralytics/utils/export/__init__.py +4 -236
  228. ultralytics/utils/export/engine.py +246 -0
  229. ultralytics/utils/export/imx.py +117 -63
  230. ultralytics/utils/export/tensorflow.py +231 -0
  231. ultralytics/utils/files.py +26 -30
  232. ultralytics/utils/git.py +9 -11
  233. ultralytics/utils/instance.py +30 -51
  234. ultralytics/utils/logger.py +212 -114
  235. ultralytics/utils/loss.py +601 -215
  236. ultralytics/utils/metrics.py +128 -156
  237. ultralytics/utils/nms.py +13 -16
  238. ultralytics/utils/ops.py +117 -166
  239. ultralytics/utils/patches.py +75 -21
  240. ultralytics/utils/plotting.py +75 -80
  241. ultralytics/utils/tal.py +125 -59
  242. ultralytics/utils/torch_utils.py +53 -79
  243. ultralytics/utils/tqdm.py +24 -21
  244. ultralytics/utils/triton.py +13 -19
  245. ultralytics/utils/tuner.py +19 -10
  246. dgenerate_ultralytics_headless-8.3.214.dist-info/RECORD +0 -283
  247. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/entry_points.txt +0 -0
  248. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/licenses/LICENSE +0 -0
  249. {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/top_level.txt +0 -0
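
Note: the per-file summary above can be approximated locally with only the standard library, since a wheel is a plain zip archive. A rough illustrative sketch; the wheel paths are assumptions about where the two files were downloaded:

```python
import zipfile

OLD = "dgenerate_ultralytics_headless-8.3.214-py3-none-any.whl"  # assumed local paths
NEW = "dgenerate_ultralytics_headless-8.4.7-py3-none-any.whl"

def names(path: str) -> set[str]:
    """Return the set of file paths contained in a wheel (a zip archive)."""
    with zipfile.ZipFile(path) as zf:
        return set(zf.namelist())

old, new = names(OLD), names(NEW)
print(f"added: {len(new - old)}, removed: {len(old - new)}, common: {len(old & new)}")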
ultralytics/nn/autobackend.py
@@ -19,11 +19,11 @@ from PIL import Image
 from ultralytics.utils import ARM64, IS_JETSON, LINUX, LOGGER, PYTHON_VERSION, ROOT, YAML, is_jetson
 from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml, is_rockchip
 from ultralytics.utils.downloads import attempt_download_asset, is_url
+from ultralytics.utils.nms import non_max_suppression


 def check_class_names(names: list | dict) -> dict[int, str]:
-    """
-    Check class names and convert to dict format if needed.
+    """Check class names and convert to dict format if needed.

     Args:
         names (list | dict): Class names as list or dict format.
@@ -52,8 +52,7 @@ def check_class_names(names: list | dict) -> dict[int, str]:


 def default_class_names(data: str | Path | None = None) -> dict[int, str]:
-    """
-    Apply default class names to an input YAML file or return numerical class names.
+    """Apply default class names to an input YAML file or return numerical class names.

     Args:
         data (str | Path, optional): Path to YAML file containing class names.
@@ -70,8 +69,7 @@ def default_class_names(data: str | Path | None = None) -> dict[int, str]:


 class AutoBackend(nn.Module):
-    """
-    Handle dynamic backend selection for running inference using Ultralytics YOLO models.
+    """Handle dynamic backend selection for running inference using Ultralytics YOLO models.

     The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
     range of formats, each with specific naming conventions as outlined below:
@@ -95,6 +93,9 @@ class AutoBackend(nn.Module):
            | NCNN                  | *_ncnn_model/     |
            | IMX                   | *_imx_model/      |
            | RKNN                  | *_rknn_model/     |
+           | Triton Inference      | triton://model    |
+           | ExecuTorch            | *.pte             |
+           | Axelera               | *_axelera_model/  |

     Attributes:
         model (torch.nn.Module): The loaded YOLO model.
@@ -121,22 +122,24 @@ class AutoBackend(nn.Module):
         imx (bool): Whether the model is an IMX model.
         rknn (bool): Whether the model is an RKNN model.
         triton (bool): Whether the model is a Triton Inference Server model.
+        pte (bool): Whether the model is a PyTorch ExecuTorch model.
+        axelera (bool): Whether the model is an Axelera model.

     Methods:
         forward: Run inference on an input image.
-        from_numpy: Convert numpy array to tensor.
+        from_numpy: Convert NumPy arrays to tensors on the model device.
         warmup: Warm up the model with a dummy input.
         _model_type: Determine the model type from file path.

     Examples:
-        >>> model = AutoBackend(model="yolo11n.pt", device="cuda")
+        >>> model = AutoBackend(model="yolo26n.pt", device="cuda")
         >>> results = model(img)
     """

     @torch.no_grad()
     def __init__(
         self,
-        model: str | torch.nn.Module = "yolo11n.pt",
+        model: str | torch.nn.Module = "yolo26n.pt",
         device: torch.device = torch.device("cpu"),
         dnn: bool = False,
         data: str | Path | None = None,
@@ -144,8 +147,7 @@ class AutoBackend(nn.Module):
         fuse: bool = True,
         verbose: bool = True,
     ):
-        """
-        Initialize the AutoBackend for inference.
+        """Initialize the AutoBackend for inference.

         Args:
             model (str | torch.nn.Module): Path to the model weights file or a module instance.
@@ -175,10 +177,12 @@ class AutoBackend(nn.Module):
             ncnn,
             imx,
             rknn,
+            pte,
+            axelera,
             triton,
         ) = self._model_type("" if nn_module else model)
         fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
-        nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn  # BHWC formats (vs torch BCWH)
+        nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn  # BHWC formats (vs torch BCHW)
         stride, ch = 32, 3  # default stride and channels
         end2end, dynamic = False, False
         metadata, task = None, None
@@ -217,6 +221,7 @@ class AutoBackend(nn.Module):
                 for p in model.parameters():
                     p.requires_grad = False
                 self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
+                end2end = getattr(model, "end2end", False)

         # TorchScript
         elif jit:
@@ -241,25 +246,28 @@ class AutoBackend(nn.Module):
             check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
             import onnxruntime

-            providers = ["CPUExecutionProvider"]
-            if cuda:
-                if "CUDAExecutionProvider" in onnxruntime.get_available_providers():
-                    providers.insert(0, "CUDAExecutionProvider")
-                else:  # Only log warning if CUDA was requested but unavailable
-                    LOGGER.warning("Failed to start ONNX Runtime with CUDA. Using CPU...")
-                    device = torch.device("cpu")
-                    cuda = False
-            LOGGER.info(f"Using ONNX Runtime {onnxruntime.__version__} {providers[0]}")
+            # Select execution provider: CUDA > CoreML (mps) > CPU
+            available = onnxruntime.get_available_providers()
+            if cuda and "CUDAExecutionProvider" in available:
+                providers = [("CUDAExecutionProvider", {"device_id": device.index}), "CPUExecutionProvider"]
+            elif device.type == "mps" and "CoreMLExecutionProvider" in available:
+                providers = ["CoreMLExecutionProvider", "CPUExecutionProvider"]
+            else:
+                providers = ["CPUExecutionProvider"]
+                if cuda:
+                    LOGGER.warning("CUDA requested but CUDAExecutionProvider not available. Using CPU...")
+                    device, cuda = torch.device("cpu"), False
+            LOGGER.info(
+                f"Using ONNX Runtime {onnxruntime.__version__} with {providers[0] if isinstance(providers[0], str) else providers[0][0]}"
+            )
             if onnx:
                 session = onnxruntime.InferenceSession(w, providers=providers)
             else:
-                check_requirements(
-                    ("model-compression-toolkit>=2.4.1", "sony-custom-layers[torch]>=0.3.0", "onnxruntime-extensions")
-                )
+                check_requirements(("model-compression-toolkit>=2.4.1", "edge-mdt-cl<1.1.0", "onnxruntime-extensions"))
                 w = next(Path(w).glob("*.onnx"))
                 LOGGER.info(f"Loading {w} for ONNX IMX inference...")
                 import mct_quantizers as mctq
-                from sony_custom_layers.pytorch.nms import nms_ort  # noqa
+                from edgemdt_cl.pytorch.nms import nms_ort  # noqa - register custom NMS ops

                 session_options = mctq.get_ort_session_options()
                 session_options.enable_mem_reuse = False  # fix the shape mismatch from onnxruntime
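
Note: restated in isolation, the new provider priority is CUDA first, then CoreML for Apple `mps` devices, then CPU. A minimal standalone sketch (`device` here is a stand-in `torch.device`, not the AutoBackend attribute):

```python
import torch
import onnxruntime

device = torch.device("cpu")  # assumed; could be torch.device("cuda", 0) or torch.device("mps")
available = onnxruntime.get_available_providers()
if device.type == "cuda" and "CUDAExecutionProvider" in available:
    providers = [("CUDAExecutionProvider", {"device_id": device.index or 0}), "CPUExecutionProvider"]
elif device.type == "mps" and "CoreMLExecutionProvider" in available:
    providers = ["CoreMLExecutionProvider", "CPUExecutionProvider"]
else:
    providers = ["CPUExecutionProvider"]  # fallback; AutoBackend also warns if CUDA was requested
print(providers)  # pass to onnxruntime.InferenceSession(path, providers=providers)
```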
@@ -269,7 +277,10 @@ class AutoBackend(nn.Module):
             metadata = session.get_modelmeta().custom_metadata_map
             dynamic = isinstance(session.get_outputs()[0].shape[0], str)
             fp16 = "float16" in session.get_inputs()[0].type
-            if not dynamic:
+
+            # Setup IO binding for optimized inference (CUDA only, not supported for CoreML)
+            use_io_binding = not dynamic and cuda
+            if use_io_binding:
                 io = session.io_binding()
                 bindings = []
                 for output in session.get_outputs():
332
343
  check_requirements("numpy==1.23.5")
333
344
 
334
345
  try: # https://developer.nvidia.com/nvidia-tensorrt-download
335
- import tensorrt as trt # noqa
346
+ import tensorrt as trt
336
347
  except ImportError:
337
348
  if LINUX:
338
349
  check_requirements("tensorrt>7.0.0,!=10.1.0")
339
- import tensorrt as trt # noqa
350
+ import tensorrt as trt
340
351
  check_version(trt.__version__, ">=7.0.0", hard=True)
341
352
  check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
342
353
  if device.type == "cpu":
@@ -369,39 +380,42 @@ class AutoBackend(nn.Module):
369
380
  is_trt10 = not hasattr(model, "num_bindings")
370
381
  num = range(model.num_io_tensors) if is_trt10 else range(model.num_bindings)
371
382
  for i in num:
383
+ # Get tensor info using TRT10+ or legacy API
372
384
  if is_trt10:
373
385
  name = model.get_tensor_name(i)
374
386
  dtype = trt.nptype(model.get_tensor_dtype(name))
375
387
  is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT
376
- if is_input:
377
- if -1 in tuple(model.get_tensor_shape(name)):
378
- dynamic = True
379
- context.set_input_shape(name, tuple(model.get_tensor_profile_shape(name, 0)[1]))
380
- if dtype == np.float16:
381
- fp16 = True
382
- else:
383
- output_names.append(name)
384
- shape = tuple(context.get_tensor_shape(name))
385
- else: # TensorRT < 10.0
388
+ shape = tuple(model.get_tensor_shape(name))
389
+ profile_shape = tuple(model.get_tensor_profile_shape(name, 0)[2]) if is_input else None
390
+ else:
386
391
  name = model.get_binding_name(i)
387
392
  dtype = trt.nptype(model.get_binding_dtype(i))
388
393
  is_input = model.binding_is_input(i)
389
- if model.binding_is_input(i):
390
- if -1 in tuple(model.get_binding_shape(i)): # dynamic
391
- dynamic = True
392
- context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[1]))
393
- if dtype == np.float16:
394
- fp16 = True
395
- else:
396
- output_names.append(name)
397
- shape = tuple(context.get_binding_shape(i))
394
+ shape = tuple(model.get_binding_shape(i))
395
+ profile_shape = tuple(model.get_profile_shape(0, i)[1]) if is_input else None
396
+
397
+ # Process input/output tensors
398
+ if is_input:
399
+ if -1 in shape:
400
+ dynamic = True
401
+ if is_trt10:
402
+ context.set_input_shape(name, profile_shape)
403
+ else:
404
+ context.set_binding_shape(i, profile_shape)
405
+ if dtype == np.float16:
406
+ fp16 = True
407
+ else:
408
+ output_names.append(name)
409
+ shape = tuple(context.get_tensor_shape(name)) if is_trt10 else tuple(context.get_binding_shape(i))
398
410
  im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
399
411
  bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
400
412
  binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
401
413
 
402
414
  # CoreML
403
415
  elif coreml:
404
- check_requirements("coremltools>=8.0")
416
+ check_requirements(
417
+ ["coremltools>=9.0", "numpy>=1.14.5,<=2.3.5"]
418
+ ) # latest numpy 2.4.0rc1 breaks coremltools exports
405
419
  LOGGER.info(f"Loading {w} for CoreML inference...")
406
420
  import coremltools as ct
407
421
 
@@ -414,8 +428,7 @@ class AutoBackend(nn.Module):
             LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...")
             import tensorflow as tf

-            keras = False  # assume TF1 saved_model
-            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
+            model = tf.saved_model.load(w)
             metadata = Path(w) / "metadata.yaml"

         # TF GraphDef
@@ -423,7 +436,7 @@ class AutoBackend(nn.Module):
             LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...")
             import tensorflow as tf

-            from ultralytics.engine.exporter import gd_outputs
+            from ultralytics.utils.export.tensorflow import gd_outputs

             def wrap_frozen_graph(gd, inputs, outputs):
                 """Wrap frozen graphs for deployment."""
@@ -485,13 +498,13 @@ class AutoBackend(nn.Module):
         elif paddle:
             LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
             check_requirements(
-                "paddlepaddle-gpu"
+                "paddlepaddle-gpu>=3.0.0,!=3.3.0"  # exclude 3.3.0 https://github.com/PaddlePaddle/Paddle/issues/77340
                 if torch.cuda.is_available()
                 else "paddlepaddle==3.0.0"  # pin 3.0.0 for ARM64
                 if ARM64
-                else "paddlepaddle>=3.0.0"
+                else "paddlepaddle>=3.0.0,!=3.3.0"  # exclude 3.3.0 https://github.com/PaddlePaddle/Paddle/issues/77340
             )
-            import paddle.inference as pdi  # noqa
+            import paddle.inference as pdi

             w = Path(w)
             model_file, params_file = None, None
@@ -533,11 +546,16 @@ class AutoBackend(nn.Module):
         # NCNN
         elif ncnn:
             LOGGER.info(f"Loading {w} for NCNN inference...")
-            check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
+            check_requirements("ncnn", cmds="--no-deps")
             import ncnn as pyncnn

             net = pyncnn.Net()
-            net.opt.use_vulkan_compute = cuda
+            if isinstance(cuda, torch.device):
+                net.opt.use_vulkan_compute = cuda
+            elif isinstance(device, str) and device.startswith("vulkan"):
+                net.opt.use_vulkan_compute = True
+                net.set_vulkan_device(int(device.split(":")[1]))
+                device = torch.device("cpu")
             w = Path(w)
             if not w.is_file():  # if not *.param
                 w = next(w.glob("*.param"))  # get *.param file from *_ncnn_model dir
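
Note: the new `isinstance(device, str)` guard implies a plain string such as "vulkan:0" can now reach the NCNN branch to select a Vulkan compute device. A hedged usage sketch, assuming the string device survives the earlier device checks and that an exported NCNN model directory exists locally:

```python
from ultralytics.nn.autobackend import AutoBackend

# "vulkan:0" enables NCNN Vulkan compute on device 0; per the diff, AutoBackend
# then falls back to torch.device("cpu") for the surrounding tensor work.
model = AutoBackend(model="yolo26n_ncnn_model", device="vulkan:0")  # assumed paths/values
```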
@@ -569,6 +587,51 @@ class AutoBackend(nn.Module):
             rknn_model.init_runtime()
             metadata = w.parent / "metadata.yaml"

+        # Axelera
+        elif axelera:
+            import os
+
+            if not os.environ.get("AXELERA_RUNTIME_DIR"):
+                LOGGER.warning(
+                    "Axelera runtime environment is not activated."
+                    "\nPlease run: source /opt/axelera/sdk/latest/axelera_activate.sh"
+                    "\n\nIf this fails, verify driver installation: https://docs.ultralytics.com/integrations/axelera/#axelera-driver-installation"
+                )
+            try:
+                from axelera.runtime import op
+            except ImportError:
+                check_requirements(
+                    "axelera_runtime2==0.1.2",
+                    cmds="--extra-index-url https://software.axelera.ai/artifactory/axelera-runtime-pypi",
+                )
+                from axelera.runtime import op
+
+            w = Path(w)
+            if (found := next(w.rglob("*.axm"), None)) is None:
+                raise FileNotFoundError(f"No .axm file found in: {w}")
+
+            ax_model = op.load(str(found))
+            metadata = found.parent / "metadata.yaml"
+
+        # ExecuTorch
+        elif pte:
+            LOGGER.info(f"Loading {w} for ExecuTorch inference...")
+            # TorchAO release compatibility table bug https://github.com/pytorch/ao/issues/2919
+            check_requirements("setuptools<71.0.0")  # Setuptools bug: https://github.com/pypa/setuptools/issues/4483
+            check_requirements(("executorch==1.0.1", "flatbuffers"))
+            from executorch.runtime import Runtime
+
+            w = Path(w)
+            if w.is_dir():
+                model_file = next(w.rglob("*.pte"))
+                metadata = w / "metadata.yaml"
+            else:
+                model_file = w
+                metadata = w.parent / "metadata.yaml"
+
+            program = Runtime.get().load_program(str(model_file))
+            model = program.load_method("forward")
+
         # Any other format (unsupported)
         else:
             from ultralytics.engine.exporter import export_formats
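
Note: the ExecuTorch path in isolation, for readers who want to try it outside AutoBackend. This sketch reuses exactly the runtime calls shown in the hunk above; the `yolo26n.pte` path is an assumption about a previously exported program:

```python
import torch
from executorch.runtime import Runtime  # executorch==1.0.1 per the requirement pin above

program = Runtime.get().load_program("yolo26n.pte")       # assumed local .pte export
method = program.load_method("forward")                    # same entry point AutoBackend uses
outputs = method.execute([torch.zeros(1, 3, 640, 640)])    # list of inputs in, list of outputs back
```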
@@ -585,15 +648,16 @@ class AutoBackend(nn.Module):
             for k, v in metadata.items():
                 if k in {"stride", "batch", "channels"}:
                     metadata[k] = int(v)
-                elif k in {"imgsz", "names", "kpt_shape", "args"} and isinstance(v, str):
-                    metadata[k] = eval(v)
+                elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args"} and isinstance(v, str):
+                    metadata[k] = ast.literal_eval(v)
             stride = metadata["stride"]
             task = metadata["task"]
             batch = metadata["batch"]
             imgsz = metadata["imgsz"]
             names = metadata["names"]
             kpt_shape = metadata.get("kpt_shape")
-            end2end = metadata.get("args", {}).get("nms", False)
+            kpt_names = metadata.get("kpt_names")
+            end2end = metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)
             dynamic = metadata.get("args", {}).get("dynamic", dynamic)
             ch = metadata.get("channels", 3)
         elif not (pt or triton or nn_module):
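
Note: the `eval` → `ast.literal_eval` switch is a security hardening. `literal_eval` parses only Python literals (strings, numbers, tuples, lists, dicts, booleans, None) and raises on anything executable, so a malicious string in exported model metadata cannot run code:

```python
import ast

print(ast.literal_eval("{0: 'person', 1: 'bicycle'}"))   # dict literal parsed safely
print(ast.literal_eval("[640, 640]"))                     # list literal parsed safely
try:
    ast.literal_eval("__import__('os').system('id')")     # code, not a literal
except ValueError as e:
    print("rejected:", e)
```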
@@ -614,8 +678,7 @@ class AutoBackend(nn.Module):
         embed: list | None = None,
         **kwargs: Any,
     ) -> torch.Tensor | list[torch.Tensor]:
-        """
-        Run inference on an AutoBackend model.
+        """Run inference on an AutoBackend model.

         Args:
             im (torch.Tensor): The image tensor to perform inference on.
@@ -627,7 +690,7 @@ class AutoBackend(nn.Module):
         Returns:
             (torch.Tensor | list[torch.Tensor]): The raw output tensor(s) from the model.
         """
-        b, ch, h, w = im.shape  # batch, channel, height, width
+        _b, _ch, h, w = im.shape  # batch, channel, height, width
         if self.fp16 and im.dtype != torch.float16:
             im = im.half()  # to FP16
         if self.nhwc:
649
712
 
650
713
  # ONNX Runtime
651
714
  elif self.onnx or self.imx:
652
- if self.dynamic:
653
- im = im.cpu().numpy() # torch to numpy
654
- y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
655
- else:
715
+ if self.use_io_binding:
656
716
  if not self.cuda:
657
717
  im = im.cpu()
658
718
  self.io.bind_input(
@@ -665,13 +725,21 @@ class AutoBackend(nn.Module):
                 )
                 self.session.run_with_iobinding(self.io)
                 y = self.bindings
+            else:
+                im = im.cpu().numpy()  # torch to numpy
+                y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
             if self.imx:
                 if self.task == "detect":
                     # boxes, conf, cls
                     y = np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None]], axis=-1)
                 elif self.task == "pose":
                     # boxes, conf, kpts
-                    y = np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None], y[3]], axis=-1)
+                    y = np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None], y[3]], axis=-1, dtype=y[0].dtype)
+                elif self.task == "segment":
+                    y = (
+                        np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None], y[3]], axis=-1, dtype=y[0].dtype),
+                        y[4],
+                    )

         # OpenVINO
         elif self.xml:
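
Note: a shape sketch for the IMX post-processing above. Detect outputs arrive as separate boxes/conf/cls arrays and are concatenated into one (batch, anchors, 6) array; sizes here are dummies:

```python
import numpy as np

B, N = 1, 300  # assumed batch size and detection count
boxes = np.zeros((B, N, 4), dtype=np.float32)  # 4 box coordinates per detection
conf = np.zeros((B, N), dtype=np.float32)      # confidence per detection
cls = np.zeros((B, N), dtype=np.float32)       # class index per detection
y = np.concatenate([boxes, conf[:, :, None], cls[:, :, None]], axis=-1)
assert y.shape == (B, N, 6)  # box(4) + conf(1) + cls(1)
```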
@@ -771,11 +839,19 @@ class AutoBackend(nn.Module):
             im = im if isinstance(im, (list, tuple)) else [im]
             y = self.rknn_model.inference(inputs=im)

+        # Axelera
+        elif self.axelera:
+            y = self.ax_model(im.cpu())
+
+        # ExecuTorch
+        elif self.pte:
+            y = self.model.execute([im])
+
         # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
         else:
             im = im.cpu().numpy()
             if self.saved_model:  # SavedModel
-                y = self.model(im, training=False) if self.keras else self.model.serving_default(im)
+                y = self.model.serving_default(im)
                 if not isinstance(y, list):
                     y = [y]
             elif self.pb:  # GraphDef
@@ -811,7 +887,7 @@ class AutoBackend(nn.Module):
                         x[:, 6::3] *= h
                     y.append(x)
             # TF segment fixes: export is reversed vs ONNX export and protos are transposed
-            if len(y) == 2:  # segment with (det, proto) output order reversed
+            if self.task == "segment":  # segment with (det, proto) output order reversed
                 if len(y[1].shape) != 4:
                     y = list(reversed(y))  # should be y = (1, 116, 8400), (1, 160, 160, 32)
                 if y[1].shape[-1] == 6:  # end-to-end model
@@ -820,8 +896,6 @@ class AutoBackend(nn.Module):
                     y[1] = np.transpose(y[1], (0, 3, 1, 2))  # should be y = (1, 116, 8400), (1, 32, 160, 160)
             y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]

-        # for x in y:
-        #     print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape)  # debug shapes
         if isinstance(y, (list, tuple)):
             if len(self.names) == 999 and (self.task == "segment" or len(y) == 2):  # segments and names not defined
                 nc = y[0].shape[1] - y[1].shape[1] - 4  # y = (1, 32, 160, 160), (1, 116, 8400)
@@ -830,35 +904,35 @@ class AutoBackend(nn.Module):
         else:
             return self.from_numpy(y)

-    def from_numpy(self, x: np.ndarray) -> torch.Tensor:
-        """
-        Convert a numpy array to a tensor.
+    def from_numpy(self, x: np.ndarray | torch.Tensor) -> torch.Tensor:
+        """Convert a NumPy array to a torch tensor on the model device.

         Args:
-            x (np.ndarray): The array to be converted.
+            x (np.ndarray | torch.Tensor): Input array or tensor.

         Returns:
-            (torch.Tensor): The converted tensor
+            (torch.Tensor): Tensor on `self.device`.
         """
         return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x

     def warmup(self, imgsz: tuple[int, int, int, int] = (1, 3, 640, 640)) -> None:
-        """
-        Warm up the model by running one forward pass with a dummy input.
+        """Warm up the model by running one forward pass with a dummy input.

         Args:
-            imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width)
+            imgsz (tuple[int, int, int, int]): Dummy input shape in (batch, channels, height, width) format.
         """
         warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
         if any(warmup_types) and (self.device.type != "cpu" or self.triton):
             im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
             for _ in range(2 if self.jit else 1):
-                self.forward(im)  # warmup
+                self.forward(im)  # warmup model
+            warmup_boxes = torch.rand(1, 84, 16, device=self.device)  # 16 boxes works best empirically
+            warmup_boxes[:, :4] *= imgsz[-1]
+            non_max_suppression(warmup_boxes)  # warmup NMS

     @staticmethod
     def _model_type(p: str = "path/to/model.pt") -> list[bool]:
-        """
-        Take a path to a model file and return the model type.
+        """Take a path to a model file and return the model type.

         Args:
             p (str): Path to the model file.
@@ -867,8 +941,8 @@ class AutoBackend(nn.Module):
             (list[bool]): List of booleans indicating the model type.

         Examples:
-            >>> model = AutoBackend(model="path/to/model.onnx")
-            >>> model_type = model._model_type()  # returns "onnx"
+            >>> types = AutoBackend._model_type("path/to/model.onnx")
+            >>> assert types[2]  # onnx
         """
         from ultralytics.engine.exporter import export_formats

@@ -887,4 +961,4 @@ class AutoBackend(nn.Module):
         url = urlsplit(p)
         triton = bool(url.netloc) and bool(url.path) and url.scheme in {"http", "grpc"}

-        return types + [triton]
+        return [*types, triton]
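
Note: taken together, the autobackend.py changes keep the public surface intact. A minimal usage sketch based on the docstring examples above; the weights path and input shape are assumptions:

```python
import torch
from ultralytics.nn.autobackend import AutoBackend

model = AutoBackend(model="yolo26n.pt", device=torch.device("cpu"))  # assumed local weights
model.warmup(imgsz=(1, 3, 640, 640))     # no-op on CPU; on GPU runs a dummy pass (+ NMS warmup in 8.4.x)
y = model(torch.zeros(1, 3, 640, 640))   # BCHW input -> raw output tensor(s)

# The static type probe returns one boolean per export format, in export_formats order
types = AutoBackend._model_type("path/to/model.onnx")
assert types[2]  # onnx
```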
ultralytics/nn/modules/__init__.py
@@ -78,15 +78,19 @@ from .conv import (
 )
 from .head import (
     OBB,
+    OBB26,
     Classify,
     Detect,
     LRPCHead,
     Pose,
+    Pose26,
     RTDETRDecoder,
     Segment,
+    Segment26,
     WorldDetect,
     YOLOEDetect,
     YOLOESegment,
+    YOLOESegment26,
     v10Detect,
 )
 from .transformer import (
@@ -103,80 +107,84 @@ from .transformer import (
 )

 __all__ = (
-    "Conv",
-    "Conv2",
-    "LightConv",
-    "RepConv",
-    "DWConv",
-    "DWConvTranspose2d",
-    "ConvTranspose",
-    "Focus",
-    "GhostConv",
-    "ChannelAttention",
-    "SpatialAttention",
+    "AIFI",
+    "C1",
+    "C2",
+    "C2PSA",
+    "C3",
+    "C3TR",
     "CBAM",
-    "Concat",
-    "TransformerLayer",
-    "TransformerBlock",
-    "MLPBlock",
-    "LayerNorm2d",
+    "CIB",
     "DFL",
-    "HGBlock",
-    "HGStem",
+    "ELAN1",
+    "MLP",
+    "OBB",
+    "OBB26",
+    "PSA",
     "SPP",
+    "SPPELAN",
     "SPPF",
-    "C1",
-    "C2",
-    "C3",
+    "A2C2f",
+    "AConv",
+    "ADown",
+    "Attention",
+    "BNContrastiveHead",
+    "Bottleneck",
+    "BottleneckCSP",
     "C2f",
-    "C3k2",
-    "SCDown",
-    "C2fPSA",
-    "C2PSA",
     "C2fAttn",
-    "C3x",
-    "C3TR",
+    "C2fCIB",
+    "C2fPSA",
     "C3Ghost",
-    "GhostBottleneck",
-    "Bottleneck",
-    "BottleneckCSP",
-    "Proto",
-    "Detect",
-    "Segment",
-    "Pose",
+    "C3k2",
+    "C3x",
+    "CBFuse",
+    "CBLinear",
+    "ChannelAttention",
     "Classify",
-    "TransformerEncoderLayer",
-    "RepC3",
-    "RTDETRDecoder",
-    "AIFI",
+    "Concat",
+    "ContrastiveHead",
+    "Conv",
+    "Conv2",
+    "ConvTranspose",
+    "DWConv",
+    "DWConvTranspose2d",
     "DeformableTransformerDecoder",
     "DeformableTransformerDecoderLayer",
+    "Detect",
+    "Focus",
+    "GhostBottleneck",
+    "GhostConv",
+    "HGBlock",
+    "HGStem",
+    "ImagePoolingAttn",
+    "Index",
+    "LRPCHead",
+    "LayerNorm2d",
+    "LightConv",
+    "MLPBlock",
     "MSDeformAttn",
-    "MLP",
+    "MaxSigmoidAttnBlock",
+    "Pose",
+    "Pose26",
+    "Proto",
+    "RTDETRDecoder",
+    "RepC3",
+    "RepConv",
+    "RepNCSPELAN4",
+    "RepVGGDW",
     "ResNetLayer",
-    "OBB",
+    "SCDown",
+    "Segment",
+    "Segment26",
+    "SpatialAttention",
+    "TorchVision",
+    "TransformerBlock",
+    "TransformerEncoderLayer",
+    "TransformerLayer",
     "WorldDetect",
     "YOLOEDetect",
     "YOLOESegment",
+    "YOLOESegment26",
     "v10Detect",
-    "LRPCHead",
-    "ImagePoolingAttn",
-    "MaxSigmoidAttnBlock",
-    "ContrastiveHead",
-    "BNContrastiveHead",
-    "RepNCSPELAN4",
-    "ADown",
-    "SPPELAN",
-    "CBFuse",
-    "CBLinear",
-    "AConv",
-    "ELAN1",
-    "RepVGGDW",
-    "CIB",
-    "C2fCIB",
-    "Attention",
-    "PSA",
-    "TorchVision",
-    "Index",
-    "A2C2f",
 )
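
Note: the reordered `__all__` above is easy to sanity-check after installing either wheel. An illustrative snippet (not part of the package) that verifies every exported name actually resolves on the module:

```python
import importlib

mod = importlib.import_module("ultralytics.nn.modules")
missing = [name for name in mod.__all__ if not hasattr(mod, name)]
assert not missing, f"__all__ entries with no matching attribute: {missing}"
print(f"{len(mod.__all__)} exports verified")
```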