dgenerate-ultralytics-headless 8.3.196__py3-none-any.whl → 8.3.248__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +33 -34
- dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
- tests/__init__.py +5 -7
- tests/conftest.py +8 -15
- tests/test_cli.py +8 -10
- tests/test_cuda.py +9 -10
- tests/test_engine.py +29 -2
- tests/test_exports.py +69 -21
- tests/test_integrations.py +8 -11
- tests/test_python.py +109 -71
- tests/test_solutions.py +170 -159
- ultralytics/__init__.py +27 -9
- ultralytics/cfg/__init__.py +57 -64
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/Objects365.yaml +19 -15
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +19 -21
- ultralytics/cfg/datasets/VisDrone.yaml +5 -5
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +24 -2
- ultralytics/cfg/datasets/coco.yaml +2 -2
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/datasets/dog-pose.yaml +28 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +7 -7
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/default.yaml +96 -94
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
- ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
- ultralytics/cfg/models/v6/yolov6.yaml +1 -1
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +3 -4
- ultralytics/data/augment.py +286 -476
- ultralytics/data/base.py +18 -26
- ultralytics/data/build.py +151 -26
- ultralytics/data/converter.py +38 -50
- ultralytics/data/dataset.py +47 -75
- ultralytics/data/loaders.py +42 -49
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +41 -45
- ultralytics/engine/exporter.py +462 -462
- ultralytics/engine/model.py +150 -191
- ultralytics/engine/predictor.py +30 -40
- ultralytics/engine/results.py +177 -311
- ultralytics/engine/trainer.py +193 -120
- ultralytics/engine/tuner.py +77 -63
- ultralytics/engine/validator.py +39 -22
- ultralytics/hub/__init__.py +16 -19
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +5 -8
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +8 -10
- ultralytics/models/fastsam/predict.py +19 -30
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +5 -7
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +7 -8
- ultralytics/models/rtdetr/predict.py +15 -19
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +21 -23
- ultralytics/models/sam/__init__.py +15 -2
- ultralytics/models/sam/amg.py +14 -20
- ultralytics/models/sam/build.py +26 -19
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +29 -32
- ultralytics/models/sam/modules/blocks.py +83 -144
- ultralytics/models/sam/modules/decoders.py +22 -40
- ultralytics/models/sam/modules/encoders.py +44 -101
- ultralytics/models/sam/modules/memory_attention.py +16 -30
- ultralytics/models/sam/modules/sam.py +206 -79
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +18 -28
- ultralytics/models/sam/modules/utils.py +174 -50
- ultralytics/models/sam/predict.py +2268 -366
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +529 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +199 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +547 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +9 -12
- ultralytics/models/yolo/classify/train.py +15 -41
- ultralytics/models/yolo/classify/val.py +34 -32
- ultralytics/models/yolo/detect/predict.py +8 -11
- ultralytics/models/yolo/detect/train.py +13 -32
- ultralytics/models/yolo/detect/val.py +75 -63
- ultralytics/models/yolo/model.py +37 -53
- ultralytics/models/yolo/obb/predict.py +5 -14
- ultralytics/models/yolo/obb/train.py +11 -14
- ultralytics/models/yolo/obb/val.py +42 -39
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +7 -22
- ultralytics/models/yolo/pose/train.py +10 -22
- ultralytics/models/yolo/pose/val.py +40 -59
- ultralytics/models/yolo/segment/predict.py +16 -20
- ultralytics/models/yolo/segment/train.py +3 -12
- ultralytics/models/yolo/segment/val.py +106 -56
- ultralytics/models/yolo/world/train.py +12 -16
- ultralytics/models/yolo/world/train_world.py +11 -34
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +31 -56
- ultralytics/models/yolo/yoloe/train_seg.py +5 -10
- ultralytics/models/yolo/yoloe/val.py +16 -21
- ultralytics/nn/__init__.py +7 -7
- ultralytics/nn/autobackend.py +152 -80
- ultralytics/nn/modules/__init__.py +60 -60
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +133 -217
- ultralytics/nn/modules/conv.py +52 -97
- ultralytics/nn/modules/head.py +64 -116
- ultralytics/nn/modules/transformer.py +79 -89
- ultralytics/nn/modules/utils.py +16 -21
- ultralytics/nn/tasks.py +111 -156
- ultralytics/nn/text_model.py +40 -67
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +11 -17
- ultralytics/solutions/analytics.py +15 -16
- ultralytics/solutions/config.py +5 -6
- ultralytics/solutions/distance_calculation.py +10 -13
- ultralytics/solutions/heatmap.py +7 -13
- ultralytics/solutions/instance_segmentation.py +5 -8
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +12 -19
- ultralytics/solutions/object_cropper.py +8 -14
- ultralytics/solutions/parking_management.py +33 -31
- ultralytics/solutions/queue_management.py +10 -12
- ultralytics/solutions/region_counter.py +9 -12
- ultralytics/solutions/security_alarm.py +15 -20
- ultralytics/solutions/similarity_search.py +13 -17
- ultralytics/solutions/solutions.py +75 -74
- ultralytics/solutions/speed_estimation.py +7 -10
- ultralytics/solutions/streamlit_inference.py +4 -7
- ultralytics/solutions/templates/similarity-search.html +7 -18
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +3 -5
- ultralytics/trackers/bot_sort.py +10 -27
- ultralytics/trackers/byte_tracker.py +14 -30
- ultralytics/trackers/track.py +3 -6
- ultralytics/trackers/utils/gmc.py +11 -22
- ultralytics/trackers/utils/kalman_filter.py +37 -48
- ultralytics/trackers/utils/matching.py +12 -15
- ultralytics/utils/__init__.py +116 -116
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +17 -18
- ultralytics/utils/benchmarks.py +70 -70
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +5 -13
- ultralytics/utils/callbacks/comet.py +32 -46
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +7 -15
- ultralytics/utils/callbacks/platform.py +314 -38
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +23 -31
- ultralytics/utils/callbacks/wb.py +10 -13
- ultralytics/utils/checks.py +151 -87
- ultralytics/utils/cpu.py +3 -8
- ultralytics/utils/dist.py +19 -15
- ultralytics/utils/downloads.py +29 -41
- ultralytics/utils/errors.py +6 -14
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +7 -0
- ultralytics/utils/{export.py → export/engine.py} +16 -16
- ultralytics/utils/export/imx.py +325 -0
- ultralytics/utils/export/tensorflow.py +231 -0
- ultralytics/utils/files.py +24 -28
- ultralytics/utils/git.py +9 -11
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +212 -114
- ultralytics/utils/loss.py +15 -24
- ultralytics/utils/metrics.py +131 -160
- ultralytics/utils/nms.py +21 -30
- ultralytics/utils/ops.py +107 -165
- ultralytics/utils/patches.py +33 -21
- ultralytics/utils/plotting.py +122 -119
- ultralytics/utils/tal.py +28 -44
- ultralytics/utils/torch_utils.py +70 -187
- ultralytics/utils/tqdm.py +20 -20
- ultralytics/utils/triton.py +13 -19
- ultralytics/utils/tuner.py +17 -5
- dgenerate_ultralytics_headless-8.3.196.dist-info/RECORD +0 -281
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
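One structural change stands out in the listing: `ultralytics/utils/export.py` becomes an `ultralytics/utils/export/` package (`engine.py`, plus new `imx.py` and `tensorflow.py`), and the exporter diff below consumes it through per-backend conversion helpers. A sketch of the new import surface, taken from the import block in the diff that follows — the one-line descriptions are inferred from how each helper is called in the diff, not from official documentation:

from ultralytics.utils.export import (
    keras2pb,          # Keras model -> frozen TensorFlow GraphDef (.pb), used by export_pb()
    onnx2engine,       # ONNX file -> TensorRT engine, used by export_engine()
    onnx2saved_model,  # ONNX file -> TensorFlow SavedModel (+ TFLite variants), used by export_saved_model()
    pb2tfjs,           # GraphDef -> TensorFlow.js
    tflite2edgetpu,    # TFLite -> Edge TPU
    torch2imx,         # PyTorch module -> Sony IMX package
    torch2onnx,        # PyTorch module -> ONNX, used by export_onnx()
)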
ultralytics/engine/exporter.py
CHANGED
|
@@ -20,6 +20,8 @@ MNN | `mnn` | yolo11n.mnn
|
|
|
20
20
|
NCNN | `ncnn` | yolo11n_ncnn_model/
|
|
21
21
|
IMX | `imx` | yolo11n_imx_model/
|
|
22
22
|
RKNN | `rknn` | yolo11n_rknn_model/
|
|
23
|
+
ExecuTorch | `executorch` | yolo11n_executorch_model/
|
|
24
|
+
Axelera | `axelera` | yolo11n_axelera_model/
|
|
23
25
|
|
|
24
26
|
Requirements:
|
|
25
27
|
$ pip install "ultralytics[export]"
|
|
@@ -48,6 +50,8 @@ Inference:
|
|
|
48
50
|
yolo11n_ncnn_model # NCNN
|
|
49
51
|
yolo11n_imx_model # IMX
|
|
50
52
|
yolo11n_rknn_model # RKNN
|
|
53
|
+
yolo11n_executorch_model # ExecuTorch
|
|
54
|
+
yolo11n_axelera_model # Axelera
|
|
51
55
|
|
|
52
56
|
TensorFlow.js:
|
|
53
57
|
$ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
|
|
@@ -62,7 +66,6 @@ import re
|
|
|
62
66
|
import shutil
|
|
63
67
|
import subprocess
|
|
64
68
|
import time
|
|
65
|
-
import warnings
|
|
66
69
|
from copy import deepcopy
|
|
67
70
|
from datetime import datetime
|
|
68
71
|
from pathlib import Path
|
|
@@ -82,13 +85,17 @@ from ultralytics.utils import (
|
|
|
82
85
|
ARM64,
|
|
83
86
|
DEFAULT_CFG,
|
|
84
87
|
IS_COLAB,
|
|
88
|
+
IS_DEBIAN_BOOKWORM,
|
|
89
|
+
IS_DEBIAN_TRIXIE,
|
|
90
|
+
IS_DOCKER,
|
|
85
91
|
IS_JETSON,
|
|
92
|
+
IS_RASPBERRYPI,
|
|
93
|
+
IS_UBUNTU,
|
|
86
94
|
LINUX,
|
|
87
95
|
LOGGER,
|
|
88
96
|
MACOS,
|
|
89
97
|
MACOS_VERSION,
|
|
90
98
|
RKNN_CHIPS,
|
|
91
|
-
ROOT,
|
|
92
99
|
SETTINGS,
|
|
93
100
|
TORCH_VERSION,
|
|
94
101
|
WINDOWS,
|
|
@@ -98,21 +105,38 @@ from ultralytics.utils import (
|
|
|
98
105
|
get_default_args,
|
|
99
106
|
)
|
|
100
107
|
from ultralytics.utils.checks import (
|
|
108
|
+
IS_PYTHON_3_10,
|
|
109
|
+
IS_PYTHON_MINIMUM_3_9,
|
|
110
|
+
check_apt_requirements,
|
|
101
111
|
check_imgsz,
|
|
102
|
-
check_is_path_safe,
|
|
103
112
|
check_requirements,
|
|
104
113
|
check_version,
|
|
105
114
|
is_intel,
|
|
106
115
|
is_sudo_available,
|
|
107
116
|
)
|
|
108
|
-
from ultralytics.utils.
|
|
109
|
-
|
|
110
|
-
|
|
117
|
+
from ultralytics.utils.export import (
|
|
118
|
+
keras2pb,
|
|
119
|
+
onnx2engine,
|
|
120
|
+
onnx2saved_model,
|
|
121
|
+
pb2tfjs,
|
|
122
|
+
tflite2edgetpu,
|
|
123
|
+
torch2imx,
|
|
124
|
+
torch2onnx,
|
|
125
|
+
)
|
|
126
|
+
from ultralytics.utils.files import file_size
|
|
111
127
|
from ultralytics.utils.metrics import batch_probiou
|
|
112
128
|
from ultralytics.utils.nms import TorchNMS
|
|
113
129
|
from ultralytics.utils.ops import Profile
|
|
114
130
|
from ultralytics.utils.patches import arange_patch
|
|
115
|
-
from ultralytics.utils.torch_utils import
|
|
131
|
+
from ultralytics.utils.torch_utils import (
|
|
132
|
+
TORCH_1_10,
|
|
133
|
+
TORCH_1_11,
|
|
134
|
+
TORCH_1_13,
|
|
135
|
+
TORCH_2_1,
|
|
136
|
+
TORCH_2_4,
|
|
137
|
+
TORCH_2_9,
|
|
138
|
+
select_device,
|
|
139
|
+
)
|
|
116
140
|
|
|
117
141
|
|
|
118
142
|
def export_formats():
|
|
@@ -137,7 +161,7 @@ def export_formats():
|
|
|
137
161
|
True,
|
|
138
162
|
["batch", "dynamic", "half", "int8", "simplify", "nms", "fraction"],
|
|
139
163
|
],
|
|
140
|
-
["CoreML", "coreml", ".mlpackage", True, False, ["batch", "half", "int8", "nms"]],
|
|
164
|
+
["CoreML", "coreml", ".mlpackage", True, False, ["batch", "dynamic", "half", "int8", "nms"]],
|
|
141
165
|
["TensorFlow SavedModel", "saved_model", "_saved_model", True, True, ["batch", "int8", "keras", "nms"]],
|
|
142
166
|
["TensorFlow GraphDef", "pb", ".pb", True, True, ["batch"]],
|
|
143
167
|
["TensorFlow Lite", "tflite", ".tflite", True, False, ["batch", "half", "int8", "nms", "fraction"]],
|
|
@@ -148,13 +172,42 @@ def export_formats():
|
|
|
148
172
|
["NCNN", "ncnn", "_ncnn_model", True, True, ["batch", "half"]],
|
|
149
173
|
["IMX", "imx", "_imx_model", True, True, ["int8", "fraction", "nms"]],
|
|
150
174
|
["RKNN", "rknn", "_rknn_model", False, False, ["batch", "name"]],
|
|
175
|
+
["ExecuTorch", "executorch", "_executorch_model", True, False, ["batch"]],
|
|
176
|
+
["Axelera", "axelera", "_axelera_model", False, False, ["batch", "int8", "fraction"]],
|
|
151
177
|
]
|
|
152
178
|
return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU", "Arguments"], zip(*x)))
|
|
153
179
|
|
|
154
180
|
|
|
181
|
+
def best_onnx_opset(onnx, cuda=False) -> int:
|
|
182
|
+
"""Return max ONNX opset for this torch version with ONNX fallback."""
|
|
183
|
+
if TORCH_2_4: # _constants.ONNX_MAX_OPSET first defined in torch 1.13
|
|
184
|
+
opset = torch.onnx.utils._constants.ONNX_MAX_OPSET - 1 # use second-latest version for safety
|
|
185
|
+
if cuda:
|
|
186
|
+
opset -= 2 # fix CUDA ONNXRuntime NMS squeeze op errors
|
|
187
|
+
else:
|
|
188
|
+
version = ".".join(TORCH_VERSION.split(".")[:2])
|
|
189
|
+
opset = {
|
|
190
|
+
"1.8": 12,
|
|
191
|
+
"1.9": 12,
|
|
192
|
+
"1.10": 13,
|
|
193
|
+
"1.11": 14,
|
|
194
|
+
"1.12": 15,
|
|
195
|
+
"1.13": 17,
|
|
196
|
+
"2.0": 17, # reduced from 18 to fix ONNX errors
|
|
197
|
+
"2.1": 17, # reduced from 19
|
|
198
|
+
"2.2": 17, # reduced from 19
|
|
199
|
+
"2.3": 17, # reduced from 19
|
|
200
|
+
"2.4": 20,
|
|
201
|
+
"2.5": 20,
|
|
202
|
+
"2.6": 20,
|
|
203
|
+
"2.7": 20,
|
|
204
|
+
"2.8": 23,
|
|
205
|
+
}.get(version, 12)
|
|
206
|
+
return min(opset, onnx.defs.onnx_opset_version())
|
|
207
|
+
|
|
208
|
+
|
|
155
209
|
def validate_args(format, passed_args, valid_args):
|
|
156
|
-
"""
|
|
157
|
-
Validate arguments based on the export format.
|
|
210
|
+
"""Validate arguments based on the export format.
|
|
158
211
|
|
|
159
212
|
Args:
|
|
160
213
|
format (str): The export format.
|
|
@@ -175,15 +228,6 @@ def validate_args(format, passed_args, valid_args):
|
|
|
175
228
|
assert arg in valid_args, f"ERROR ❌️ argument '{arg}' is not supported for format='{format}'"
|
|
176
229
|
|
|
177
230
|
|
|
178
|
-
def gd_outputs(gd):
|
|
179
|
-
"""Return TensorFlow GraphDef model output node names."""
|
|
180
|
-
name_list, input_list = [], []
|
|
181
|
-
for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
|
|
182
|
-
name_list.append(node.name)
|
|
183
|
-
input_list.extend(node.input)
|
|
184
|
-
return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp"))
|
|
185
|
-
|
|
186
|
-
|
|
187
231
|
def try_export(inner_func):
|
|
188
232
|
"""YOLO export decorator, i.e. @try_export."""
|
|
189
233
|
inner_args = get_default_args(inner_func)
|
|
@@ -194,9 +238,12 @@ def try_export(inner_func):
|
|
|
194
238
|
dt = 0.0
|
|
195
239
|
try:
|
|
196
240
|
with Profile() as dt:
|
|
197
|
-
f
|
|
198
|
-
|
|
199
|
-
|
|
241
|
+
f = inner_func(*args, **kwargs) # exported file/dir or tuple of (file/dir, *)
|
|
242
|
+
path = f if isinstance(f, (str, Path)) else f[0]
|
|
243
|
+
mb = file_size(path)
|
|
244
|
+
assert mb > 0.0, "0.0 MB output model size"
|
|
245
|
+
LOGGER.info(f"{prefix} export success ✅ {dt.t:.1f}s, saved as '{path}' ({mb:.1f} MB)")
|
|
246
|
+
return f
|
|
200
247
|
except Exception as e:
|
|
201
248
|
LOGGER.error(f"{prefix} export failure {dt.t:.1f}s: {e}")
|
|
202
249
|
raise e
|
|
@@ -205,8 +252,7 @@ def try_export(inner_func):
|
|
|
205
252
|
|
|
206
253
|
|
|
207
254
|
class Exporter:
|
|
208
|
-
"""
|
|
209
|
-
A class for exporting YOLO models to various formats.
|
|
255
|
+
"""A class for exporting YOLO models to various formats.
|
|
210
256
|
|
|
211
257
|
This class provides functionality to export YOLO models to different formats including ONNX, TensorRT, CoreML,
|
|
212
258
|
TensorFlow, and others. It handles format validation, device selection, model preparation, and the actual export
|
|
@@ -256,8 +302,7 @@ class Exporter:
|
|
|
256
302
|
"""
|
|
257
303
|
|
|
258
304
|
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
|
|
259
|
-
"""
|
|
260
|
-
Initialize the Exporter class.
|
|
305
|
+
"""Initialize the Exporter class.
|
|
261
306
|
|
|
262
307
|
Args:
|
|
263
308
|
cfg (str, optional): Path to a configuration file.
|
|
@@ -269,7 +314,11 @@ class Exporter:
|
|
|
269
314
|
callbacks.add_integration_callbacks(self)
|
|
270
315
|
|
|
271
316
|
def __call__(self, model=None) -> str:
|
|
272
|
-
"""
|
|
317
|
+
"""Export a model and return the final exported path as a string.
|
|
318
|
+
|
|
319
|
+
Returns:
|
|
320
|
+
(str): Path to the exported file or directory (the last export artifact).
|
|
321
|
+
"""
|
|
273
322
|
t = time.time()
|
|
274
323
|
fmt = self.args.format.lower() # to lowercase
|
|
275
324
|
if fmt in {"tensorrt", "trt"}: # 'engine' aliases
|
|
@@ -284,15 +333,32 @@ class Exporter:
|
|
|
284
333
|
# Get the closest match if format is invalid
|
|
285
334
|
matches = difflib.get_close_matches(fmt, fmts, n=1, cutoff=0.6) # 60% similarity required to match
|
|
286
335
|
if not matches:
|
|
287
|
-
|
|
336
|
+
msg = "Model is already in PyTorch format." if fmt == "pt" else f"Invalid export format='{fmt}'."
|
|
337
|
+
raise ValueError(f"{msg} Valid formats are {fmts}")
|
|
288
338
|
LOGGER.warning(f"Invalid export format='{fmt}', updating to format='{matches[0]}'")
|
|
289
339
|
fmt = matches[0]
|
|
290
340
|
flags = [x == fmt for x in fmts]
|
|
291
341
|
if sum(flags) != 1:
|
|
292
342
|
raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}")
|
|
293
|
-
(
|
|
294
|
-
|
|
295
|
-
|
|
343
|
+
(
|
|
344
|
+
jit,
|
|
345
|
+
onnx,
|
|
346
|
+
xml,
|
|
347
|
+
engine,
|
|
348
|
+
coreml,
|
|
349
|
+
saved_model,
|
|
350
|
+
pb,
|
|
351
|
+
tflite,
|
|
352
|
+
edgetpu,
|
|
353
|
+
tfjs,
|
|
354
|
+
paddle,
|
|
355
|
+
mnn,
|
|
356
|
+
ncnn,
|
|
357
|
+
imx,
|
|
358
|
+
rknn,
|
|
359
|
+
executorch,
|
|
360
|
+
axelera,
|
|
361
|
+
) = flags # export booleans
|
|
296
362
|
|
|
297
363
|
is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs))
|
|
298
364
|
|
|
@@ -302,9 +368,10 @@ class Exporter:
|
|
|
302
368
|
LOGGER.warning("TensorRT requires GPU export, automatically assigning device=0")
|
|
303
369
|
self.args.device = "0"
|
|
304
370
|
if engine and "dla" in str(self.args.device): # convert int/list to str first
|
|
305
|
-
|
|
371
|
+
device_str = str(self.args.device)
|
|
372
|
+
dla = device_str.rsplit(":", 1)[-1]
|
|
306
373
|
self.args.device = "0" # update device to "0"
|
|
307
|
-
assert dla in {"0", "1"}, f"Expected
|
|
374
|
+
assert dla in {"0", "1"}, f"Expected device 'dla:0' or 'dla:1', but got {device_str}."
|
|
308
375
|
if imx and self.args.device is None and torch.cuda.is_available():
|
|
309
376
|
LOGGER.warning("Exporting on CPU while CUDA is available, setting device=0 for faster export on GPU.")
|
|
310
377
|
self.args.device = "0" # update device to "0"
|
|
@@ -313,23 +380,37 @@ class Exporter:
|
|
|
313
380
|
# Argument compatibility checks
|
|
314
381
|
fmt_keys = fmts_dict["Arguments"][flags.index(True) + 1]
|
|
315
382
|
validate_args(fmt, self.args, fmt_keys)
|
|
383
|
+
if axelera:
|
|
384
|
+
if not IS_PYTHON_3_10:
|
|
385
|
+
raise SystemError("Axelera export only supported on Python 3.10.")
|
|
386
|
+
if not self.args.int8:
|
|
387
|
+
LOGGER.warning("Setting int8=True for Axelera mixed-precision export.")
|
|
388
|
+
self.args.int8 = True
|
|
389
|
+
if model.task not in {"detect"}:
|
|
390
|
+
raise ValueError("Axelera export only supported for detection models.")
|
|
391
|
+
if not self.args.data:
|
|
392
|
+
self.args.data = "coco128.yaml" # Axelera default to coco128.yaml
|
|
316
393
|
if imx:
|
|
317
394
|
if not self.args.int8:
|
|
318
395
|
LOGGER.warning("IMX export requires int8=True, setting int8=True.")
|
|
319
396
|
self.args.int8 = True
|
|
320
|
-
if not self.args.nms:
|
|
397
|
+
if not self.args.nms and model.task in {"detect", "pose", "segment"}:
|
|
321
398
|
LOGGER.warning("IMX export requires nms=True, setting nms=True.")
|
|
322
399
|
self.args.nms = True
|
|
323
|
-
if model.task not in {"detect", "pose"}:
|
|
324
|
-
raise ValueError(
|
|
400
|
+
if model.task not in {"detect", "pose", "classify", "segment"}:
|
|
401
|
+
raise ValueError(
|
|
402
|
+
"IMX export only supported for detection, pose estimation, classification, and segmentation models."
|
|
403
|
+
)
|
|
325
404
|
if not hasattr(model, "names"):
|
|
326
405
|
model.names = default_class_names()
|
|
327
406
|
model.names = check_class_names(model.names)
|
|
328
407
|
if self.args.half and self.args.int8:
|
|
329
408
|
LOGGER.warning("half=True and int8=True are mutually exclusive, setting half=False.")
|
|
330
409
|
self.args.half = False
|
|
331
|
-
if self.args.half and
|
|
332
|
-
LOGGER.warning(
|
|
410
|
+
if self.args.half and jit and self.device.type == "cpu":
|
|
411
|
+
LOGGER.warning(
|
|
412
|
+
"half=True only compatible with GPU export for TorchScript, i.e. use device=0, setting half=False."
|
|
413
|
+
)
|
|
333
414
|
self.args.half = False
|
|
334
415
|
self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2) # check image size
|
|
335
416
|
if self.args.optimize:
|
|
@@ -346,18 +427,18 @@ class Exporter:
|
|
|
346
427
|
assert self.args.name in RKNN_CHIPS, (
|
|
347
428
|
f"Invalid processor name '{self.args.name}' for Rockchip RKNN export. Valid names are {RKNN_CHIPS}."
|
|
348
429
|
)
|
|
349
|
-
if self.args.int8 and tflite:
|
|
350
|
-
assert not getattr(model, "end2end", False), "TFLite INT8 export not supported for end2end models."
|
|
351
430
|
if self.args.nms:
|
|
352
431
|
assert not isinstance(model, ClassificationModel), "'nms=True' is not valid for classification models."
|
|
353
432
|
assert not tflite or not ARM64 or not LINUX, "TFLite export with NMS unsupported on ARM64 Linux"
|
|
354
|
-
|
|
433
|
+
assert not is_tf_format or TORCH_1_13, "TensorFlow exports with NMS require torch>=1.13"
|
|
434
|
+
assert not onnx or TORCH_1_13, "ONNX export with NMS requires torch>=1.13"
|
|
435
|
+
if getattr(model, "end2end", False) or isinstance(model.model[-1], RTDETRDecoder):
|
|
355
436
|
LOGGER.warning("'nms=True' is not available for end2end models. Forcing 'nms=False'.")
|
|
356
437
|
self.args.nms = False
|
|
357
438
|
self.args.conf = self.args.conf or 0.25 # set conf default value for nms export
|
|
358
|
-
if (engine or self.args.nms) and self.args.dynamic and self.args.batch == 1:
|
|
439
|
+
if (engine or coreml or self.args.nms) and self.args.dynamic and self.args.batch == 1:
|
|
359
440
|
LOGGER.warning(
|
|
360
|
-
f"'dynamic=True' model with '{'nms=True' if self.args.nms else 'format=
|
|
441
|
+
f"'dynamic=True' model with '{'nms=True' if self.args.nms else f'format={self.args.format}'}' requires max batch size, i.e. 'batch=16'"
|
|
361
442
|
)
|
|
362
443
|
if edgetpu:
|
|
363
444
|
if not LINUX or ARM64:
|
|
@@ -408,9 +489,13 @@ class Exporter:
|
|
|
408
489
|
model = model.fuse()
|
|
409
490
|
|
|
410
491
|
if imx:
|
|
411
|
-
from ultralytics.utils.
|
|
492
|
+
from ultralytics.utils.export.imx import FXModel
|
|
412
493
|
|
|
413
|
-
model = FXModel(model)
|
|
494
|
+
model = FXModel(model, self.imgsz)
|
|
495
|
+
if tflite or edgetpu:
|
|
496
|
+
from ultralytics.utils.export.tensorflow import tf_wrapper
|
|
497
|
+
|
|
498
|
+
model = tf_wrapper(model)
|
|
414
499
|
for m in model.modules():
|
|
415
500
|
if isinstance(m, Classify):
|
|
416
501
|
m.export = True
|
|
@@ -425,27 +510,13 @@ class Exporter:
|
|
|
425
510
|
elif isinstance(m, C2f) and not is_tf_format:
|
|
426
511
|
# EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph
|
|
427
512
|
m.forward = m.forward_split
|
|
428
|
-
if isinstance(m, Detect) and imx:
|
|
429
|
-
from ultralytics.utils.tal import make_anchors
|
|
430
|
-
|
|
431
|
-
m.anchors, m.strides = (
|
|
432
|
-
x.transpose(0, 1)
|
|
433
|
-
for x in make_anchors(
|
|
434
|
-
torch.cat([s / m.stride.unsqueeze(-1) for s in self.imgsz], dim=1), m.stride, 0.5
|
|
435
|
-
)
|
|
436
|
-
)
|
|
437
513
|
|
|
438
514
|
y = None
|
|
439
515
|
for _ in range(2): # dry runs
|
|
440
516
|
y = NMSModel(model, self.args)(im) if self.args.nms and not coreml and not imx else model(im)
|
|
441
|
-
if self.args.half and onnx and self.device.type != "cpu":
|
|
517
|
+
if self.args.half and (onnx or jit) and self.device.type != "cpu":
|
|
442
518
|
im, model = im.half(), model.half() # to FP16
|
|
443
519
|
|
|
444
|
-
# Filter warnings
|
|
445
|
-
warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # suppress TracerWarning
|
|
446
|
-
warnings.filterwarnings("ignore", category=UserWarning) # suppress shape prim::Constant missing ONNX warning
|
|
447
|
-
warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress CoreML np.bool deprecation warning
|
|
448
|
-
|
|
449
520
|
# Assign
|
|
450
521
|
self.im = im
|
|
451
522
|
self.model = model
|
|
@@ -477,6 +548,8 @@ class Exporter:
|
|
|
477
548
|
self.metadata["dla"] = dla # make sure `AutoBackend` uses correct dla device if it has one
|
|
478
549
|
if model.task == "pose":
|
|
479
550
|
self.metadata["kpt_shape"] = model.model[-1].kpt_shape
|
|
551
|
+
if hasattr(model, "kpt_names"):
|
|
552
|
+
self.metadata["kpt_names"] = model.kpt_names
|
|
480
553
|
|
|
481
554
|
LOGGER.info(
|
|
482
555
|
f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
|
|
@@ -485,37 +558,41 @@ class Exporter:
|
|
|
485
558
|
self.run_callbacks("on_export_start")
|
|
486
559
|
# Exports
|
|
487
560
|
f = [""] * len(fmts) # exported filenames
|
|
488
|
-
if jit
|
|
489
|
-
f[0]
|
|
561
|
+
if jit: # TorchScript
|
|
562
|
+
f[0] = self.export_torchscript()
|
|
490
563
|
if engine: # TensorRT required before ONNX
|
|
491
|
-
f[1]
|
|
564
|
+
f[1] = self.export_engine(dla=dla)
|
|
492
565
|
if onnx: # ONNX
|
|
493
|
-
f[2]
|
|
566
|
+
f[2] = self.export_onnx()
|
|
494
567
|
if xml: # OpenVINO
|
|
495
|
-
f[3]
|
|
568
|
+
f[3] = self.export_openvino()
|
|
496
569
|
if coreml: # CoreML
|
|
497
|
-
f[4]
|
|
570
|
+
f[4] = self.export_coreml()
|
|
498
571
|
if is_tf_format: # TensorFlow formats
|
|
499
572
|
self.args.int8 |= edgetpu
|
|
500
573
|
f[5], keras_model = self.export_saved_model()
|
|
501
574
|
if pb or tfjs: # pb prerequisite to tfjs
|
|
502
|
-
f[6]
|
|
575
|
+
f[6] = self.export_pb(keras_model=keras_model)
|
|
503
576
|
if tflite:
|
|
504
|
-
f[7]
|
|
577
|
+
f[7] = self.export_tflite()
|
|
505
578
|
if edgetpu:
|
|
506
|
-
f[8]
|
|
579
|
+
f[8] = self.export_edgetpu(tflite_model=Path(f[5]) / f"{self.file.stem}_full_integer_quant.tflite")
|
|
507
580
|
if tfjs:
|
|
508
|
-
f[9]
|
|
581
|
+
f[9] = self.export_tfjs()
|
|
509
582
|
if paddle: # PaddlePaddle
|
|
510
|
-
f[10]
|
|
583
|
+
f[10] = self.export_paddle()
|
|
511
584
|
if mnn: # MNN
|
|
512
|
-
f[11]
|
|
585
|
+
f[11] = self.export_mnn()
|
|
513
586
|
if ncnn: # NCNN
|
|
514
|
-
f[12]
|
|
587
|
+
f[12] = self.export_ncnn()
|
|
515
588
|
if imx:
|
|
516
|
-
f[13]
|
|
589
|
+
f[13] = self.export_imx()
|
|
517
590
|
if rknn:
|
|
518
|
-
f[14]
|
|
591
|
+
f[14] = self.export_rknn()
|
|
592
|
+
if executorch:
|
|
593
|
+
f[15] = self.export_executorch()
|
|
594
|
+
if axelera:
|
|
595
|
+
f[16] = self.export_axelera()
|
|
519
596
|
|
|
520
597
|
# Finish
|
|
521
598
|
f = [str(x) for x in f if x] # filter out '' and None
|
|
@@ -540,7 +617,7 @@ class Exporter:
|
|
|
540
617
|
)
|
|
541
618
|
|
|
542
619
|
self.run_callbacks("on_export_end")
|
|
543
|
-
return f #
|
|
620
|
+
return f # path to final export artifact
|
|
544
621
|
|
|
545
622
|
def get_int8_calibration_dataloader(self, prefix=""):
|
|
546
623
|
"""Build and return a dataloader for calibration of INT8 models."""
|
|
@@ -561,7 +638,9 @@ class Exporter:
|
|
|
561
638
|
f"The calibration dataset ({n} images) must have at least as many images as the batch size "
|
|
562
639
|
f"('batch={self.args.batch}')."
|
|
563
640
|
)
|
|
564
|
-
elif n <
|
|
641
|
+
elif self.args.format == "axelera" and n < 100:
|
|
642
|
+
LOGGER.warning(f"{prefix} >100 images required for Axelera calibration, found {n} images.")
|
|
643
|
+
elif self.args.format != "axelera" and n < 300:
|
|
565
644
|
LOGGER.warning(f"{prefix} >300 images recommended for INT8 calibration, found {n} images.")
|
|
566
645
|
return build_dataloader(dataset, batch=self.args.batch, workers=0, drop_last=True) # required for batch loading
|
|
567
646
|
|
|
@@ -580,21 +659,24 @@ class Exporter:
|
|
|
580
659
|
optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
|
|
581
660
|
else:
|
|
582
661
|
ts.save(str(f), _extra_files=extra_files)
|
|
583
|
-
return f
|
|
662
|
+
return f
|
|
584
663
|
|
|
585
664
|
@try_export
|
|
586
665
|
def export_onnx(self, prefix=colorstr("ONNX:")):
|
|
587
666
|
"""Export YOLO model to ONNX format."""
|
|
588
|
-
requirements = ["onnx>=1.12.0"]
|
|
667
|
+
requirements = ["onnx>=1.12.0,<2.0.0"]
|
|
589
668
|
if self.args.simplify:
|
|
590
|
-
requirements += ["onnxslim>=0.1.
|
|
669
|
+
requirements += ["onnxslim>=0.1.71", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
|
|
591
670
|
check_requirements(requirements)
|
|
592
|
-
import onnx
|
|
671
|
+
import onnx
|
|
672
|
+
|
|
673
|
+
opset = self.args.opset or best_onnx_opset(onnx, cuda="cuda" in self.device.type)
|
|
674
|
+
LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {opset}...")
|
|
675
|
+
if self.args.nms:
|
|
676
|
+
assert TORCH_1_13, f"'nms=True' ONNX export requires torch>=1.13 (found torch=={TORCH_VERSION})"
|
|
593
677
|
|
|
594
|
-
opset_version = self.args.opset or get_latest_opset()
|
|
595
|
-
LOGGER.info(f"\n{prefix} starting export with onnx {onnx.__version__} opset {opset_version}...")
|
|
596
678
|
f = str(self.file.with_suffix(".onnx"))
|
|
597
|
-
output_names = ["output0", "output1"] if
|
|
679
|
+
output_names = ["output0", "output1"] if self.model.task == "segment" else ["output0"]
|
|
598
680
|
dynamic = self.args.dynamic
|
|
599
681
|
if dynamic:
|
|
600
682
|
dynamic = {"images": {0: "batch", 2: "height", 3: "width"}} # shape(1,3,640,640)
|
|
@@ -606,14 +688,14 @@ class Exporter:
|
|
|
606
688
|
if self.args.nms: # only batch size is dynamic with NMS
|
|
607
689
|
dynamic["output0"].pop(2)
|
|
608
690
|
if self.args.nms and self.model.task == "obb":
|
|
609
|
-
self.args.opset =
|
|
691
|
+
self.args.opset = opset # for NMSModel
|
|
610
692
|
|
|
611
693
|
with arange_patch(self.args):
|
|
612
|
-
|
|
694
|
+
torch2onnx(
|
|
613
695
|
NMSModel(self.model, self.args) if self.args.nms else self.model,
|
|
614
696
|
self.im,
|
|
615
697
|
f,
|
|
616
|
-
opset=
|
|
698
|
+
opset=opset,
|
|
617
699
|
input_names=["images"],
|
|
618
700
|
output_names=output_names,
|
|
619
701
|
dynamic=dynamic or None,
|
|
@@ -638,8 +720,23 @@ class Exporter:
|
|
|
638
720
|
meta = model_onnx.metadata_props.add()
|
|
639
721
|
meta.key, meta.value = k, str(v)
|
|
640
722
|
|
|
723
|
+
# IR version
|
|
724
|
+
if getattr(model_onnx, "ir_version", 0) > 10:
|
|
725
|
+
LOGGER.info(f"{prefix} limiting IR version {model_onnx.ir_version} to 10 for ONNXRuntime compatibility...")
|
|
726
|
+
model_onnx.ir_version = 10
|
|
727
|
+
|
|
728
|
+
# FP16 conversion for CPU export (GPU exports are already FP16 from model.half() during tracing)
|
|
729
|
+
if self.args.half and self.args.format == "onnx" and self.device.type == "cpu":
|
|
730
|
+
try:
|
|
731
|
+
from onnxruntime.transformers import float16
|
|
732
|
+
|
|
733
|
+
LOGGER.info(f"{prefix} converting to FP16...")
|
|
734
|
+
model_onnx = float16.convert_float_to_float16(model_onnx, keep_io_types=True)
|
|
735
|
+
except Exception as e:
|
|
736
|
+
LOGGER.warning(f"{prefix} FP16 conversion failure: {e}")
|
|
737
|
+
|
|
641
738
|
onnx.save(model_onnx, f)
|
|
642
|
-
return f
|
|
739
|
+
return f
|
|
643
740
|
|
|
644
741
|
@try_export
|
|
645
742
|
def export_openvino(self, prefix=colorstr("OpenVINO:")):
|
|
@@ -649,7 +746,7 @@ class Exporter:
|
|
|
649
746
|
import openvino as ov
|
|
650
747
|
|
|
651
748
|
LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...")
|
|
652
|
-
assert
|
|
749
|
+
assert TORCH_2_1, f"OpenVINO export requires torch>=2.1 but torch=={TORCH_VERSION} is installed"
|
|
653
750
|
ov_model = ov.convert_model(
|
|
654
751
|
NMSModel(self.model, self.args) if self.args.nms else self.model,
|
|
655
752
|
input=None if self.args.dynamic else [self.im.shape],
|
|
@@ -678,13 +775,6 @@ class Exporter:
|
|
|
678
775
|
check_requirements("nncf>=2.14.0")
|
|
679
776
|
import nncf
|
|
680
777
|
|
|
681
|
-
def transform_fn(data_item) -> np.ndarray:
|
|
682
|
-
"""Quantization transform function."""
|
|
683
|
-
data_item: torch.Tensor = data_item["img"] if isinstance(data_item, dict) else data_item
|
|
684
|
-
assert data_item.dtype == torch.uint8, "Input image must be uint8 for the quantization preprocessing"
|
|
685
|
-
im = data_item.numpy().astype(np.float32) / 255.0 # uint8 to fp16/32 and 0-255 to 0.0-1.0
|
|
686
|
-
return np.expand_dims(im, 0) if im.ndim == 3 else im
|
|
687
|
-
|
|
688
778
|
# Generate calibration data for integer quantization
|
|
689
779
|
ignored_scope = None
|
|
690
780
|
if isinstance(self.model.model[-1], Detect):
|
|
@@ -703,18 +793,18 @@ class Exporter:
|
|
|
703
793
|
|
|
704
794
|
quantized_ov_model = nncf.quantize(
|
|
705
795
|
model=ov_model,
|
|
706
|
-
calibration_dataset=nncf.Dataset(self.get_int8_calibration_dataloader(prefix),
|
|
796
|
+
calibration_dataset=nncf.Dataset(self.get_int8_calibration_dataloader(prefix), self._transform_fn),
|
|
707
797
|
preset=nncf.QuantizationPreset.MIXED,
|
|
708
798
|
ignored_scope=ignored_scope,
|
|
709
799
|
)
|
|
710
800
|
serialize(quantized_ov_model, fq_ov)
|
|
711
|
-
return fq
|
|
801
|
+
return fq
|
|
712
802
|
|
|
713
803
|
f = str(self.file).replace(self.file.suffix, f"_openvino_model{os.sep}")
|
|
714
804
|
f_ov = str(Path(f) / self.file.with_suffix(".xml").name)
|
|
715
805
|
|
|
716
806
|
serialize(ov_model, f_ov)
|
|
717
|
-
return f
|
|
807
|
+
return f
|
|
718
808
|
|
|
719
809
|
@try_export
|
|
720
810
|
def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
|
|
@@ -730,23 +820,24 @@ class Exporter:
|
|
|
730
820
|
"x2paddle",
|
|
731
821
|
)
|
|
732
822
|
)
|
|
733
|
-
import x2paddle
|
|
734
|
-
from x2paddle.convert import pytorch2paddle
|
|
823
|
+
import x2paddle
|
|
824
|
+
from x2paddle.convert import pytorch2paddle
|
|
735
825
|
|
|
736
826
|
LOGGER.info(f"\n{prefix} starting export with X2Paddle {x2paddle.__version__}...")
|
|
737
827
|
f = str(self.file).replace(self.file.suffix, f"_paddle_model{os.sep}")
|
|
738
828
|
|
|
739
829
|
pytorch2paddle(module=self.model, save_dir=f, jit_type="trace", input_examples=[self.im]) # export
|
|
740
830
|
YAML.save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml
|
|
741
|
-
return f
|
|
831
|
+
return f
|
|
742
832
|
|
|
743
833
|
@try_export
|
|
744
834
|
def export_mnn(self, prefix=colorstr("MNN:")):
|
|
745
835
|
"""Export YOLO model to MNN format using MNN https://github.com/alibaba/MNN."""
|
|
746
|
-
|
|
836
|
+
assert TORCH_1_10, "MNN export requires torch>=1.10.0 to avoid segmentation faults"
|
|
837
|
+
f_onnx = self.export_onnx() # get onnx model first
|
|
747
838
|
|
|
748
839
|
check_requirements("MNN>=2.9.6")
|
|
749
|
-
import MNN
|
|
840
|
+
import MNN
|
|
750
841
|
from MNN.tools import mnnconvert
|
|
751
842
|
|
|
752
843
|
# Setup and checks
|
|
@@ -763,98 +854,73 @@ class Exporter:
|
|
|
763
854
|
convert_scratch = Path(self.file.parent / ".__convert_external_data.bin")
|
|
764
855
|
if convert_scratch.exists():
|
|
765
856
|
convert_scratch.unlink()
|
|
766
|
-
return f
|
|
857
|
+
return f
|
|
767
858
|
|
|
768
859
|
@try_export
|
|
769
860
|
def export_ncnn(self, prefix=colorstr("NCNN:")):
|
|
770
861
|
"""Export YOLO model to NCNN format using PNNX https://github.com/pnnx/pnnx."""
|
|
771
862
|
check_requirements("ncnn", cmds="--no-deps") # no deps to avoid installing opencv-python
|
|
772
|
-
|
|
863
|
+
check_requirements("pnnx")
|
|
864
|
+
import ncnn
|
|
865
|
+
import pnnx
|
|
773
866
|
|
|
774
|
-
LOGGER.info(f"\n{prefix} starting export with NCNN {ncnn.__version__}...")
|
|
867
|
+
LOGGER.info(f"\n{prefix} starting export with NCNN {ncnn.__version__} and PNNX {pnnx.__version__}...")
|
|
775
868
|
f = Path(str(self.file).replace(self.file.suffix, f"_ncnn_model{os.sep}"))
|
|
776
|
-
f_ts = self.file.with_suffix(".torchscript")
|
|
777
869
|
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
)
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
except Exception as e:
|
|
793
|
-
release = "20240410"
|
|
794
|
-
asset = f"pnnx-{release}-{system}.zip"
|
|
795
|
-
LOGGER.warning(f"{prefix} PNNX GitHub assets not found: {e}, using default {asset}")
|
|
796
|
-
unzip_dir = safe_download(f"https://github.com/pnnx/pnnx/releases/download/{release}/{asset}", delete=True)
|
|
797
|
-
if check_is_path_safe(Path.cwd(), unzip_dir): # avoid path traversal security vulnerability
|
|
798
|
-
shutil.move(src=unzip_dir / name, dst=pnnx) # move binary to ROOT
|
|
799
|
-
pnnx.chmod(0o777) # set read, write, and execute permissions for everyone
|
|
800
|
-
shutil.rmtree(unzip_dir) # delete unzip dir
|
|
801
|
-
|
|
802
|
-
ncnn_args = [
|
|
803
|
-
f"ncnnparam={f / 'model.ncnn.param'}",
|
|
804
|
-
f"ncnnbin={f / 'model.ncnn.bin'}",
|
|
805
|
-
f"ncnnpy={f / 'model_ncnn.py'}",
|
|
806
|
-
]
|
|
807
|
-
|
|
808
|
-
pnnx_args = [
|
|
809
|
-
f"pnnxparam={f / 'model.pnnx.param'}",
|
|
810
|
-
f"pnnxbin={f / 'model.pnnx.bin'}",
|
|
811
|
-
f"pnnxpy={f / 'model_pnnx.py'}",
|
|
812
|
-
f"pnnxonnx={f / 'model.pnnx.onnx'}",
|
|
813
|
-
]
|
|
814
|
-
|
|
815
|
-
cmd = [
|
|
816
|
-
str(pnnx),
|
|
817
|
-
str(f_ts),
|
|
818
|
-
*ncnn_args,
|
|
819
|
-
*pnnx_args,
|
|
820
|
-
f"fp16={int(self.args.half)}",
|
|
821
|
-
f"device={self.device.type}",
|
|
822
|
-
f'inputshape="{[self.args.batch, 3, *self.imgsz]}"',
|
|
823
|
-
]
|
|
870
|
+
ncnn_args = dict(
|
|
871
|
+
ncnnparam=(f / "model.ncnn.param").as_posix(),
|
|
872
|
+
ncnnbin=(f / "model.ncnn.bin").as_posix(),
|
|
873
|
+
ncnnpy=(f / "model_ncnn.py").as_posix(),
|
|
874
|
+
)
|
|
875
|
+
|
|
876
|
+
pnnx_args = dict(
|
|
877
|
+
ptpath=(f / "model.pt").as_posix(),
|
|
878
|
+
pnnxparam=(f / "model.pnnx.param").as_posix(),
|
|
879
|
+
pnnxbin=(f / "model.pnnx.bin").as_posix(),
|
|
880
|
+
pnnxpy=(f / "model_pnnx.py").as_posix(),
|
|
881
|
+
pnnxonnx=(f / "model.pnnx.onnx").as_posix(),
|
|
882
|
+
)
|
|
883
|
+
|
|
824
884
|
f.mkdir(exist_ok=True) # make ncnn_model directory
|
|
825
|
-
|
|
826
|
-
subprocess.run(cmd, check=True)
|
|
885
|
+
pnnx.export(self.model, inputs=self.im, **ncnn_args, **pnnx_args, fp16=self.args.half, device=self.device.type)
|
|
827
886
|
|
|
828
|
-
|
|
829
|
-
pnnx_files = [x.rsplit("=", 1)[-1] for x in pnnx_args]
|
|
830
|
-
for f_debug in ("debug.bin", "debug.param", "debug2.bin", "debug2.param", *pnnx_files):
|
|
887
|
+
for f_debug in ("debug.bin", "debug.param", "debug2.bin", "debug2.param", *pnnx_args.values()):
|
|
831
888
|
Path(f_debug).unlink(missing_ok=True)
|
|
832
889
|
|
|
833
890
|
YAML.save(f / "metadata.yaml", self.metadata) # add metadata.yaml
|
|
834
|
-
return str(f)
|
|
891
|
+
return str(f)
|
|
835
892
|
|
|
836
893
|
@try_export
|
|
837
894
|
def export_coreml(self, prefix=colorstr("CoreML:")):
|
|
838
895
|
"""Export YOLO model to CoreML format."""
|
|
839
896
|
mlmodel = self.args.format.lower() == "mlmodel" # legacy *.mlmodel export format requested
|
|
840
|
-
check_requirements(
|
|
841
|
-
|
|
897
|
+
check_requirements(
|
|
898
|
+
["coremltools>=9.0", "numpy>=1.14.5,<=2.3.5"]
|
|
899
|
+
) # latest numpy 2.4.0rc1 breaks coremltools exports
|
|
900
|
+
import coremltools as ct
|
|
842
901
|
|
|
843
902
|
LOGGER.info(f"\n{prefix} starting export with coremltools {ct.__version__}...")
|
|
844
903
|
assert not WINDOWS, "CoreML export is not supported on Windows, please run on macOS or Linux."
|
|
845
|
-
assert
|
|
904
|
+
assert TORCH_1_11, "CoreML export requires torch>=1.11"
|
|
905
|
+
if self.args.batch > 1:
|
|
906
|
+
assert self.args.dynamic, (
|
|
907
|
+
"batch sizes > 1 are not supported without 'dynamic=True' for CoreML export. Please retry at 'dynamic=True'."
|
|
908
|
+
)
|
|
909
|
+
if self.args.dynamic:
|
|
910
|
+
assert not self.args.nms, (
|
|
911
|
+
"'nms=True' cannot be used together with 'dynamic=True' for CoreML export. Please disable one of them."
|
|
912
|
+
)
|
|
913
|
+
assert self.model.task != "classify", "'dynamic=True' is not supported for CoreML classification models."
|
|
846
914
|
f = self.file.with_suffix(".mlmodel" if mlmodel else ".mlpackage")
|
|
847
915
|
if f.is_dir():
|
|
848
916
|
shutil.rmtree(f)
|
|
849
917
|
|
|
850
|
-
bias = [0.0, 0.0, 0.0]
|
|
851
|
-
scale = 1 / 255
|
|
852
918
|
classifier_config = None
|
|
853
919
|
if self.model.task == "classify":
|
|
854
920
|
classifier_config = ct.ClassifierConfig(list(self.model.names.values()))
|
|
855
921
|
model = self.model
|
|
856
922
|
elif self.model.task == "detect":
|
|
857
|
-
model = IOSDetectModel(self.model, self.im) if self.args.nms else self.model
|
|
923
|
+
model = IOSDetectModel(self.model, self.im, mlprogram=not mlmodel) if self.args.nms else self.model
|
|
858
924
|
else:
|
|
859
925
|
if self.args.nms:
|
|
860
926
|
LOGGER.warning(f"{prefix} 'nms=True' is only available for Detect models like 'yolo11n.pt'.")
|
|
@@ -862,13 +928,26 @@ class Exporter:
|
|
|
862
928
|
model = self.model
|
|
863
929
|
ts = torch.jit.trace(model.eval(), self.im, strict=False) # TorchScript model
|
|
864
930
|
|
|
931
|
+
if self.args.dynamic:
|
|
932
|
+
input_shape = ct.Shape(
|
|
933
|
+
shape=(
|
|
934
|
+
ct.RangeDim(lower_bound=1, upper_bound=self.args.batch, default=1),
|
|
935
|
+
self.im.shape[1],
|
|
936
|
+
ct.RangeDim(lower_bound=32, upper_bound=self.imgsz[0] * 2, default=self.imgsz[0]),
|
|
937
|
+
ct.RangeDim(lower_bound=32, upper_bound=self.imgsz[1] * 2, default=self.imgsz[1]),
|
|
938
|
+
)
|
|
939
|
+
)
|
|
940
|
+
inputs = [ct.TensorType("image", shape=input_shape)]
|
|
941
|
+
else:
|
|
942
|
+
inputs = [ct.ImageType("image", shape=self.im.shape, scale=1 / 255, bias=[0.0, 0.0, 0.0])]
|
|
943
|
+
|
|
865
944
|
# Based on apple's documentation it is better to leave out the minimum_deployment target and let that get set
|
|
866
945
|
# Internally based on the model conversion and output type.
|
|
867
|
-
# Setting
|
|
946
|
+
# Setting minimum_deployment_target >= iOS16 will require setting compute_precision=ct.precision.FLOAT32.
|
|
868
947
|
# iOS16 adds in better support for FP16, but none of the CoreML NMS specifications handle FP16 as input.
|
|
869
948
|
ct_model = ct.convert(
|
|
870
949
|
ts,
|
|
871
|
-
inputs=
|
|
950
|
+
inputs=inputs,
|
|
872
951
|
classifier_config=classifier_config,
|
|
873
952
|
convert_to="neuralnetwork" if mlmodel else "mlprogram",
|
|
874
953
|
)
|
|
@@ -885,12 +964,7 @@ class Exporter:
|
|
|
885
964
|
config = cto.OptimizationConfig(global_config=op_config)
|
|
886
965
|
ct_model = cto.palettize_weights(ct_model, config=config)
|
|
887
966
|
if self.args.nms and self.model.task == "detect":
|
|
888
|
-
if mlmodel
|
|
889
|
-
weights_dir = None
|
|
890
|
-
else:
|
|
891
|
-
ct_model.save(str(f)) # save otherwise weights_dir does not exist
|
|
892
|
-
weights_dir = str(f / "Data/com.apple.CoreML/weights")
|
|
893
|
-
ct_model = self._pipeline_coreml(ct_model, weights_dir=weights_dir)
|
|
967
|
+
ct_model = self._pipeline_coreml(ct_model, weights_dir=None if mlmodel else ct_model.weights_dir)
|
|
894
968
|
|
|
895
969
|
m = self.metadata # metadata dict
|
|
896
970
|
ct_model.short_description = m.pop("description")
|
|
@@ -910,20 +984,21 @@ class Exporter:
|
|
|
910
984
|
)
|
|
911
985
|
f = f.with_suffix(".mlmodel")
|
|
912
986
|
ct_model.save(str(f))
|
|
913
|
-
return f
|
|
987
|
+
return f
|
|
914
988
|
|
|
915
989
|
@try_export
|
|
916
990
|
def export_engine(self, dla=None, prefix=colorstr("TensorRT:")):
|
|
917
991
|
"""Export YOLO model to TensorRT format https://developer.nvidia.com/tensorrt."""
|
|
918
992
|
assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. use 'device=0'"
|
|
919
|
-
f_onnx
|
|
993
|
+
f_onnx = self.export_onnx() # run before TRT import https://github.com/ultralytics/ultralytics/issues/7016
|
|
920
994
|
|
|
921
995
|
try:
|
|
922
|
-
import tensorrt as trt
|
|
996
|
+
import tensorrt as trt
|
|
923
997
|
except ImportError:
|
|
924
998
|
if LINUX:
|
|
925
|
-
|
|
926
|
-
|
|
999
|
+
cuda_version = torch.version.cuda.split(".")[0]
|
|
1000
|
+
check_requirements(f"tensorrt-cu{cuda_version}>7.0.0,!=10.1.0")
|
|
1001
|
+
import tensorrt as trt
|
|
927
1002
|
check_version(trt.__version__, ">=7.0.0", hard=True)
|
|
928
1003
|
check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
|
|
929
1004
|
|
|
@@ -931,7 +1006,7 @@ class Exporter:
|
|
|
931
1006
|
LOGGER.info(f"\n{prefix} starting export with TensorRT {trt.__version__}...")
|
|
932
1007
|
assert Path(f_onnx).exists(), f"failed to export ONNX file: {f_onnx}"
|
|
933
1008
|
f = self.file.with_suffix(".engine") # TensorRT engine file
|
|
934
|
-
|
|
1009
|
+
onnx2engine(
|
|
935
1010
|
f_onnx,
|
|
936
1011
|
f,
|
|
937
1012
|
self.args.workspace,
|
|
@@ -946,26 +1021,26 @@ class Exporter:
|
|
|
946
1021
|
prefix=prefix,
|
|
947
1022
|
)
|
|
948
1023
|
|
|
949
|
-
return f
|
|
1024
|
+
return f
|
|
950
1025
|
|
|
951
1026
|
@try_export
|
|
952
1027
|
def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")):
|
|
953
1028
|
"""Export YOLO model to TensorFlow SavedModel format."""
|
|
954
1029
|
cuda = torch.cuda.is_available()
|
|
955
1030
|
try:
|
|
956
|
-
import tensorflow as tf
|
|
1031
|
+
import tensorflow as tf
|
|
957
1032
|
except ImportError:
|
|
958
1033
|
check_requirements("tensorflow>=2.0.0,<=2.19.0")
|
|
959
|
-
import tensorflow as tf
|
|
1034
|
+
import tensorflow as tf
|
|
960
1035
|
check_requirements(
|
|
961
1036
|
(
|
|
962
1037
|
"tf_keras<=2.19.0", # required by 'onnx2tf' package
|
|
963
1038
|
"sng4onnx>=1.0.1", # required by 'onnx2tf' package
|
|
964
1039
|
"onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
|
|
965
|
-
"ai-edge-litert>=1.2.0,<1.4.0", # required by 'onnx2tf' package
|
|
966
|
-
"onnx>=1.12.0",
|
|
1040
|
+
"ai-edge-litert>=1.2.0" + (",<1.4.0" if MACOS else ""), # required by 'onnx2tf' package
|
|
1041
|
+
"onnx>=1.12.0,<2.0.0",
|
|
967
1042
|
"onnx2tf>=1.26.3",
|
|
968
|
-
"onnxslim>=0.1.
|
|
1043
|
+
"onnxslim>=0.1.71",
|
|
969
1044
|
"onnxruntime-gpu" if cuda else "onnxruntime",
|
|
970
1045
|
"protobuf>=5",
|
|
971
1046
|
),
|
|
@@ -984,80 +1059,50 @@ class Exporter:
|
|
|
984
1059
|
if f.is_dir():
|
|
985
1060
|
shutil.rmtree(f) # delete output folder
|
|
986
1061
|
|
|
987
|
-
#
|
|
988
|
-
|
|
989
|
-
if
|
|
990
|
-
|
|
1062
|
+
# Export to TF
|
|
1063
|
+
images = None
|
|
1064
|
+
if self.args.int8 and self.args.data:
|
|
1065
|
+
images = [batch["img"] for batch in self.get_int8_calibration_dataloader(prefix)]
|
|
1066
|
+
images = (
|
|
1067
|
+
torch.nn.functional.interpolate(torch.cat(images, 0).float(), size=self.imgsz)
|
|
1068
|
+
.permute(0, 2, 3, 1)
|
|
1069
|
+
.numpy()
|
|
1070
|
+
.astype(np.float32)
|
|
1071
|
+
)
|
|
991
1072
|
|
|
992
1073
|
# Export to ONNX
|
|
1074
|
+
if isinstance(self.model.model[-1], RTDETRDecoder):
|
|
1075
|
+
self.args.opset = self.args.opset or 19
|
|
1076
|
+
assert 16 <= self.args.opset <= 19, "RTDETR export requires opset>=16;<=19"
|
|
993
1077
|
self.args.simplify = True
|
|
994
|
-
f_onnx
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
images = [batch["img"] for batch in self.get_int8_calibration_dataloader(prefix)]
|
|
1003
|
-
images = torch.nn.functional.interpolate(torch.cat(images, 0).float(), size=self.imgsz).permute(
|
|
1004
|
-
0, 2, 3, 1
|
|
1005
|
-
)
|
|
1006
|
-
np.save(str(tmp_file), images.numpy().astype(np.float32)) # BHWC
|
|
1007
|
-
np_data = [["images", tmp_file, [[[[0, 0, 0]]]], [[[[255, 255, 255]]]]]]
|
|
1008
|
-
|
|
1009
|
-
import onnx2tf # scoped for after ONNX export for reduced conflict during import
|
|
1010
|
-
|
|
1011
|
-
LOGGER.info(f"{prefix} starting TFLite export with onnx2tf {onnx2tf.__version__}...")
|
|
1012
|
-
keras_model = onnx2tf.convert(
|
|
1013
|
-
input_onnx_file_path=f_onnx,
|
|
1014
|
-
output_folder_path=str(f),
|
|
1015
|
-
not_use_onnxsim=True,
|
|
1016
|
-
verbosity="error", # note INT8-FP16 activation bug https://github.com/ultralytics/ultralytics/issues/15873
|
|
1017
|
-
output_integer_quantized_tflite=self.args.int8,
|
|
1018
|
-
quant_type="per-tensor", # "per-tensor" (faster) or "per-channel" (slower but more accurate)
|
|
1019
|
-
custom_input_op_name_np_data_path=np_data,
|
|
1020
|
-
enable_batchmatmul_unfold=True, # fix lower no. of detected objects on GPU delegate
|
|
1021
|
-
output_signaturedefs=True, # fix error with Attention block group convolution
|
|
1022
|
-
disable_group_convolution=self.args.format in {"tfjs", "edgetpu"}, # fix error with group convolution
|
|
1078
|
+
f_onnx = self.export_onnx() # ensure ONNX is available
|
|
1079
|
+
keras_model = onnx2saved_model(
|
|
1080
|
+
f_onnx,
|
|
1081
|
+
f,
|
|
1082
|
+
int8=self.args.int8,
|
|
1083
|
+
images=images,
|
|
1084
|
+
disable_group_convolution=self.args.format in {"tfjs", "edgetpu"},
|
|
1085
|
+
prefix=prefix,
|
|
1023
1086
|
)
|
|
1024
1087
|
YAML.save(f / "metadata.yaml", self.metadata) # add metadata.yaml
|
|
1025
|
-
|
|
1026
|
-
# Remove/rename TFLite models
|
|
1027
|
-
if self.args.int8:
|
|
1028
|
-
tmp_file.unlink(missing_ok=True)
|
|
1029
|
-
for file in f.rglob("*_dynamic_range_quant.tflite"):
|
|
1030
|
-
file.rename(file.with_name(file.stem.replace("_dynamic_range_quant", "_int8") + file.suffix))
|
|
1031
|
-
for file in f.rglob("*_integer_quant_with_int16_act.tflite"):
|
|
1032
|
-
file.unlink() # delete extra fp16 activation TFLite files
|
|
1033
|
-
|
|
1034
1088
|
# Add TFLite metadata
|
|
1035
1089
|
for file in f.rglob("*.tflite"):
|
|
1036
|
-
|
|
1090
|
+
file.unlink() if "quant_with_int16_act.tflite" in str(file) else self._add_tflite_metadata(file)
|
|
1037
1091
|
|
|
1038
1092
|
return str(f), keras_model # or keras_model = tf.saved_model.load(f, tags=None, options=None)
|
|
1039
1093
|
|
|
1040
1094
|
@try_export
|
|
1041
1095
|
def export_pb(self, keras_model, prefix=colorstr("TensorFlow GraphDef:")):
|
|
1042
1096
|
"""Export YOLO model to TensorFlow GraphDef *.pb format https://github.com/leimao/Frozen-Graph-TensorFlow."""
|
|
1043
|
-
import tensorflow as tf # noqa
|
|
1044
|
-
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa
|
|
1045
|
-
|
|
1046
|
-
LOGGER.info(f"\n{prefix} starting export with tensorflow {tf.__version__}...")
|
|
1047
1097
|
f = self.file.with_suffix(".pb")
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype))
|
|
1051
|
-
frozen_func = convert_variables_to_constants_v2(m)
|
|
1052
|
-
frozen_func.graph.as_graph_def()
|
|
1053
|
-
tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)
|
|
1054
|
-
return f, None
|
|
1098
|
+
keras2pb(keras_model, f, prefix)
|
|
1099
|
+
return f
|
|
1055
1100
|
|
|
1056
1101
|
@try_export
|
|
1057
1102
|
def export_tflite(self, prefix=colorstr("TensorFlow Lite:")):
|
|
1058
1103
|
"""Export YOLO model to TensorFlow Lite format."""
|
|
1059
1104
|
# BUG https://github.com/ultralytics/ultralytics/issues/13436
|
|
1060
|
-
import tensorflow as tf
|
|
1105
|
+
import tensorflow as tf
|
|
1061
1106
|
|
|
1062
1107
|
LOGGER.info(f"\n{prefix} starting export with tensorflow {tf.__version__}...")
|
|
1063
1108
|
saved_model = Path(str(self.file).replace(self.file.suffix, "_saved_model"))
|
|
@@ -1067,7 +1112,111 @@ class Exporter:
             f = saved_model / f"{self.file.stem}_float16.tflite"  # fp32 in/out
         else:
             f = saved_model / f"{self.file.stem}_float32.tflite"
-        return str(f)
+        return str(f)
+
+    @try_export
+    def export_axelera(self, prefix=colorstr("Axelera:")):
+        """YOLO Axelera export."""
+        os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
+        try:
+            from axelera import compiler
+        except ImportError:
+            check_apt_requirements(
+                ["libllvm14", "libgirepository1.0-dev", "pkg-config", "libcairo2-dev", "build-essential", "cmake"]
+            )
+
+            check_requirements(
+                "axelera-voyager-sdk==1.5.2",
+                cmds="--extra-index-url https://software.axelera.ai/artifactory/axelera-runtime-pypi "
+                "--extra-index-url https://software.axelera.ai/artifactory/axelera-dev-pypi",
+            )
+
+        from axelera import compiler
+        from axelera.compiler import CompilerConfig
+
+        self.args.opset = 17  # hardcode opset for Axelera
+        onnx_path = self.export_onnx()
+        model_name = Path(onnx_path).stem
+        export_path = Path(f"{model_name}_axelera_model")
+        export_path.mkdir(exist_ok=True)
+
+        if "C2PSA" in self.model.__str__():  # YOLO11
+            config = CompilerConfig(
+                quantization_scheme="per_tensor_min_max",
+                ignore_weight_buffers=False,
+                resources_used=0.25,
+                aipu_cores_used=1,
+                multicore_mode="batch",
+                output_axm_format=True,
+                model_name=model_name,
+            )
+        else:  # YOLOv8
+            config = CompilerConfig(
+                tiling_depth=6,
+                split_buffer_promotion=True,
+                resources_used=0.25,
+                aipu_cores_used=1,
+                multicore_mode="batch",
+                output_axm_format=True,
+                model_name=model_name,
+            )
+
+        qmodel = compiler.quantize(
+            model=onnx_path,
+            calibration_dataset=self.get_int8_calibration_dataloader(prefix),
+            config=config,
+            transform_fn=self._transform_fn,
+        )
+
+        compiler.compile(model=qmodel, config=config, output_dir=export_path)
+
+        axm_name = f"{model_name}.axm"
+        axm_src = Path(axm_name)
+        axm_dst = export_path / axm_name
+
+        if axm_src.exists():
+            axm_src.replace(axm_dst)
+
+        YAML.save(export_path / "metadata.yaml", self.metadata)
+
+        return export_path
+
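A minimal usage sketch for the new Axelera path, assuming it is wired to an `axelera` format key in the formats table (the key itself is not visible in this hunk) and that calibration data is passed the same way as for other quantizing exports:

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
# "axelera" format key and data argument are assumptions for illustration;
# the method always quantizes via get_int8_calibration_dataloader()
model.export(format="axelera", data="coco8.yaml")
```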
+    @try_export
+    def export_executorch(self, prefix=colorstr("ExecuTorch:")):
+        """Exports a model to ExecuTorch (.pte) format into a dedicated directory and saves the required metadata,
+        following Ultralytics conventions.
+        """
+        LOGGER.info(f"\n{prefix} starting export with ExecuTorch...")
+        assert TORCH_2_9, f"ExecuTorch export requires torch>=2.9.0 but torch=={TORCH_VERSION} is installed"
+
+        # BUG executorch build on arm64 Docker requires packaging>=22.0 https://github.com/pypa/setuptools/issues/4483
+        if LINUX and ARM64 and IS_DOCKER:
+            check_requirements("packaging>=22.0")
+
+        check_requirements("ruamel.yaml<0.19.0")
+        check_requirements("executorch==1.0.1", "flatbuffers")
+        # Pin numpy to avoid coremltools errors with numpy>=2.4.0, must be separate
+        check_requirements("numpy<=2.3.5")
+
+        from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
+        from executorch.exir import to_edge_transform_and_lower
+
+        file_directory = Path(str(self.file).replace(self.file.suffix, "_executorch_model"))
+        file_directory.mkdir(parents=True, exist_ok=True)
+
+        file_pte = file_directory / self.file.with_suffix(".pte").name
+        sample_inputs = (self.im,)
+
+        et_program = to_edge_transform_and_lower(
+            torch.export.export(self.model, sample_inputs), partitioner=[XnnpackPartitioner()]
+        ).to_executorch()
+
+        with open(file_pte, "wb") as file:
+            file.write(et_program.buffer)
+
+        YAML.save(file_directory / "metadata.yaml", self.metadata)
+
+        return str(file_directory)
 
     @try_export
     def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
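The ExecuTorch method added above is the standard `torch.export` → XNNPACK lowering pipeline. A self-contained sketch of the same flow, with a toy module standing in for the YOLO model (assumes `executorch` is installed):

```python
import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower

model = torch.nn.Sequential(torch.nn.Conv2d(3, 16, 3), torch.nn.ReLU()).eval()  # stand-in model
sample_inputs = (torch.zeros(1, 3, 64, 64),)

# Capture the graph, delegate supported ops to the XNNPACK backend, then serialize to .pte
et_program = to_edge_transform_and_lower(
    torch.export.export(model, sample_inputs), partitioner=[XnnpackPartitioner()]
).to_executorch()

with open("model.pte", "wb") as fh:
    fh.write(et_program.buffer)  # flatbuffer consumed by the ExecuTorch runtime
```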
@@ -1077,65 +1226,33 @@ class Exporter:
         assert LINUX, f"export only supported on Linux. See {help_url}"
         if subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True).returncode != 0:
             LOGGER.info(f"\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}")
+            sudo = "sudo " if is_sudo_available() else ""
             for c in (
-                "curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -",
-                'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | '
-                "sudo tee /etc/apt/sources.list.d/coral-edgetpu.list",
-                "sudo apt-get update",
-                "sudo apt-get install edgetpu-compiler",
+                f"{sudo}mkdir -p /etc/apt/keyrings",
+                f"curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | {sudo}gpg --dearmor -o /etc/apt/keyrings/google.gpg",
+                f'echo "deb [signed-by=/etc/apt/keyrings/google.gpg] https://packages.cloud.google.com/apt coral-edgetpu-stable main" | {sudo}tee /etc/apt/sources.list.d/coral-edgetpu.list',
             ):
-                subprocess.run(c if is_sudo_available() else c.replace("sudo ", ""), shell=True, check=True)
-
+                subprocess.run(c, shell=True, check=True)
+            check_apt_requirements(["edgetpu-compiler"])
 
+        ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().rsplit(maxsplit=1)[-1]
         LOGGER.info(f"\n{prefix} starting export with Edge TPU compiler {ver}...")
+        tflite2edgetpu(tflite_file=tflite_model, output_dir=tflite_model.parent, prefix=prefix)
         f = str(tflite_model).replace(".tflite", "_edgetpu.tflite")  # Edge TPU model
-
-        cmd = (
-            "edgetpu_compiler "
-            f'--out_dir "{Path(f).parent}" '
-            "--show_operations "
-            "--search_delegate "
-            "--delegate_search_step 30 "
-            "--timeout_sec 180 "
-            f'"{tflite_model}"'
-        )
-        LOGGER.info(f"{prefix} running '{cmd}'")
-        subprocess.run(cmd, shell=True)
         self._add_tflite_metadata(f)
-        return f
+        return f
 
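The Edge TPU hunk above swaps the deprecated `apt-key add` flow for the keyring-based apt pattern that current Debian/Ubuntu releases require, and hands the compiler package itself to `check_apt_requirements`. The shell sequence it runs, extracted into a standalone sketch:

```python
import subprocess

sudo = "sudo "  # set to "" when already running as root
for c in (
    f"{sudo}mkdir -p /etc/apt/keyrings",
    f"curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | {sudo}gpg --dearmor -o /etc/apt/keyrings/google.gpg",
    f'echo "deb [signed-by=/etc/apt/keyrings/google.gpg] https://packages.cloud.google.com/apt coral-edgetpu-stable main" | {sudo}tee /etc/apt/sources.list.d/coral-edgetpu.list',
):
    subprocess.run(c, shell=True, check=True)  # register Google's apt key and the Coral repo
```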
     @try_export
     def export_tfjs(self, prefix=colorstr("TensorFlow.js:")):
         """Export YOLO model to TensorFlow.js format."""
         check_requirements("tensorflowjs")
-        import tensorflow as tf
-        import tensorflowjs as tfjs  # noqa
 
-        LOGGER.info(f"\n{prefix} starting export with tensorflowjs {tfjs.__version__}...")
         f = str(self.file).replace(self.file.suffix, "_web_model")  # js dir
         f_pb = str(self.file.with_suffix(".pb"))  # *.pb path
-
-        gd = tf.Graph().as_graph_def()  # TF GraphDef
-        with open(f_pb, "rb") as file:
-            gd.ParseFromString(file.read())
-        outputs = ",".join(gd_outputs(gd))
-        LOGGER.info(f"\n{prefix} output node names: {outputs}")
-
-        quantization = "--quantize_float16" if self.args.half else "--quantize_uint8" if self.args.int8 else ""
-        with spaces_in_path(f_pb) as fpb_, spaces_in_path(f) as f_:  # exporter can not handle spaces in path
-            cmd = (
-                "tensorflowjs_converter "
-                f'--input_format=tf_frozen_model {quantization} --output_node_names={outputs} "{fpb_}" "{f_}"'
-            )
-            LOGGER.info(f"{prefix} running '{cmd}'")
-            subprocess.run(cmd, shell=True)
-
-        if " " in f:
-            LOGGER.warning(f"{prefix} your model may not work correctly with spaces in path '{f}'.")
-
+        pb2tfjs(pb_file=f_pb, output_dir=f, half=self.args.half, int8=self.args.int8, prefix=prefix)
         # Add metadata
         YAML.save(Path(f) / "metadata.yaml", self.metadata)  # add metadata.yaml
-        return f
+        return f
 
     @try_export
     def export_rknn(self, prefix=colorstr("RKNN:")):
@@ -1151,7 +1268,7 @@ class Exporter:
 
         from rknn.api import RKNN
 
-        f, _ = self.export_onnx()
+        f = self.export_onnx()
         export_path = Path(f"{Path(f).stem}_rknn_model")
         export_path.mkdir(exist_ok=True)
 
@@ -1162,30 +1279,30 @@ class Exporter:
         f = f.replace(".onnx", f"-{self.args.name}.rknn")
         rknn.export_rknn(f"{export_path / f}")
         YAML.save(export_path / "metadata.yaml", self.metadata)
-        return export_path
+        return export_path
 
     @try_export
     def export_imx(self, prefix=colorstr("IMX:")):
         """Export YOLO model to IMX format."""
-        gptq = False
         assert LINUX, (
-            "export only supported on Linux. "
-            "See https://developer.aitrios.sony-semicon.com/en/
+            "Export only supported on Linux."
+            "See https://developer.aitrios.sony-semicon.com/en/docs/raspberry-pi-ai-camera/imx500-converter?version=3.17.3&progLang="
         )
+        assert not ARM64, "IMX export is not supported on ARM64 architectures."
+        assert IS_PYTHON_MINIMUM_3_9, "IMX export is only supported on Python 3.9 or above."
+
         if getattr(self.model, "end2end", False):
             raise ValueError("IMX export is not supported for end2end models.")
         check_requirements(
-            (
+            (
+                "model-compression-toolkit>=2.4.1",
+                "edge-mdt-cl<1.1.0",
+                "edge-mdt-tpc>=1.2.0",
+                "pydantic<=2.11.7",
+            )
         )
-        check_requirements("imx500-converter[pt]>=3.16.1")  # Separate requirements for imx500-converter
-        check_requirements("mct-quantizers>=1.6.0")  # Separate for compatibility with model-compression-toolkit
-
-        import model_compression_toolkit as mct
-        import onnx
-        from edgemdt_tpc import get_target_platform_capabilities
-        from sony_custom_layers.pytorch import multiclass_nms_with_indices
 
-
+        check_requirements("imx500-converter[pt]>=3.17.3")
 
         # Install Java>=17
         try:
@@ -1194,153 +1311,24 @@ class Exporter:
             java_version = int(version_match.group(1)) if version_match else 0
             assert java_version >= 17, "Java version too old"
         except (FileNotFoundError, subprocess.CalledProcessError, AssertionError):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                n_layers = 238  # 238 layers for fused YOLO11n
-            elif self.model.task == "pose":
-                layer_names = ["sub", "mul_2", "add_14", "cat_22", "cat_23", "mul_4", "add_15"]
-                weights_memory = 2437771.67
-                n_layers = 257  # 257 layers for fused YOLO11n-pose
-        else:  # YOLOv8
-            if self.model.task == "detect":
-                layer_names = ["sub", "mul", "add_6", "cat_17"]
-                weights_memory = 2550540.8
-                n_layers = 168  # 168 layers for fused YOLOv8n
-            elif self.model.task == "pose":
-                layer_names = ["add_7", "mul_2", "cat_19", "mul", "sub", "add_6", "cat_18"]
-                weights_memory = 2482451.85
-                n_layers = 187  # 187 layers for fused YOLO11n-pose
-
-        # Check if the model has the expected number of layers
-        if len(list(self.model.modules())) != n_layers:
-            raise ValueError("IMX export only supported for YOLOv8n and YOLO11n models.")
-
-        for layer_name in layer_names:
-            bit_cfg.set_manual_activation_bit_width([mct.core.common.network_editors.NodeNameFilter(layer_name)], 16)
-
-        config = mct.core.CoreConfig(
-            mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=10),
-            quantization_config=mct.core.QuantizationConfig(concat_threshold_update=True),
-            bit_width_config=bit_cfg,
-        )
-
-        resource_utilization = mct.core.ResourceUtilization(weights_memory=weights_memory)
-
-        quant_model = (
-            mct.gptq.pytorch_gradient_post_training_quantization(  # Perform Gradient-Based Post Training Quantization
-                model=self.model,
-                representative_data_gen=representative_dataset_gen,
-                target_resource_utilization=resource_utilization,
-                gptq_config=mct.gptq.get_pytorch_gptq_config(
-                    n_epochs=1000, use_hessian_based_weights=False, use_hessian_sample_attention=False
-                ),
-                core_config=config,
-                target_platform_capabilities=tpc,
-            )[0]
-            if gptq
-            else mct.ptq.pytorch_post_training_quantization(  # Perform post training quantization
-                in_module=self.model,
-                representative_data_gen=representative_dataset_gen,
-                target_resource_utilization=resource_utilization,
-                core_config=config,
-                target_platform_capabilities=tpc,
-            )[0]
-        )
-
-        class NMSWrapper(torch.nn.Module):
-            """Wrap PyTorch Module with multiclass_nms layer from sony_custom_layers."""
-
-            def __init__(
-                self,
-                model: torch.nn.Module,
-                score_threshold: float = 0.001,
-                iou_threshold: float = 0.7,
-                max_detections: int = 300,
-                task: str = "detect",
-            ):
-                """
-                Initialize NMSWrapper with PyTorch Module and NMS parameters.
-
-                Args:
-                    model (torch.nn.Module): Model instance.
-                    score_threshold (float): Score threshold for non-maximum suppression.
-                    iou_threshold (float): Intersection over union threshold for non-maximum suppression.
-                    max_detections (int): The number of detections to return.
-                    task (str): Task type, either 'detect' or 'pose'.
-                """
-                super().__init__()
-                self.model = model
-                self.score_threshold = score_threshold
-                self.iou_threshold = iou_threshold
-                self.max_detections = max_detections
-                self.task = task
-
-            def forward(self, images):
-                """Forward pass with model inference and NMS post-processing."""
-                # model inference
-                outputs = self.model(images)
-
-                boxes, scores = outputs[0], outputs[1]
-                nms_outputs = multiclass_nms_with_indices(
-                    boxes=boxes,
-                    scores=scores,
-                    score_threshold=self.score_threshold,
-                    iou_threshold=self.iou_threshold,
-                    max_detections=self.max_detections,
-                )
-                if self.task == "pose":
-                    kpts = outputs[2]  # (bs, max_detections, kpts 17*3)
-                    out_kpts = torch.gather(kpts, 1, nms_outputs.indices.unsqueeze(-1).expand(-1, -1, kpts.size(-1)))
-                    return nms_outputs.boxes, nms_outputs.scores, nms_outputs.labels, out_kpts
-                return nms_outputs
-
-        quant_model = NMSWrapper(
-            model=quant_model,
-            score_threshold=self.args.conf or 0.001,
-            iou_threshold=self.args.iou,
-            max_detections=self.args.max_det,
-            task=self.model.task,
-        ).to(self.device)
-
-        f = Path(str(self.file).replace(self.file.suffix, "_imx_model"))
-        f.mkdir(exist_ok=True)
-        onnx_model = f / Path(str(self.file.name).replace(self.file.suffix, "_imx.onnx"))  # js dir
-        mct.exporter.pytorch_export_model(
-            model=quant_model, save_model_path=onnx_model, repr_dataset=representative_dataset_gen
-        )
-
-        model_onnx = onnx.load(onnx_model)  # load onnx model
-        for k, v in self.metadata.items():
-            meta = model_onnx.metadata_props.add()
-            meta.key, meta.value = k, str(v)
-
-        onnx.save(model_onnx, onnx_model)
-
-        subprocess.run(
-            ["imxconv-pt", "-i", str(onnx_model), "-o", str(f), "--no-input-persistency", "--overwrite-output"],
-            check=True,
+            if IS_UBUNTU or IS_DEBIAN_TRIXIE:
+                LOGGER.info(f"\n{prefix} installing Java 21 for Ubuntu...")
+                check_apt_requirements(["openjdk-21-jre"])
+            elif IS_RASPBERRYPI or IS_DEBIAN_BOOKWORM:
+                LOGGER.info(f"\n{prefix} installing Java 17 for Raspberry Pi or Debian ...")
+                check_apt_requirements(["openjdk-17-jre"])
+
+        return torch2imx(
+            self.model,
+            self.file,
+            self.args.conf,
+            self.args.iou,
+            self.args.max_det,
+            metadata=self.metadata,
+            dataset=self.get_int8_calibration_dataloader(prefix),
+            prefix=prefix,
         )
 
-        # Needed for imx models.
-        with open(f / "labels.txt", "w", encoding="utf-8") as file:
-            file.writelines([f"{name}\n" for _, name in self.model.names.items()])
-
-        return f, None
-
     def _add_tflite_metadata(self, file):
         """Add metadata to *.tflite models per https://ai.google.dev/edge/litert/models/metadata."""
         import zipfile
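The IMX rewrite above moves quantization, NMS wrapping, and the `imxconv-pt` conversion into a `torch2imx` helper; the user-facing call is unchanged. A minimal sketch using the existing `imx` format key (calibration data assumed, since the export quantizes to INT8 internally):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
model.export(format="imx", data="coco8.yaml")  # INT8 calibration via the data argument
```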
@@ -1350,7 +1338,7 @@ class Exporter:
 
     def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")):
         """Create CoreML pipeline with NMS for YOLO detection models."""
-        import coremltools as ct
+        import coremltools as ct
 
         LOGGER.info(f"{prefix} starting pipeline with coremltools {ct.__version__}...")
 
@@ -1365,7 +1353,8 @@ class Exporter:
         names = self.metadata["names"]
         nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height
         nc = outs[0].type.multiArrayType.shape[-1]
-
+        if len(names) != nc:  # Hack fix for MLProgram NMS bug https://github.com/ultralytics/ultralytics/issues/22309
+            names = {**names, **{i: str(i) for i in range(len(names), nc)}}
 
         # Model from spec
         model = ct.models.MLModel(spec, weights_dir=weights_dir)
@@ -1442,6 +1431,14 @@ class Exporter:
         LOGGER.info(f"{prefix} pipeline success")
         return model
 
+    @staticmethod
+    def _transform_fn(data_item) -> np.ndarray:
+        """The transformation function for Axelera/OpenVINO quantization preprocessing."""
+        data_item: torch.Tensor = data_item["img"] if isinstance(data_item, dict) else data_item
+        assert data_item.dtype == torch.uint8, "Input image must be uint8 for the quantization preprocessing"
+        im = data_item.numpy().astype(np.float32) / 255.0  # uint8 to fp16/32 and 0 - 255 to 0.0 - 1.0
+        return im[None] if im.ndim == 3 else im
+
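The shared `_transform_fn` added above only rescales uint8 calibration images to float32 in [0, 1] and ensures a batch dimension. A standalone re-implementation for illustration:

```python
import numpy as np
import torch

def transform_fn(data_item):  # copy of the Exporter._transform_fn logic for illustration
    data_item = data_item["img"] if isinstance(data_item, dict) else data_item
    assert data_item.dtype == torch.uint8, "Input image must be uint8"
    im = data_item.numpy().astype(np.float32) / 255.0  # 0-255 -> 0.0-1.0
    return im[None] if im.ndim == 3 else im

batch = {"img": torch.randint(0, 256, (3, 640, 640), dtype=torch.uint8)}  # CHW dataloader item
out = transform_fn(batch)
print(out.shape, out.dtype)  # (1, 3, 640, 640) float32
```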
     def add_callback(self, event: str, callback):
         """Append the given callback to the specified event."""
         self.callbacks[event].append(callback)
@@ -1455,18 +1452,19 @@
 class IOSDetectModel(torch.nn.Module):
     """Wrap an Ultralytics YOLO model for Apple iOS CoreML export."""
 
-    def __init__(self, model, im):
-        """
-        Initialize the IOSDetectModel class with a YOLO model and example image.
+    def __init__(self, model, im, mlprogram=True):
+        """Initialize the IOSDetectModel class with a YOLO model and example image.
 
         Args:
             model (torch.nn.Module): The YOLO model to wrap.
             im (torch.Tensor): Example input tensor with shape (B, C, H, W).
+            mlprogram (bool): Whether exporting to MLProgram format to fix NMS bug.
         """
         super().__init__()
         _, _, h, w = im.shape  # batch, channel, height, width
         self.model = model
         self.nc = len(model.names)  # number of classes
+        self.mlprogram = mlprogram
         if w == h:
             self.normalize = 1.0 / w  # scalar
         else:
@@ -1478,15 +1476,18 @@ class IOSDetectModel(torch.nn.Module):
     def forward(self, x):
         """Normalize predictions of object detection model with input size-dependent factors."""
         xywh, cls = self.model(x)[0].transpose(0, 1).split((4, self.nc), 1)
-        return cls, xywh * self.normalize  # confidence (3780, 80), coordinates (3780, 4)
+        if self.mlprogram and self.nc % 80 != 0:  # NMS bug https://github.com/ultralytics/ultralytics/issues/22309
+            pad_length = int(((self.nc + 79) // 80) * 80) - self.nc  # pad class length to multiple of 80
+            cls = torch.nn.functional.pad(cls, (0, pad_length, 0, 0), "constant", 0)
+
+        return cls, xywh * self.normalize
 
 
 class NMSModel(torch.nn.Module):
     """Model wrapper with embedded NMS for Detect, Segment, Pose and OBB."""
 
     def __init__(self, model, args):
-        """
-        Initialize the NMSModel.
+        """Initialize the NMSModel.
 
         Args:
             model (torch.nn.Module): The model to wrap with NMS postprocessing.
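The forward-pass change above pads the class scores to a multiple of 80 to dodge a CoreML MLProgram NMS bug for non-COCO class counts. The arithmetic in isolation:

```python
import torch

nc = 17  # e.g. a custom 17-class model
cls = torch.rand(8400, nc)  # (num_anchors, num_classes) scores

pad_length = int(((nc + 79) // 80) * 80) - nc  # 63: rounds 17 up to the next multiple of 80
cls_padded = torch.nn.functional.pad(cls, (0, pad_length, 0, 0), "constant", 0)
print(cls_padded.shape)  # torch.Size([8400, 80]); padded classes score 0 and are never selected
```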
@@ -1499,15 +1500,14 @@ class NMSModel(torch.nn.Module):
         self.is_tf = self.args.format in frozenset({"saved_model", "tflite", "tfjs"})
 
     def forward(self, x):
-        """
-        Perform inference with NMS post-processing. Supports Detect, Segment, OBB and Pose.
+        """Perform inference with NMS post-processing. Supports Detect, Segment, OBB and Pose.
 
         Args:
             x (torch.Tensor): The preprocessed tensor with shape (N, 3, H, W).
 
         Returns:
-            (torch.Tensor): List of detections, each an (N, max_det, 4 + 2 + extra_shape) Tensor where N is the
-            number of detections after NMS.
+            (torch.Tensor): List of detections, each an (N, max_det, 4 + 2 + extra_shape) Tensor where N is the number
+                of detections after NMS.
         """
         from functools import partial
 
@@ -1530,7 +1530,7 @@ class NMSModel(torch.nn.Module):
         for i in range(bs):
             box, cls, score, extra = boxes[i], classes[i], scores[i], extras[i]
             mask = score > self.args.conf
-            if self.is_tf:
+            if self.is_tf or (self.args.format == "onnx" and self.obb):
                 # TFLite GatherND error if mask is empty
                 score *= mask
                 # Explicit length otherwise reshape error, hardcoded to `self.args.max_det * 5`
@@ -1538,17 +1538,16 @@ class NMSModel(torch.nn.Module):
             box, score, cls, extra = box[mask], score[mask], cls[mask], extra[mask]
             nmsbox = box.clone()
             # `8` is the minimum value experimented to get correct NMS results for obb
-            multiplier = 8 if self.obb else 1
+            multiplier = 8 if self.obb else 1 / max(len(self.model.names), 1)
             # Normalize boxes for NMS since large values for class offset causes issue with int8 quantization
             if self.args.format == "tflite":  # TFLite is already normalized
                 nmsbox *= multiplier
             else:
-                nmsbox = multiplier * nmsbox / torch.tensor(x.shape[2:], **kwargs).max()
-            if not self.args.agnostic_nms:  # class-specific NMS
+                nmsbox = multiplier * (nmsbox / torch.tensor(x.shape[2:], **kwargs).max())
+            if not self.args.agnostic_nms:  # class-wise NMS
                 end = 2 if self.obb else 4
                 # fully explicit expansion otherwise reshape error
-
-                cls_offset = cls.reshape(-1, 1).expand(nmsbox.shape[0], end)
+                cls_offset = cls.view(cls.shape[0], 1).expand(cls.shape[0], end)
                 offbox = nmsbox[:, :end] + cls_offset * multiplier
                 nmsbox = torch.cat((offbox, nmsbox[:, end:]), dim=-1)
             nms_fn = (
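The multiplier change above keeps the class-offset trick numerically small for int8 quantization: boxes are first normalized, then offset per class so a single class-agnostic NMS call behaves class-wise. The underlying idea, sketched with torchvision (illustrative, not the exporter's exact code):

```python
import torch
from torchvision.ops import nms

def class_offset_nms(boxes, scores, classes, iou=0.7):
    # Shift each class's boxes into a disjoint coordinate region so a single
    # NMS call never suppresses across classes
    offset = classes.unsqueeze(1) * (boxes.max() + 1)
    return nms(boxes + offset, scores, iou)

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0], [0.5, 0.5, 10.5, 10.5]])
scores = torch.tensor([0.9, 0.8])
classes = torch.tensor([0.0, 1.0])
print(class_offset_nms(boxes, scores, classes))  # tensor([0, 1]): both kept, different classes
```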
@@ -1560,6 +1559,7 @@ class NMSModel(torch.nn.Module):
                     or (self.args.format == "openvino" and self.args.int8)  # OpenVINO int8 error with triu
                 ),
                 iou_func=batch_probiou,
+                exit_early=False,
             )
             if self.obb
             else nms