onnxruntime_extensions-0.14.0-cp313-cp313-macosx_11_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnxruntime_extensions/__init__.py +82 -0
- onnxruntime_extensions/_cuops.py +564 -0
- onnxruntime_extensions/_extensions_pydll.cpython-313-darwin.so +0 -0
- onnxruntime_extensions/_extensions_pydll.pyi +45 -0
- onnxruntime_extensions/_hf_cvt.py +331 -0
- onnxruntime_extensions/_ocos.py +133 -0
- onnxruntime_extensions/_ortapi2.py +274 -0
- onnxruntime_extensions/_torch_cvt.py +231 -0
- onnxruntime_extensions/_version.py +2 -0
- onnxruntime_extensions/cmd.py +66 -0
- onnxruntime_extensions/cvt.py +306 -0
- onnxruntime_extensions/onnxprocess/__init__.py +12 -0
- onnxruntime_extensions/onnxprocess/_builder.py +53 -0
- onnxruntime_extensions/onnxprocess/_onnx_ops.py +1507 -0
- onnxruntime_extensions/onnxprocess/_session.py +355 -0
- onnxruntime_extensions/onnxprocess/_tensor.py +628 -0
- onnxruntime_extensions/onnxprocess/torch_wrapper.py +31 -0
- onnxruntime_extensions/pnp/__init__.py +13 -0
- onnxruntime_extensions/pnp/_base.py +124 -0
- onnxruntime_extensions/pnp/_imagenet.py +65 -0
- onnxruntime_extensions/pnp/_nlp.py +148 -0
- onnxruntime_extensions/pnp/_onnx_ops.py +1544 -0
- onnxruntime_extensions/pnp/_torchext.py +310 -0
- onnxruntime_extensions/pnp/_unifier.py +45 -0
- onnxruntime_extensions/pnp/_utils.py +302 -0
- onnxruntime_extensions/pp_api.py +83 -0
- onnxruntime_extensions/tools/__init__.py +0 -0
- onnxruntime_extensions/tools/add_HuggingFace_CLIPImageProcessor_to_model.py +171 -0
- onnxruntime_extensions/tools/add_pre_post_processing_to_model.py +535 -0
- onnxruntime_extensions/tools/pre_post_processing/__init__.py +4 -0
- onnxruntime_extensions/tools/pre_post_processing/pre_post_processor.py +395 -0
- onnxruntime_extensions/tools/pre_post_processing/step.py +227 -0
- onnxruntime_extensions/tools/pre_post_processing/steps/__init__.py +6 -0
- onnxruntime_extensions/tools/pre_post_processing/steps/general.py +366 -0
- onnxruntime_extensions/tools/pre_post_processing/steps/nlp.py +344 -0
- onnxruntime_extensions/tools/pre_post_processing/steps/vision.py +1157 -0
- onnxruntime_extensions/tools/pre_post_processing/utils.py +139 -0
- onnxruntime_extensions/util.py +186 -0
- onnxruntime_extensions-0.14.0.dist-info/LICENSE +21 -0
- onnxruntime_extensions-0.14.0.dist-info/METADATA +102 -0
- onnxruntime_extensions-0.14.0.dist-info/RECORD +43 -0
- onnxruntime_extensions-0.14.0.dist-info/WHEEL +6 -0
- onnxruntime_extensions-0.14.0.dist-info/top_level.txt +1 -0
onnxruntime_extensions/tools/add_pre_post_processing_to_model.py

@@ -0,0 +1,535 @@

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import argparse
import enum
import onnx
import os

from pathlib import Path
from typing import List, Union
# NOTE: If you're working on this script, install onnxruntime_extensions using `pip install -e .` from the repo root
# and run with `python -m onnxruntime_extensions.tools.add_pre_post_processing_to_model`.
# Running the file directly will result in an error from a relative import.
from .pre_post_processing import *


class ModelSource(enum.Enum):
    PYTORCH = 0
    TENSORFLOW = 1
    OTHER = 2


def imagenet_preprocessing(model_source: ModelSource = ModelSource.PYTORCH):
    """
    Common pre-processing for an imagenet-trained model.

    - Resize so the smallest side is 256
    - Center crop to 224 x 224
    - Convert image bytes to floating point values in the range 0..1
    - [Channels-last to channels-first (convert to ONNX layout) if the model came from pytorch and has NCHW layout]
    - Normalize
      - (value - mean) / stddev
      - for a pytorch model, this applies per-channel normalization parameters
      - for a tensorflow model, this simply moves the image bytes into the range -1..1
    - Add a batch dimension with a value of 1
    """

    # These utils cover both cases of typical pytorch/tensorflow pre-processing for an imagenet-trained model.
    # https://github.com/keras-team/keras/blob/b80dd12da9c0bc3f569eca3455e77762cf2ee8ef/keras/applications/imagenet_utils.py#L177

    steps = [
        Resize(256),
        CenterCrop(224, 224),
        ImageBytesToFloat()
    ]

    if model_source == ModelSource.PYTORCH:
        # pytorch model has NCHW layout
        steps.extend([
            ChannelsLastToChannelsFirst(),
            Normalize([(0.485, 0.229), (0.456, 0.224), (0.406, 0.225)], layout="CHW")
        ])
    else:
        # TF processing involves moving the data into the range -1..1 instead of 0..1.
        # ImageBytesToFloat converts to the range 0..1, so we use 0.5 for the mean to move into the range -0.5..0.5
        # and 0.5 for the stddev to expand to -1..1.
        steps.append(Normalize([(0.5, 0.5)], layout="HWC"))

    steps.append(Unsqueeze([0]))  # add batch dim

    return steps
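# A minimal usage sketch (model and file names are illustrative): the returned step list plugs into a
# PrePostProcessor after steps that decode the raw image bytes. `mobilenet` below does exactly this, plus
# post-processing.
#
#   model = onnx.load("mobilenetv2.onnx")
#   pipeline = PrePostProcessor([create_named_value("image", onnx.TensorProto.UINT8, ["num_bytes"])], 16)
#   pipeline.add_pre_processing([ConvertImageToBGR(),
#                                ReverseAxis(axis=2, dim_value=3, name="BGR_to_RGB"),
#                                *imagenet_preprocessing(ModelSource.PYTORCH)])
#   onnx.save_model(pipeline.run(model), "mobilenetv2.with_pre_processing.onnx")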


def mobilenet(model_file: Path, output_file: Path, model_source: ModelSource, onnx_opset: int = 16):
    model = onnx.load(str(model_file.resolve(strict=True)))
    inputs = [create_named_value("image", onnx.TensorProto.UINT8, ["num_bytes"])]

    pipeline = PrePostProcessor(inputs, onnx_opset)

    # support user providing encoded image bytes
    preprocessing = [
        ConvertImageToBGR(),  # custom op to convert jpg/png to BGR (output is HWC)
        ReverseAxis(axis=2, dim_value=3, name="BGR_to_RGB"),
    ]  # Normalization params are for RGB ordering
    # plug in default imagenet pre-processing
    preprocessing.extend(imagenet_preprocessing(model_source))

    pipeline.add_pre_processing(preprocessing)

    # for mobilenet we convert the score to probabilities with softmax if necessary. the TF model includes Softmax
    if model.graph.node[-1].op_type != "Softmax":
        pipeline.add_post_processing([Softmax()])

    new_model = pipeline.run(model)

    onnx.save_model(new_model, str(output_file.resolve()))
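# Illustrative programmatic use of `mobilenet` (paths are hypothetical). The updated model takes raw jpg/png
# bytes as its "image" input and outputs class probabilities:
#
#   mobilenet(Path("mobilenetv2.onnx"), Path("mobilenetv2.with_pre_post_processing.onnx"),
#             ModelSource.PYTORCH, onnx_opset=16)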


def superresolution(model_file: Path, output_file: Path, output_format: str, onnx_opset: int = 16):
    # TODO: There seems to be a split, with some super resolution models processing RGB input and some processing
    # the Y channel after converting to YCbCr.
    # For the sake of this example implementation we do the trickier YCbCr processing, as that involves joining the
    # Cb and Cr channels with the model output to create the resized image.
    # Model is from https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html
    model = onnx.load(str(model_file.resolve(strict=True)))
    inputs = [create_named_value("image", onnx.TensorProto.UINT8, ["num_bytes"])]

    # assuming input is *CHW, infer the input sizes from the model.
    # requires the model to have a fixed size for the input and output height and width.
    model_input_shape = model.graph.input[0].type.tensor_type.shape
    model_output_shape = model.graph.output[0].type.tensor_type.shape
    assert model_input_shape.dim[-1].HasField("dim_value")
    assert model_input_shape.dim[-2].HasField("dim_value")
    assert model_output_shape.dim[-1].HasField("dim_value")
    assert model_output_shape.dim[-2].HasField("dim_value")

    w_in = model_input_shape.dim[-1].dim_value
    h_in = model_input_shape.dim[-2].dim_value
    h_out = model_output_shape.dim[-2].dim_value
    w_out = model_output_shape.dim[-1].dim_value

    # pre/post processing for https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html
    pipeline = PrePostProcessor(inputs, onnx_opset)
    pipeline.add_pre_processing(
        [
            ConvertImageToBGR(),  # jpg/png image to BGR in HWC layout
            Resize((h_in, w_in)),
            CenterCrop(h_in, w_in),
            # this produces Y, Cb and Cr outputs. each has shape {h_in, w_in}. only Y is input to the model
            PixelsToYCbCr(layout="BGR"),
            # if you inserted a Debug step here, the 3 outputs from PixelsToYCbCr would also become model outputs
            # Debug(num_inputs=3),
            ImageBytesToFloat(),  # convert Y to float in range 0..1
            Unsqueeze([0, 1]),  # add batch and channel dims to Y so the shape is {1, 1, h_in, w_in}
        ]
    )

    # Post-processing is more complicated here. Resize the Cb and Cr outputs from the pre-processing to match
    # the model output size, merge those with the Y' model output, and convert back to RGB.

    # create the Steps we need to use in the manual connections
    pipeline.add_post_processing(
        [
            Squeeze([0, 1]),  # remove batch and channel dims from Y'
            FloatToImageBytes(name="Y1_uint8"),  # convert Y' to uint8 in range 0..255

            # Resize the Cb values (output 1 from PixelsToYCbCr)
            (Resize((h_out, w_out), "HW"),
             [IoMapEntry(producer="PixelsToYCbCr", producer_idx=1, consumer_idx=0)]),

            # the Cb and Cr values are already in the range 0..255 so the multiplier is 1. we use the step to round
            # for accuracy (a direct Cast would just truncate) and to clip the values to 0..255 post-Resize
            FloatToImageBytes(multiplier=1.0, name="Cb1_uint8"),

            (Resize((h_out, w_out), "HW"), [IoMapEntry("PixelsToYCbCr", 2, 0)]),
            FloatToImageBytes(multiplier=1.0, name="Cr1_uint8"),

            # as we're selecting outputs from multiple previous steps we need to map them to the inputs using step names
            (
                YCbCrToPixels(layout="BGR"),
                [
                    IoMapEntry("Y1_uint8", 0, 0),  # uint8 Y' with shape {h, w}
                    IoMapEntry("Cb1_uint8", 0, 1),
                    IoMapEntry("Cr1_uint8", 0, 2),
                ],
            ),
            ConvertBGRToImage(image_format=output_format),  # jpg and png are supported
        ]
    )

    new_model = pipeline.run(model)
    onnx.save_model(new_model, str(output_file.resolve()))
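# Illustrative programmatic use of `superresolution` (paths are hypothetical). The model from the pytorch
# super-resolution tutorial has fixed input/output sizes, so the pre/post-processing sizes are inferred from it:
#
#   superresolution(Path("super_resolution.onnx"), Path("super_resolution.with_pre_post_processing.onnx"), "png")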


def yolo_detection(model_file: Path, output_file: Path, output_format: str = 'jpg',
                   onnx_opset: int = 16, num_classes: int = 80, input_shape: List[int] = None,
                   output_as_image: bool = True):
    """
    SSD-like and Faster-RCNN-like models already include NMS; you can find them in the ONNX model zoo.

    A pure detection model accepts a fixed-size image (say 1,3,640,640) as input and outputs a list of bounding
    boxes, where the number of boxes is determined by the anchors.

    This function targets Yolo detection models and should support YOLOv3 to YOLOv8.
    The model must have exactly one input, with shape [1, 3, h, w].
    The model has one or more outputs.
    If the model has one output, its shape is [1, num_boxes, coor+(obj)+cls]
    or [1, coor+(obj)+cls, num_boxes].
    If the model has more than one output, the first output must have shape
    [1, num_boxes, coor+(obj)+cls] or [1, coor+(obj)+cls, num_boxes].
    Note: (obj) means it's optional.

    :param model_file: The input model file path.
    :param output_file: The output file path, where the finalized model is saved.
    :param output_format: The output image format, jpg or png.
    :param onnx_opset: The opset version of the onnx model, default(16).
    :param num_classes: The number of classes, default(80).
    :param input_shape: The shape of the input image (height, width). By default it is read from the model input.
    :param output_as_image: If True the model outputs the image with the boxes drawn on it instead of the box
        coordinates.
    """
    model = onnx.load(str(model_file.resolve(strict=True)))
    inputs = [create_named_value("image", onnx.TensorProto.UINT8, ["num_bytes"])]

    model_input_shape = model.graph.input[0].type.tensor_type.shape
    model_output_shape = model.graph.output[0].type.tensor_type.shape

    # We will use the input_shape to create the model if provided by the user.
    if input_shape is not None:
        assert len(input_shape) == 2, "The input_shape should be [h, w]."
        w_in = input_shape[1]
        h_in = input_shape[0]
    else:
        assert (model_input_shape.dim[-1].HasField("dim_value") and
                model_input_shape.dim[-2].HasField("dim_value")), "please provide input_shape in the command args."

        w_in = model_input_shape.dim[-1].dim_value
        h_in = model_input_shape.dim[-2].dim_value

    # Yolov5(v3,v7) has an output of shape (batchSize, 25200, 85) (box[x,y,w,h] + confidence[c] + class scores)
    # Yolov8 has an output of shape (batchSize, 84, 8400) (box[x,y,w,h] + class scores)
    # https://github.com/ultralytics/ultralytics/blob/e5cb35edfc3bbc9d7d7db8a6042778a751f0e39e/examples/YOLOv8-CPP-Inference/inference.cpp#L31-L33
    # We always want the box info to be the last dim for each iteration.
    # For new variants like YOLOv8, we need to add a Transpose op to permute the output back.
    yolo_v8_or_later = False

    output_shape = [model_output_shape.dim[i].dim_value if model_output_shape.dim[i].HasField("dim_value") else -1
                    for i in [-2, -1]]
    if output_shape[0] != -1 and output_shape[1] != -1:
        yolo_v8_or_later = output_shape[0] < output_shape[1]
    else:
        assert len(model.graph.input) == 1, "Adding pre and post-processing to a multi-input model is not supported."
        try:
            import numpy as np
            import onnxruntime
        except ImportError:
            raise ImportError(
                """Please install onnxruntime and numpy to run this script, e.g. 'pip install onnxruntime numpy'.
                We need to execute the model to determine the output shape in order to add the correct
                post-processing.""")

        # Generate a random input to run the model and infer the output shape.
        session = onnxruntime.InferenceSession(str(model_file), providers=["CPUExecutionProvider"])
        input_name = session.get_inputs()[0].name
        input_type = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[model.graph.input[0].type.tensor_type.elem_type]
        inp = {input_name: np.random.rand(1, 3, h_in, w_in).astype(dtype=input_type)}
        outputs = session.run(None, inp)[0]
        assert len(outputs.shape) == 3 and outputs.shape[0] == 1, "shape of the first model output is not (1, n, m)"
        if outputs.shape[1] < outputs.shape[2]:
            yolo_v8_or_later = True
        assert num_classes + 4 == outputs.shape[2] or num_classes + 5 == outputs.shape[2], \
            "The output shape is neither (1, num_boxes, num_classes+4(reg)) nor (1, num_boxes, num_classes+5(reg+obj))"

    pipeline = PrePostProcessor(inputs, onnx_opset)
    # pre-processing steps are responsible for converting any jpg/png image to a CHW BGR float32 tensor
    # jpg --> BGR (image tensor) --> Resize (scaled image) --> LetterBox (fixed-size image) --> (HWC to) CHW --> float32 --> 1CHW
    pipeline.add_pre_processing(
        [
            ConvertImageToBGR(),  # jpg/png image to BGR in HWC layout
            # Resize an arbitrarily sized image to a fixed size with the not_larger policy
            Resize((h_in, w_in), policy='not_larger'),
            LetterBox(target_shape=(h_in, w_in)),  # pad or crop the image to (h_in, w_in)
            ChannelsLastToChannelsFirst(),  # HWC to CHW
            ImageBytesToFloat(),  # convert to float in range 0..1
            Unsqueeze([0]),  # add batch, CHW --> 1CHW
        ]
    )

    # NMS and drawing boxes
    post_processing_steps = [
        Squeeze([0]),  # Squeeze to remove the batch dimension
    ]

    if yolo_v8_or_later:
        post_processing_steps += [
            Transpose([1, 0]),  # transpose to (num_boxes, box+scores)
            # split the elements into the box and the scores for the classes. no confidence value to apply to scores
            Split(num_outputs=2, axis=-1, splits=[4, num_classes]),
        ]
    else:
        post_processing_steps += [
            # Split the bounding box from the confidence and scores for each class.
            # Apply the confidence to the scores.
            SplitOutBoxAndScoreWithConf(num_classes=num_classes),
        ]

    post_processing_steps += [
        SelectBestBoundingBoxesByNMS(),  # pick the best bounding boxes with NonMaxSuppression
        # Scale the bounding box coords back to the original image
        (ScaleNMSBoundingBoxesAndKeyPoints(name='ScaleBoundingBoxes'),
         [
             # A connection from the original image to ScaleBoundingBoxes.
             # A connection from the resized image to ScaleBoundingBoxes.
             # A connection from the LetterBoxed image to ScaleBoundingBoxes.
             # We can use the three images to calculate the scale factor and offset.
             # With the scale and offset, we can scale the bounding boxes back to the original image.
             utils.IoMapEntry("ConvertImageToBGR", producer_idx=0, consumer_idx=1),
             utils.IoMapEntry("Resize", producer_idx=0, consumer_idx=2),
             utils.IoMapEntry("LetterBox", producer_idx=0, consumer_idx=3),
         ]),
    ]

    if output_as_image:
        post_processing_steps += [
            # DrawBoundingBoxes on the original image.
            # A model exported from pytorch uses the CENTER_XYWH box format.
            # Two modes for colouring the boxes:
            # 1. colour_by_classes=True (colour by class), 2. colour_by_classes=False (colour by confidence)
            (DrawBoundingBoxes(mode='CENTER_XYWH', num_classes=num_classes, colour_by_classes=True),
             [
                 utils.IoMapEntry("ConvertImageToBGR", producer_idx=0, consumer_idx=0),
                 utils.IoMapEntry("ScaleBoundingBoxes", producer_idx=0, consumer_idx=1),
             ]),
            # Encode to jpg/png
            ConvertBGRToImage(image_format=output_format),
        ]

    pipeline.add_post_processing(post_processing_steps)

    new_model = pipeline.run(model)
    # Run shape inferencing to validate the new model. Shape inferencing will fail if any of the new node
    # types or shapes are incorrect. infer_shapes returns a copy of the model with ValueInfo populated,
    # but we ignore that and save new_model as it is smaller due to not containing the inferred shape information.
    _ = onnx.shape_inference.infer_shapes(new_model, strict_mode=True)
    onnx.save_model(new_model, str(output_file.resolve()))
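# Illustrative programmatic use of `yolo_detection` (paths are hypothetical). With the default
# output_as_image=True the finalized model returns encoded image bytes with the boxes drawn on them;
# with output_as_image=False it returns the selected boxes instead:
#
#   yolo_detection(Path("yolov8n.onnx"), Path("yolov8n.with_pre_post_processing.onnx"), "jpg",
#                  num_classes=80, input_shape=[640, 640])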


class NLPTaskType(enum.Enum):
    TokenClassification = enum.auto()
    QuestionAnswering = enum.auto()
    SequenceClassification = enum.auto()
    NextSentencePrediction = enum.auto()


class TokenizerType(enum.Enum):
    BertTokenizer = enum.auto()
    SentencePieceTokenizer = enum.auto()


def transformers_and_bert(
        input_model_file: Path,
        output_model_file: Path,
        vocab_file: Path,
        tokenizer_type: Union[TokenizerType, str],
        task_type: Union[NLPTaskType, str],
        onnx_opset: int = 16,
        add_debug_before_postprocessing=False,
):
    """Construct the pipeline for an end-to-end model with pre- and post-processing. The final model can take text
    as input and, for tasks like QA, output the result in text format.

    Args:
        input_model_file (Path): the model file that needs to be updated.
        output_model_file (Path): where to save the final onnx model.
        vocab_file (Path): the vocab file for the tokenizer.
        tokenizer_type (Union[TokenizerType, str]): the tokenizer type, BertTokenizer or SentencePieceTokenizer.
        task_type (Union[NLPTaskType, str]): the task type of the model.
        onnx_opset (int, optional): the opset version to use. Defaults to 16.
        add_debug_before_postprocessing (bool, optional): whether to add a Debug step before post-processing.
            Defaults to False.
    """
    if isinstance(task_type, str):
        task_type = NLPTaskType[task_type]
    if isinstance(tokenizer_type, str):
        tokenizer_type = TokenizerType[tokenizer_type]

    onnx_model = onnx.load(str(input_model_file.resolve(strict=True)))
    # hardcode batch size to 1
    inputs = [create_named_value("input_text", onnx.TensorProto.STRING, [1, "num_sentences"])]

    pipeline = PrePostProcessor(inputs, onnx_opset)
    tokenizer_args = TokenizerParam(
        vocab_or_file=vocab_file,
        do_lower_case=True,
        tweaked_bos_id=0,
        is_sentence_pair=True if task_type in [NLPTaskType.QuestionAnswering,
                                               NLPTaskType.NextSentencePrediction] else False,
    )

    preprocessing = [
        SentencePieceTokenizer(tokenizer_args)
        if tokenizer_type == TokenizerType.SentencePieceTokenizer else BertTokenizer(tokenizer_args),
        # uncomment this line to debug
        # Debug(2),
    ]

    # for verifying results without the post-processing
    postprocessing = [Debug()] if add_debug_before_postprocessing else []
    if task_type == NLPTaskType.QuestionAnswering:
        postprocessing.append((BertTokenizerQADecoder(tokenizer_args), [
            # input_ids
            utils.IoMapEntry("BertTokenizer", producer_idx=0, consumer_idx=2)]))
    elif task_type == NLPTaskType.SequenceClassification:
        postprocessing.append(ArgMax())
    # the other tasks don't need post-processing, or we don't support it yet.

    pipeline.add_pre_processing(preprocessing)
    pipeline.add_post_processing(postprocessing)

    new_model = pipeline.run(onnx_model)
    onnx.save_model(new_model, str(output_model_file.resolve()))
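# Illustrative programmatic use of `transformers_and_bert` for a MobileBert QA model (paths are hypothetical);
# the vocab file is the BertTokenizer vocab that matches the model:
#
#   transformers_and_bert(Path("mobilebert_qa.onnx"), Path("mobilebert_qa.with_pre_post_processing.onnx"),
#                         Path("vocab.txt"), "BertTokenizer", "QuestionAnswering")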


def main():
    parser = argparse.ArgumentParser(
        os.path.basename(__file__),
        description="""Add pre and post processing to a model.

Currently supports updating:
  Vision models:
    - super resolution with YCbCr input
    - imagenet trained mobilenet
    - object detection with YOLOv3-YOLOv8

  NLP models:
    - MobileBert with different tasks
    - XLM-Roberta with classification task

For Vision models:
To customize, the logic in the `mobilenet`, `superresolution` and `yolo_detection` functions can be used as a guide.
Create a pipeline and add the required pre/post processing 'Steps' in the order required. Configure
individual steps as needed.

For NLP models:
`transformers_and_bert` can be used for MobileBert QuestionAnswering/Classification tasks,
or serve as a guide for how to add pre/post processing to a transformer model.
Usually pre-processing includes adding a tokenizer. Post-processing includes conversion of output_ids to text.

You might need to pass the tokenizer model file (bert vocab file or SentencePieceTokenizer model)
and task_type to the function.

The updated model will be written in the same location as the original model,
with '.onnx' updated to '.with_pre_post_processing.onnx'.

Example usage:
  object detection:
    - python -m onnxruntime_extensions.tools.add_pre_post_processing_to_model -t yolo --num_classes 80 --input_shape 640,640 yolov8n.onnx
""",
    )

    parser.add_argument(
        "-t",
        "--model_type",
        type=str,
        required=True,
        choices=[
            "superresolution",
            "mobilenet",
            "yolo",
            "transformers",
        ],
        help="Model type.",
    )

    parser.add_argument(
        "-s",
        "--model_source",
        type=str,
        required=False,
        choices=["pytorch", "tensorflow"],
        default="pytorch",
        help="""
        Framework the model came from. In some cases there are known differences that can be taken into account when
        adding the pre/post processing to the model. Currently this equates to choosing different normalization
        behavior for mobilenet models.
        """,
    )

    parser.add_argument(
        "--output_format",
        type=str,
        required=False,
        choices=["jpg", "png"],
        default="png",
        help="Image output format for the superresolution model to produce.",
    )

    parser.add_argument(
        "--num_classes",
        type=int,
        default=80,
        help="Number of classes in the object detection model.",
    )

    parser.add_argument(
        "--input_shape",
        type=str,
        default="",
        help="Input image shape (height,width) for the model, e.g. \"224,224\". "
             "The tool will read the input shape from the onnx model if input_shape is not specified.",
    )

    parser.add_argument(
        "--nlp_task_type",
        type=str,
        choices=["QuestionAnswering",
                 "SequenceClassification",
                 "NextSentencePrediction"],
        required=False,
        help="The downstream task for the NLP model.",
    )

    parser.add_argument(
        "--vocab_file",
        type=Path,
        required=False,
        help="Tokenizer model file for BertTokenizer or SentencePieceTokenizer.",
    )

    parser.add_argument(
        "--tokenizer_type",
        type=str,
        choices=["BertTokenizer",
                 "SentencePieceTokenizer"],
        required=False,
        help="Tokenizer type: BertTokenizer or SentencePieceTokenizer.",
    )

    parser.add_argument(
        "--opset", type=int, required=False, default=16,
        help="ONNX opset to use. Minimum allowed is 16. Opset 18 is required for Resize with anti-aliasing.",
    )

    parser.add_argument("model", type=Path, help="Path to the ONNX model to update.")

    args = parser.parse_args()

    model_path = args.model.resolve(strict=True)
    new_model_path = model_path.with_suffix(".with_pre_post_processing.onnx")

    if args.model_type == "mobilenet":
        source = ModelSource.PYTORCH if args.model_source == "pytorch" else ModelSource.TENSORFLOW
        mobilenet(model_path, new_model_path, source, args.opset)
    elif args.model_type == "superresolution":
        superresolution(model_path, new_model_path, args.output_format, args.opset)
    elif args.model_type == "yolo":
        input_shape = None
        if args.input_shape != "":
            input_shape = [int(x) for x in args.input_shape.split(",")]
        yolo_detection(model_path, new_model_path, args.output_format, args.opset, args.num_classes, input_shape)
    else:
        if args.vocab_file is None or args.nlp_task_type is None or args.tokenizer_type is None:
            parser.error("Please provide vocab_file, nlp_task_type and tokenizer_type.")
        transformers_and_bert(model_path, new_model_path, args.vocab_file, args.tokenizer_type, args.nlp_task_type)


if __name__ == "__main__":
    main()
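Models produced by these helpers rely on custom operators from onnxruntime-extensions (for example the image decode and encode steps), so the extensions library has to be registered with onnxruntime when the finalized model is loaded. A minimal sketch of running such a model, with hypothetical file names:

    import numpy as np
    import onnxruntime as ort
    from onnxruntime_extensions import get_library_path

    so = ort.SessionOptions()
    so.register_custom_ops_library(get_library_path())  # registers the extensions custom ops
    sess = ort.InferenceSession("model.with_pre_post_processing.onnx", so, providers=["CPUExecutionProvider"])

    # for the vision pipelines the "image" input is the raw encoded jpg/png bytes
    image_bytes = np.fromfile("input.jpg", dtype=np.uint8)
    outputs = sess.run(None, {"image": image_bytes})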