onnxruntime_extensions-0.14.0-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. onnxruntime_extensions/__init__.py +82 -0
  2. onnxruntime_extensions/_cuops.py +564 -0
  3. onnxruntime_extensions/_extensions_pydll.cpython-313-darwin.so +0 -0
  4. onnxruntime_extensions/_extensions_pydll.pyi +45 -0
  5. onnxruntime_extensions/_hf_cvt.py +331 -0
  6. onnxruntime_extensions/_ocos.py +133 -0
  7. onnxruntime_extensions/_ortapi2.py +274 -0
  8. onnxruntime_extensions/_torch_cvt.py +231 -0
  9. onnxruntime_extensions/_version.py +2 -0
  10. onnxruntime_extensions/cmd.py +66 -0
  11. onnxruntime_extensions/cvt.py +306 -0
  12. onnxruntime_extensions/onnxprocess/__init__.py +12 -0
  13. onnxruntime_extensions/onnxprocess/_builder.py +53 -0
  14. onnxruntime_extensions/onnxprocess/_onnx_ops.py +1507 -0
  15. onnxruntime_extensions/onnxprocess/_session.py +355 -0
  16. onnxruntime_extensions/onnxprocess/_tensor.py +628 -0
  17. onnxruntime_extensions/onnxprocess/torch_wrapper.py +31 -0
  18. onnxruntime_extensions/pnp/__init__.py +13 -0
  19. onnxruntime_extensions/pnp/_base.py +124 -0
  20. onnxruntime_extensions/pnp/_imagenet.py +65 -0
  21. onnxruntime_extensions/pnp/_nlp.py +148 -0
  22. onnxruntime_extensions/pnp/_onnx_ops.py +1544 -0
  23. onnxruntime_extensions/pnp/_torchext.py +310 -0
  24. onnxruntime_extensions/pnp/_unifier.py +45 -0
  25. onnxruntime_extensions/pnp/_utils.py +302 -0
  26. onnxruntime_extensions/pp_api.py +83 -0
  27. onnxruntime_extensions/tools/__init__.py +0 -0
  28. onnxruntime_extensions/tools/add_HuggingFace_CLIPImageProcessor_to_model.py +171 -0
  29. onnxruntime_extensions/tools/add_pre_post_processing_to_model.py +535 -0
  30. onnxruntime_extensions/tools/pre_post_processing/__init__.py +4 -0
  31. onnxruntime_extensions/tools/pre_post_processing/pre_post_processor.py +395 -0
  32. onnxruntime_extensions/tools/pre_post_processing/step.py +227 -0
  33. onnxruntime_extensions/tools/pre_post_processing/steps/__init__.py +6 -0
  34. onnxruntime_extensions/tools/pre_post_processing/steps/general.py +366 -0
  35. onnxruntime_extensions/tools/pre_post_processing/steps/nlp.py +344 -0
  36. onnxruntime_extensions/tools/pre_post_processing/steps/vision.py +1157 -0
  37. onnxruntime_extensions/tools/pre_post_processing/utils.py +139 -0
  38. onnxruntime_extensions/util.py +186 -0
  39. onnxruntime_extensions-0.14.0.dist-info/LICENSE +21 -0
  40. onnxruntime_extensions-0.14.0.dist-info/METADATA +102 -0
  41. onnxruntime_extensions-0.14.0.dist-info/RECORD +43 -0
  42. onnxruntime_extensions-0.14.0.dist-info/WHEEL +6 -0
  43. onnxruntime_extensions-0.14.0.dist-info/top_level.txt +1 -0
onnxruntime_extensions/tools/add_pre_post_processing_to_model.py
@@ -0,0 +1,535 @@
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # Licensed under the MIT License.
+
+ import argparse
+ import enum
+ import onnx
+ import os
+
+ from pathlib import Path
+ from typing import List, Union
+ # NOTE: If you're working on this script, install onnxruntime_extensions using `pip install -e .` from the repo root
+ # and run with `python -m onnxruntime_extensions.tools.add_pre_post_processing_to_model`.
+ # Running the script directly will result in an error from a relative import.
+ from .pre_post_processing import *
+
+
+ class ModelSource(enum.Enum):
+     PYTORCH = 0
+     TENSORFLOW = 1
+     OTHER = 2
+
+
+ def imagenet_preprocessing(model_source: ModelSource = ModelSource.PYTORCH):
+     """
+     Common pre-processing for an imagenet trained model.
+
+     - Resize so the smallest side is 256
+     - Centered crop to 224 x 224
+     - Convert image bytes to floating point values in the range 0..1
+     - [Channels last to channels first (convert to ONNX layout) if the model came from pytorch and has NCHW layout]
+     - Normalize
+       - (value - mean) / stddev
+       - for a pytorch model, this applies per-channel normalization parameters
+       - for a tensorflow model, this simply moves the image bytes into the range -1..1
+     - Add a batch dimension with a value of 1
+     """
+
+     # These utils cover both cases of typical pytorch/tensorflow pre-processing for an imagenet trained model
+     # https://github.com/keras-team/keras/blob/b80dd12da9c0bc3f569eca3455e77762cf2ee8ef/keras/applications/imagenet_utils.py#L177
+
+     steps = [
+         Resize(256),
+         CenterCrop(224, 224),
+         ImageBytesToFloat()
+     ]
+
+     if model_source == ModelSource.PYTORCH:
+         # pytorch model has NCHW layout
+         steps.extend([
+             ChannelsLastToChannelsFirst(),
+             Normalize([(0.485, 0.229), (0.456, 0.224), (0.406, 0.225)], layout="CHW")
+         ])
+     else:
+         # TF processing involves moving the data into the range -1..1 instead of 0..1.
+         # ImageBytesToFloat converts to range 0..1, so we use 0.5 for the mean to move into the range -0.5..0.5
+         # and 0.5 for the stddev to expand to -1..1
+         steps.append(Normalize([(0.5, 0.5)], layout="HWC"))
+
+     steps.append(Unsqueeze([0]))  # add batch dim
+
+     return steps
+
+
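For reference, the TensorFlow-style branch above is numerically equivalent to the standalone NumPy sketch below (illustrative only, not part of this file; the helper name is made up). The (0.5, 0.5) mean/stddev pair maps the 0..1 output of ImageBytesToFloat into the -1..1 range:

import numpy as np

def tf_style_normalize(image_u8: np.ndarray) -> np.ndarray:
    # ImageBytesToFloat: uint8 0..255 -> float32 0..1
    x = image_u8.astype(np.float32) / 255.0
    # Normalize([(0.5, 0.5)]): (value - mean) / stddev, i.e. 0..1 -> -1..1
    return (x - 0.5) / 0.5

print(tf_style_normalize(np.array([0, 128, 255], dtype=np.uint8)))  # approx [-1.0, 0.004, 1.0]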
+ def mobilenet(model_file: Path, output_file: Path, model_source: ModelSource, onnx_opset: int = 16):
+     model = onnx.load(str(model_file.resolve(strict=True)))
+     inputs = [create_named_value("image", onnx.TensorProto.UINT8, ["num_bytes"])]
+
+     pipeline = PrePostProcessor(inputs, onnx_opset)
+
+     # support the user providing encoded image bytes
+     preprocessing = [
+         ConvertImageToBGR(),  # custom op to convert jpg/png to BGR (output is HWC)
+         ReverseAxis(axis=2, dim_value=3, name="BGR_to_RGB"),
+     ]  # Normalization params are for RGB ordering
+     # plug in the default imagenet pre-processing
+     preprocessing.extend(imagenet_preprocessing(model_source))
+
+     pipeline.add_pre_processing(preprocessing)
+
+     # for mobilenet we convert the scores to probabilities with Softmax if necessary. the TF model already includes Softmax
+     if model.graph.node[-1].op_type != "Softmax":
+         pipeline.add_post_processing([Softmax()])
+
+     new_model = pipeline.run(model)
+
+     onnx.save_model(new_model, str(output_file.resolve()))
+
+
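A minimal sketch of running the updated classification model (filenames are placeholders; assumes onnxruntime and onnxruntime_extensions are installed). The image decode/resize custom ops live in the extensions shared library, which is registered on the session via get_library_path():

import numpy as np
import onnxruntime as ort
from onnxruntime_extensions import get_library_path

so = ort.SessionOptions()
so.register_custom_ops_library(get_library_path())  # registers ConvertImageToBGR and the other custom ops

session = ort.InferenceSession("mobilenet.with_pre_post_processing.onnx", so,
                               providers=["CPUExecutionProvider"])

# the "image" input declared above is raw encoded jpg/png bytes as a uint8 tensor
image_bytes = np.frombuffer(open("example.jpg", "rb").read(), dtype=np.uint8)
probabilities = session.run(None, {"image": image_bytes})[0]
print("predicted class id:", int(probabilities.argmax()))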
+ def superresolution(model_file: Path, output_file: Path, output_format: str, onnx_opset: int = 16):
+     # TODO: There seems to be a split with some super resolution models processing RGB input and some processing
+     # the Y channel after converting to YCbCr.
+     # For the sake of this example implementation we do the trickier YCbCr processing as that involves joining the
+     # Cb and Cr channels with the model output to create the resized image.
+     # Model is from https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html
+     model = onnx.load(str(model_file.resolve(strict=True)))
+     inputs = [create_named_value("image", onnx.TensorProto.UINT8, ["num_bytes"])]
+
+     # assuming input is *CHW, infer the input sizes from the model.
+     # requires the model to have fixed sizes for the input and output height and width.
+     model_input_shape = model.graph.input[0].type.tensor_type.shape
+     model_output_shape = model.graph.output[0].type.tensor_type.shape
+     assert model_input_shape.dim[-1].HasField("dim_value")
+     assert model_input_shape.dim[-2].HasField("dim_value")
+     assert model_output_shape.dim[-1].HasField("dim_value")
+     assert model_output_shape.dim[-2].HasField("dim_value")
+
+     w_in = model_input_shape.dim[-1].dim_value
+     h_in = model_input_shape.dim[-2].dim_value
+     h_out = model_output_shape.dim[-2].dim_value
+     w_out = model_output_shape.dim[-1].dim_value
+
+     # pre/post processing for https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html
+     pipeline = PrePostProcessor(inputs, onnx_opset)
+     pipeline.add_pre_processing(
+         [
+             ConvertImageToBGR(),  # jpg/png image to BGR in HWC layout
+             Resize((h_in, w_in)),
+             CenterCrop(h_in, w_in),
+             # this produces Y, Cb and Cr outputs. each has shape {h_in, w_in}. only Y is input to the model
+             PixelsToYCbCr(layout="BGR"),
+             # if you inserted this Debug step here, the 3 outputs from PixelsToYCbCr would also be model outputs
+             # Debug(num_inputs=3),
+             ImageBytesToFloat(),  # Convert Y to float in range 0..1
+             Unsqueeze([0, 1]),  # add batch and channels dims to Y so shape is {1, 1, h_in, w_in}
+         ]
+     )
+
+     # Post-processing is more involved here. Resize the Cb and Cr outputs from the pre-processing to match
+     # the model output size, merge those with the Y' model output, and convert back to RGB.
+
+     # create the Steps we need to use in the manual connections
+     pipeline.add_post_processing(
+         [
+             Squeeze([0, 1]),  # remove batch and channels dims from Y'
+             FloatToImageBytes(name="Y1_uint8"),  # convert Y' to uint8 in range 0..255
+
+             # Resize the Cb values (output 1 from PixelsToYCbCr)
+             (Resize((h_out, w_out), "HW"),
+              [IoMapEntry(producer="PixelsToYCbCr", producer_idx=1, consumer_idx=0)]),
+
+             # the Cb and Cr values are already in the range 0..255 so the multiplier is 1. we're using the step to round
+             # for accuracy (a direct Cast would just truncate) and clip (to ensure range 0..255) the values post-Resize
+             FloatToImageBytes(multiplier=1.0, name="Cb1_uint8"),
+
+             (Resize((h_out, w_out), "HW"), [IoMapEntry("PixelsToYCbCr", 2, 0)]),
+             FloatToImageBytes(multiplier=1.0, name="Cr1_uint8"),
+
+             # as we're selecting outputs from multiple previous steps we need to map them to the inputs using step names
+             (
+                 YCbCrToPixels(layout="BGR"),
+                 [
+                     IoMapEntry("Y1_uint8", 0, 0),  # uint8 Y' with shape {h, w}
+                     IoMapEntry("Cb1_uint8", 0, 1),
+                     IoMapEntry("Cr1_uint8", 0, 2),
+                 ],
+             ),
+             ConvertBGRToImage(image_format=output_format),  # jpg or png are supported
+         ]
+     )
+
+     new_model = pipeline.run(model)
+     onnx.save_model(new_model, str(output_file.resolve()))
+
+
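Consuming the super-resolution output model follows the same session setup as the earlier sketch, but here the final ConvertBGRToImage step means the single model output is already encoded jpg/png bytes (filenames below are placeholders):

import numpy as np
import onnxruntime as ort
from onnxruntime_extensions import get_library_path

so = ort.SessionOptions()
so.register_custom_ops_library(get_library_path())
session = ort.InferenceSession("super_resolution.with_pre_post_processing.onnx", so,
                               providers=["CPUExecutionProvider"])

raw = np.frombuffer(open("low_res.jpg", "rb").read(), dtype=np.uint8)
encoded = session.run(None, {"image": raw})[0]  # uint8 tensor of encoded image bytes
with open("upscaled.png", "wb") as f:
    f.write(encoded.tobytes())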
+ def yolo_detection(model_file: Path, output_file: Path, output_format: str = 'jpg',
+                    onnx_opset: int = 16, num_classes: int = 80, input_shape: List[int] = None,
+                    output_as_image: bool = True):
+     """
+     SSD-like and Faster-RCNN-like models already include NMS inside the model; you can find them in the onnx model zoo.
+
+     A pure detection model accepts a fixed-size image (say 1,3,640,640) as input and outputs a list of bounding boxes,
+     where the number of boxes is determined by the anchors.
+
+     This function targets YOLO detection models and in theory supports YOLOv3 through YOLOv8.
+     The model must have only one input, and the input shape must be [1, 3, h, w].
+     The model can have one or more outputs.
+     If the model has one output, the output shape is [1, num_boxes, coor+(obj)+cls]
+     or [1, coor+(obj)+cls, num_boxes].
+     If the model has more than one output, the first output shape must be
+     [1, num_boxes, coor+(obj)+cls] or [1, coor+(obj)+cls, num_boxes].
+     Note: (obj) means the objectness value is optional.
+
+     :param model_file: The input model file path.
+     :param output_file: The output file path where the finalized model is saved.
+     :param output_format: The output image format, jpg or png.
+     :param onnx_opset: The opset version of the onnx model. Default is 16.
+     :param num_classes: The number of classes. Default is 80.
+     :param input_shape: The input image shape (height, width). By default it is read from the model input.
+     :param output_as_image: If True, the model outputs the image with the boxes drawn on it instead of the box coordinates.
+     """
+     model = onnx.load(str(model_file.resolve(strict=True)))
+     inputs = [create_named_value("image", onnx.TensorProto.UINT8, ["num_bytes"])]
+
+     model_input_shape = model.graph.input[0].type.tensor_type.shape
+     model_output_shape = model.graph.output[0].type.tensor_type.shape
+
+     # We will use the input_shape to create the model if provided by the user.
+     if input_shape is not None:
+         assert len(input_shape) == 2, "The input_shape should be [h, w]."
+         w_in = input_shape[1]
+         h_in = input_shape[0]
+     else:
+         assert (model_input_shape.dim[-1].HasField("dim_value") and
+                 model_input_shape.dim[-2].HasField("dim_value")), "please provide input_shape in the command args."
+
+         w_in = model_input_shape.dim[-1].dim_value
+         h_in = model_input_shape.dim[-2].dim_value
+
+     # Yolov5(v3,v7) has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
+     # Yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
+     # https://github.com/ultralytics/ultralytics/blob/e5cb35edfc3bbc9d7d7db8a6042778a751f0e39e/examples/YOLOv8-CPP-Inference/inference.cpp#L31-L33
+     # We always want the box info to be the last dim for each iteration.
+     # For newer variants like YOLOv8, we need to add a Transpose op to permute the output back.
+     yolo_v8_or_later = False
+
+     output_shape = [model_output_shape.dim[i].dim_value if model_output_shape.dim[i].HasField("dim_value") else -1
+                     for i in [-2, -1]]
+     if output_shape[0] != -1 and output_shape[1] != -1:
+         yolo_v8_or_later = output_shape[0] < output_shape[1]
+     else:
+         assert len(model.graph.input) == 1, "Doesn't support adding pre and post-processing for multi-input models."
+         try:
+             import numpy as np
+             import onnxruntime
+         except ImportError:
+             raise ImportError(
+                 """Please install onnxruntime and numpy to run this script. e.g. 'pip install onnxruntime numpy'.
+                 We need to execute the model to determine the output shape in order to add the correct post-processing.""")
+
+         # Generate a random input to run the model and infer the output shape.
+         session = onnxruntime.InferenceSession(str(model_file), providers=["CPUExecutionProvider"])
+         input_name = session.get_inputs()[0].name
+         input_type = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[model.graph.input[0].type.tensor_type.elem_type]
+         inp = {input_name: np.random.rand(1, 3, h_in, w_in).astype(dtype=input_type)}
+         outputs = session.run(None, inp)[0]
+         assert len(outputs.shape) == 3 and outputs.shape[0] == 1, "shape of the first model output is not (1, n, m)"
+         if outputs.shape[1] < outputs.shape[2]:
+             yolo_v8_or_later = True
+         assert num_classes + 4 == outputs.shape[2] or num_classes + 5 == outputs.shape[2], \
+             "The output shape is neither (1, num_boxes, num_classes+4(reg)) nor (1, num_boxes, num_classes+5(reg+obj))"
+
+     pipeline = PrePostProcessor(inputs, onnx_opset)
+     # pre-processing steps are responsible for converting any jpg/png image to a CHW BGR float32 tensor
+     # jpg --> BGR (image tensor) --> Resize (scaled image) --> LetterBox (fixed-size image) --> (from HWC to) CHW --> float32 --> 1CHW
+     pipeline.add_pre_processing(
+         [
+             ConvertImageToBGR(),  # jpg/png image to BGR in HWC layout
+             # Resize an arbitrarily sized image to a fixed size using the not_larger policy
+             Resize((h_in, w_in), policy='not_larger'),
+             LetterBox(target_shape=(h_in, w_in)),  # pad or crop the image to (h_in, w_in)
+             ChannelsLastToChannelsFirst(),  # HWC to CHW
+             ImageBytesToFloat(),  # Convert to float in range 0..1
+             Unsqueeze([0]),  # add batch, CHW --> 1CHW
+         ]
+     )
+
+     # NMS and drawing boxes
+     post_processing_steps = [
+         Squeeze([0]),  # Squeeze to remove the batch dimension
+     ]
+
+     if yolo_v8_or_later:
+         post_processing_steps += [
+             Transpose([1, 0]),  # transpose to (num_boxes, box+scores)
+             # split elements into the box and scores for the classes. no confidence value to apply to scores
+             Split(num_outputs=2, axis=-1, splits=[4, num_classes]),
+         ]
+     else:
+         post_processing_steps += [
+             # Split the bounding box from the confidence and scores for each class.
+             # Apply the confidence to the scores.
+             SplitOutBoxAndScoreWithConf(num_classes=num_classes),
+         ]
+
+     post_processing_steps += [
+         SelectBestBoundingBoxesByNMS(),  # pick best bounding boxes with NonMaxSuppression
+         # Scale bounding box coords back to the original image
+         (ScaleNMSBoundingBoxesAndKeyPoints(name='ScaleBoundingBoxes'),
+          [
+              # A connection from the original image to ScaleBoundingBoxes
+              # A connection from the resized image to ScaleBoundingBoxes
+              # A connection from the LetterBoxed image to ScaleBoundingBoxes
+              # We use the three images to calculate the scale factor and offset.
+              # With the scale and offset, we can scale the bounding boxes back to the original image.
+              utils.IoMapEntry("ConvertImageToBGR", producer_idx=0, consumer_idx=1),
+              utils.IoMapEntry("Resize", producer_idx=0, consumer_idx=2),
+              utils.IoMapEntry("LetterBox", producer_idx=0, consumer_idx=3),
+          ]),
+     ]
+
+     if output_as_image:
+         post_processing_steps += [
+             # DrawBoundingBoxes on the original image
+             # Model imported from pytorch has CENTER_XYWH format
+             # Two modes for how to colour the boxes:
+             # 1. colour_by_classes=True (colour by class), 2. colour_by_classes=False (colour by confidence)
+             (DrawBoundingBoxes(mode='CENTER_XYWH', num_classes=num_classes, colour_by_classes=True),
+              [
+                  utils.IoMapEntry("ConvertImageToBGR", producer_idx=0, consumer_idx=0),
+                  utils.IoMapEntry("ScaleBoundingBoxes", producer_idx=0, consumer_idx=1),
+              ]),
+             # Encode to jpg/png
+             ConvertBGRToImage(image_format=output_format),
+         ]
+
+     pipeline.add_post_processing(post_processing_steps)
+
+     new_model = pipeline.run(model)
+     # run shape inferencing to validate the new model. shape inferencing will fail if any of the new node
+     # types or shapes are incorrect. infer_shapes returns a copy of the model with ValueInfo populated,
+     # but we ignore that and save new_model as it is smaller due to not containing the inferred shape information.
+     _ = onnx.shape_inference.infer_shapes(new_model, strict_mode=True)
+     onnx.save_model(new_model, str(output_file.resolve()))
+
+
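The function can also be called programmatically rather than through the CLI in main() below; a rough sketch (yolov8n.onnx is a placeholder model path, and passing input_shape explicitly avoids reading the size from the model input):

from pathlib import Path
from onnxruntime_extensions.tools.add_pre_post_processing_to_model import yolo_detection

yolo_detection(Path("yolov8n.onnx"),
               Path("yolov8n.with_pre_post_processing.onnx"),
               output_format="jpg",
               onnx_opset=16,
               num_classes=80,
               input_shape=[640, 640])  # [height, width]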
+ class NLPTaskType(enum.Enum):
+     TokenClassification = enum.auto()
+     QuestionAnswering = enum.auto()
+     SequenceClassification = enum.auto()
+     NextSentencePrediction = enum.auto()
+
+
+ class TokenizerType(enum.Enum):
+     BertTokenizer = enum.auto()
+     SentencePieceTokenizer = enum.auto()
+
+
+ def transformers_and_bert(
+     input_model_file: Path,
+     output_model_file: Path,
+     vocab_file: Path,
+     tokenizer_type: Union[TokenizerType, str],
+     task_type: Union[NLPTaskType, str],
+     onnx_opset: int = 16,
+     add_debug_before_postprocessing=False,
+ ):
+     """Construct the pipeline for an end-to-end model with pre and post processing. The final model can take text
+     as input and produce text output, e.g. for a QA model.
+
+     Args:
+         input_model_file (Path): the model file that needs to be updated.
+         output_model_file (Path): where to save the final onnx model.
+         vocab_file (Path): the vocab file for the tokenizer.
+         tokenizer_type (Union[TokenizerType, str]): the tokenizer type, BertTokenizer or SentencePieceTokenizer.
+         task_type (Union[NLPTaskType, str]): the task type of the model.
+         onnx_opset (int, optional): the opset version to use. Defaults to 16.
+         add_debug_before_postprocessing (bool, optional): whether to add a debug step before post processing.
+             Defaults to False.
+     """
+     if isinstance(task_type, str):
+         task_type = NLPTaskType[task_type]
+     if isinstance(tokenizer_type, str):
+         tokenizer_type = TokenizerType[tokenizer_type]
+
+     onnx_model = onnx.load(str(input_model_file.resolve(strict=True)))
+     # hardcode batch size to 1
+     inputs = [create_named_value("input_text", onnx.TensorProto.STRING, [1, "num_sentences"])]
+
+     pipeline = PrePostProcessor(inputs, onnx_opset)
+     tokenizer_args = TokenizerParam(
+         vocab_or_file=vocab_file,
+         do_lower_case=True,
+         tweaked_bos_id=0,
+         is_sentence_pair=True if task_type in [NLPTaskType.QuestionAnswering,
+                                                NLPTaskType.NextSentencePrediction] else False,
+     )
+
+     preprocessing = [
+         SentencePieceTokenizer(tokenizer_args)
+         if tokenizer_type == TokenizerType.SentencePieceTokenizer else BertTokenizer(tokenizer_args),
+         # uncomment this line to debug
+         # Debug(2),
+     ]
+
+     # For verifying results without postprocessing
+     postprocessing = [Debug()] if add_debug_before_postprocessing else []
+     if task_type == NLPTaskType.QuestionAnswering:
+         postprocessing.append((BertTokenizerQADecoder(tokenizer_args), [
+             # input_ids
+             utils.IoMapEntry("BertTokenizer", producer_idx=0, consumer_idx=2)]))
+     elif task_type == NLPTaskType.SequenceClassification:
+         postprocessing.append(ArgMax())
+     # the other tasks don't need postprocessing or we don't support it yet.
+
+     pipeline.add_pre_processing(preprocessing)
+     pipeline.add_post_processing(postprocessing)
+
+     new_model = pipeline.run(onnx_model)
+     onnx.save_model(new_model, str(output_model_file.resolve()))
+
+
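A rough sketch of querying a QuestionAnswering model produced by transformers_and_bert (the filename is a placeholder, and the input layout assumes the hardcoded batch of 1 with a question/context sentence pair, matching the is_sentence_pair setting above):

import numpy as np
import onnxruntime as ort
from onnxruntime_extensions import get_library_path

so = ort.SessionOptions()
so.register_custom_ops_library(get_library_path())  # tokenizer/decoder custom ops
session = ort.InferenceSession("mobilebert_qa.with_pre_post_processing.onnx", so,
                               providers=["CPUExecutionProvider"])

question = "What does the tool add to the model?"
context = "The script adds pre and post processing steps to an ONNX model."
answer = session.run(None, {"input_text": np.array([[question, context]], dtype=object)})
print(answer[0])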
+ def main():
+     parser = argparse.ArgumentParser(
+         os.path.basename(__file__),
+         description="""Add pre and post processing to a model.
+
+         Currently supports updating:
+           Vision models:
+             - super resolution with YCbCr input
+             - imagenet trained mobilenet
+             - object detection with YOLOv3-YOLOv8
+
+           NLP models:
+             - MobileBert with different tasks
+             - XLM-Roberta with classification task
+
+         For Vision models:
+         To customize, the logic in the `mobilenet`, `superresolution` and `yolo_detection` functions can be used as a guide.
+         Create a pipeline and add the required pre/post processing 'Steps' in the order required. Configure
+         individual steps as needed.
+
+         For NLP models:
+         `transformers_and_bert` can be used for MobileBert QuestionAnswering/Classification tasks,
+         or serve as a guide for how to add pre/post processing to a transformer model.
+         Usually pre-processing includes adding a tokenizer. Post-processing includes conversion of output_ids to text.
+
+         You might need to pass the tokenizer model file (bert vocab file or SentencePieceTokenizer model)
+         and task_type to the function.
+
+         The updated model will be written in the same location as the original model,
+         with '.onnx' updated to '.with_pre_post_processing.onnx'.
+
+         Example usage:
+           object detection:
+             - python -m onnxruntime_extensions.tools.add_pre_post_processing_to_model -t yolo --num_classes 80 --input_shape 640,640 yolov8n.onnx
+         """,
+     )
+
+     parser.add_argument(
+         "-t",
+         "--model_type",
+         type=str,
+         required=True,
+         choices=[
+             "superresolution",
+             "mobilenet",
+             "yolo",
+             "transformers",
+         ],
+         help="Model type.",
+     )
+
+     parser.add_argument(
+         "-s",
+         "--model_source",
+         type=str,
+         required=False,
+         choices=["pytorch", "tensorflow"],
+         default="pytorch",
+         help="""
+         Framework that the model came from. In some cases there are known differences that can be taken into account when
+         adding the pre/post processing to the model. Currently this equates to choosing different normalization
+         behavior for mobilenet models.
+         """,
+     )
+
+     parser.add_argument(
+         "--output_format",
+         type=str,
+         required=False,
+         choices=["jpg", "png"],
+         default="png",
+         help="Image output format for the superresolution model to produce.",
+     )
+
+     parser.add_argument(
+         "--num_classes",
+         type=int,
+         default=80,
+         help="Number of classes in the object detection model.",
+     )
+
+     parser.add_argument(
+         "--input_shape",
+         type=str,
+         default="",
+         help="Input image shape (height,width) for the model, e.g. \"224,224\". "
+              "The tool will read the input shape from the onnx model if input_shape is not specified.",
+     )
+
+     parser.add_argument(
+         "--nlp_task_type",
+         type=str,
+         choices=["QuestionAnswering",
+                  "SequenceClassification",
+                  "NextSentencePrediction"],
+         required=False,
+         help="The downstream task for the NLP model.",
+     )
+
+     parser.add_argument(
+         "--vocab_file",
+         type=Path,
+         required=False,
+         help="Tokenizer model file for BertTokenizer or SentencePieceTokenizer.",
+     )
+
+     parser.add_argument(
+         "--tokenizer_type",
+         type=str,
+         choices=["BertTokenizer",
+                  "SentencePieceTokenizer"],
+         required=False,
+         help="Tokenizer type to use: BertTokenizer or SentencePieceTokenizer.",
+     )
+
+     parser.add_argument(
+         "--opset", type=int, required=False, default=16,
+         help="ONNX opset to use. Minimum allowed is 16. Opset 18 is required for Resize with anti-aliasing.",
+     )
+
+     parser.add_argument("model", type=Path, help="Provide path to ONNX model to update.")
+
+     args = parser.parse_args()
+
+     model_path = args.model.resolve(strict=True)
+     new_model_path = model_path.with_suffix(".with_pre_post_processing.onnx")
+
+     if args.model_type == "mobilenet":
+         source = ModelSource.PYTORCH if args.model_source == "pytorch" else ModelSource.TENSORFLOW
+         mobilenet(model_path, new_model_path, source, args.opset)
+     elif args.model_type == "superresolution":
+         superresolution(model_path, new_model_path, args.output_format, args.opset)
+     elif args.model_type == "yolo":
+         input_shape = None
+         if args.input_shape != "":
+             input_shape = [int(x) for x in args.input_shape.split(",")]
+         yolo_detection(model_path, new_model_path, args.output_format, args.opset, args.num_classes, input_shape)
+     else:
+         if args.vocab_file is None or args.nlp_task_type is None or args.tokenizer_type is None:
+             parser.error("Please provide --vocab_file, --nlp_task_type and --tokenizer_type.")
+         transformers_and_bert(model_path, new_model_path, args.vocab_file, args.tokenizer_type, args.nlp_task_type)
+
+
+ if __name__ == "__main__":
+     main()
onnxruntime_extensions/tools/pre_post_processing/__init__.py
@@ -0,0 +1,4 @@
+ from .pre_post_processor import PrePostProcessor
+ from .step import Step, Debug
+ from .utils import *
+ from .steps import *