onnxruntime-directml 1.20.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnxruntime/LICENSE +21 -0
- onnxruntime/Privacy.md +21 -0
- onnxruntime/ThirdPartyNotices.txt +6508 -0
- onnxruntime/__init__.py +78 -0
- onnxruntime/backend/__init__.py +6 -0
- onnxruntime/backend/backend.py +174 -0
- onnxruntime/backend/backend_rep.py +53 -0
- onnxruntime/capi/DirectML.dll +0 -0
- onnxruntime/capi/__init__.py +4 -0
- onnxruntime/capi/_ld_preload.py +7 -0
- onnxruntime/capi/_pybind_state.py +33 -0
- onnxruntime/capi/convert_npz_to_onnx_adapter.py +48 -0
- onnxruntime/capi/onnxruntime.dll +0 -0
- onnxruntime/capi/onnxruntime_collect_build_info.py +47 -0
- onnxruntime/capi/onnxruntime_inference_collection.py +1108 -0
- onnxruntime/capi/onnxruntime_providers_shared.dll +0 -0
- onnxruntime/capi/onnxruntime_pybind11_state.pyd +0 -0
- onnxruntime/capi/onnxruntime_validation.py +150 -0
- onnxruntime/capi/version_info.py +2 -0
- onnxruntime/datasets/__init__.py +17 -0
- onnxruntime/datasets/logreg_iris.onnx +0 -0
- onnxruntime/datasets/mul_1.onnx +0 -0
- onnxruntime/datasets/sigmoid.onnx +13 -0
- onnxruntime/quantization/CalTableFlatBuffers/KeyValue.py +78 -0
- onnxruntime/quantization/CalTableFlatBuffers/TrtTable.py +90 -0
- onnxruntime/quantization/CalTableFlatBuffers/__init__.py +0 -0
- onnxruntime/quantization/__init__.py +16 -0
- onnxruntime/quantization/base_quantizer.py +532 -0
- onnxruntime/quantization/calibrate.py +1245 -0
- onnxruntime/quantization/execution_providers/qnn/__init__.py +2 -0
- onnxruntime/quantization/execution_providers/qnn/fusion_lpnorm.py +132 -0
- onnxruntime/quantization/execution_providers/qnn/mixed_precision_overrides_utils.py +413 -0
- onnxruntime/quantization/execution_providers/qnn/preprocess.py +307 -0
- onnxruntime/quantization/execution_providers/qnn/quant_config.py +387 -0
- onnxruntime/quantization/fusions/__init__.py +3 -0
- onnxruntime/quantization/fusions/fusion.py +311 -0
- onnxruntime/quantization/fusions/fusion_gelu.py +272 -0
- onnxruntime/quantization/fusions/fusion_layernorm.py +135 -0
- onnxruntime/quantization/matmul_4bits_quantizer.py +1480 -0
- onnxruntime/quantization/matmul_bnb4_quantizer.py +240 -0
- onnxruntime/quantization/onnx_model.py +580 -0
- onnxruntime/quantization/onnx_quantizer.py +1008 -0
- onnxruntime/quantization/operators/__init__.py +2 -0
- onnxruntime/quantization/operators/activation.py +119 -0
- onnxruntime/quantization/operators/argmax.py +18 -0
- onnxruntime/quantization/operators/attention.py +73 -0
- onnxruntime/quantization/operators/base_operator.py +26 -0
- onnxruntime/quantization/operators/binary_op.py +72 -0
- onnxruntime/quantization/operators/concat.py +62 -0
- onnxruntime/quantization/operators/conv.py +258 -0
- onnxruntime/quantization/operators/direct_q8.py +78 -0
- onnxruntime/quantization/operators/embed_layernorm.py +121 -0
- onnxruntime/quantization/operators/gather.py +64 -0
- onnxruntime/quantization/operators/gavgpool.py +62 -0
- onnxruntime/quantization/operators/gemm.py +166 -0
- onnxruntime/quantization/operators/lstm.py +117 -0
- onnxruntime/quantization/operators/matmul.py +231 -0
- onnxruntime/quantization/operators/maxpool.py +34 -0
- onnxruntime/quantization/operators/norm.py +40 -0
- onnxruntime/quantization/operators/pad.py +100 -0
- onnxruntime/quantization/operators/pooling.py +67 -0
- onnxruntime/quantization/operators/qdq_base_operator.py +22 -0
- onnxruntime/quantization/operators/resize.py +34 -0
- onnxruntime/quantization/operators/softmax.py +74 -0
- onnxruntime/quantization/operators/split.py +63 -0
- onnxruntime/quantization/operators/where.py +87 -0
- onnxruntime/quantization/preprocess.py +141 -0
- onnxruntime/quantization/qdq_loss_debug.py +389 -0
- onnxruntime/quantization/qdq_quantizer.py +1187 -0
- onnxruntime/quantization/quant_utils.py +891 -0
- onnxruntime/quantization/quantize.py +748 -0
- onnxruntime/quantization/registry.py +106 -0
- onnxruntime/quantization/shape_inference.py +187 -0
- onnxruntime/quantization/tensor_quant_overrides.py +516 -0
- onnxruntime/tools/__init__.py +10 -0
- onnxruntime/tools/check_onnx_model_mobile_usability.py +47 -0
- onnxruntime/tools/convert_onnx_models_to_ort.py +377 -0
- onnxruntime/tools/file_utils.py +46 -0
- onnxruntime/tools/logger.py +11 -0
- onnxruntime/tools/make_dynamic_shape_fixed.py +72 -0
- onnxruntime/tools/mobile_helpers/__init__.py +0 -0
- onnxruntime/tools/mobile_helpers/coreml_supported_mlprogram_ops.md +33 -0
- onnxruntime/tools/mobile_helpers/coreml_supported_neuralnetwork_ops.md +43 -0
- onnxruntime/tools/mobile_helpers/nnapi_supported_ops.md +58 -0
- onnxruntime/tools/mobile_helpers/usability_checker.py +739 -0
- onnxruntime/tools/offline_tuning.py +169 -0
- onnxruntime/tools/onnx_model_utils.py +413 -0
- onnxruntime/tools/onnx_randomizer.py +85 -0
- onnxruntime/tools/onnxruntime_test.py +164 -0
- onnxruntime/tools/optimize_onnx_model.py +55 -0
- onnxruntime/tools/ort_format_model/__init__.py +25 -0
- onnxruntime/tools/ort_format_model/operator_type_usage_processors.py +663 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/__init__.py +0 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ArgType.py +7 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ArgTypeAndIndex.py +67 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Attribute.py +337 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/AttributeType.py +18 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Checkpoint.py +125 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DeprecatedKernelCreateInfos.py +120 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DeprecatedNodeIndexAndKernelDefHash.py +68 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DeprecatedSessionState.py +96 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DeprecatedSubGraphSessionState.py +72 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Dimension.py +71 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DimensionValue.py +80 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DimensionValueType.py +8 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/EdgeEnd.py +32 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/FloatProperty.py +67 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Graph.py +320 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/InferenceSession.py +88 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/IntProperty.py +67 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/KernelTypeStrArgsEntry.py +91 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/KernelTypeStrResolver.py +78 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/MapType.py +71 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Model.py +223 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ModuleState.py +141 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Node.py +317 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/NodeEdge.py +126 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/NodeType.py +7 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/NodesToOptimizeIndices.py +160 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/OpIdKernelTypeStrArgsEntry.py +91 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/OperatorSetId.py +67 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/OptimizerGroup.py +117 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ParameterOptimizerState.py +91 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/PropertyBag.py +152 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/RuntimeOptimizationRecord.py +105 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/RuntimeOptimizationRecordContainerEntry.py +91 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/RuntimeOptimizations.py +79 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/SequenceType.py +58 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Shape.py +78 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/SparseTensor.py +114 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/StringProperty.py +67 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/StringStringEntry.py +67 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Tensor.py +203 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/TensorDataType.py +26 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/TensorTypeAndShape.py +71 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/TypeInfo.py +83 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/TypeInfoValue.py +9 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ValueInfo.py +84 -0
- onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/__init__.py +6 -0
- onnxruntime/tools/ort_format_model/ort_model_processor.py +86 -0
- onnxruntime/tools/ort_format_model/types.py +84 -0
- onnxruntime/tools/ort_format_model/utils.py +62 -0
- onnxruntime/tools/pytorch_export_contrib_ops.py +108 -0
- onnxruntime/tools/pytorch_export_helpers.py +131 -0
- onnxruntime/tools/qdq_helpers/__init__.py +0 -0
- onnxruntime/tools/qdq_helpers/optimize_qdq_model.py +37 -0
- onnxruntime/tools/reduced_build_config_parser.py +202 -0
- onnxruntime/tools/symbolic_shape_infer.py +3016 -0
- onnxruntime/tools/update_onnx_opset.py +31 -0
- onnxruntime/transformers/__init__.py +8 -0
- onnxruntime/transformers/affinity_helper.py +40 -0
- onnxruntime/transformers/benchmark.py +944 -0
- onnxruntime/transformers/benchmark_helper.py +646 -0
- onnxruntime/transformers/bert_perf_test.py +634 -0
- onnxruntime/transformers/bert_test_data.py +642 -0
- onnxruntime/transformers/compare_bert_results.py +246 -0
- onnxruntime/transformers/constants.py +47 -0
- onnxruntime/transformers/convert_generation.py +3124 -0
- onnxruntime/transformers/convert_tf_models_to_pytorch.py +205 -0
- onnxruntime/transformers/convert_to_packing_mode.py +387 -0
- onnxruntime/transformers/dynamo_onnx_helper.py +104 -0
- onnxruntime/transformers/float16.py +501 -0
- onnxruntime/transformers/fusion_attention.py +1235 -0
- onnxruntime/transformers/fusion_attention_clip.py +257 -0
- onnxruntime/transformers/fusion_attention_sam2.py +534 -0
- onnxruntime/transformers/fusion_attention_unet.py +1304 -0
- onnxruntime/transformers/fusion_attention_vae.py +301 -0
- onnxruntime/transformers/fusion_bart_attention.py +640 -0
- onnxruntime/transformers/fusion_base.py +137 -0
- onnxruntime/transformers/fusion_bias_add.py +58 -0
- onnxruntime/transformers/fusion_biasgelu.py +66 -0
- onnxruntime/transformers/fusion_biassplitgelu.py +111 -0
- onnxruntime/transformers/fusion_conformer_attention.py +143 -0
- onnxruntime/transformers/fusion_embedlayer.py +811 -0
- onnxruntime/transformers/fusion_fastgelu.py +360 -0
- onnxruntime/transformers/fusion_gelu.py +259 -0
- onnxruntime/transformers/fusion_gelu_approximation.py +25 -0
- onnxruntime/transformers/fusion_gemmfastgelu.py +122 -0
- onnxruntime/transformers/fusion_gpt_attention.py +546 -0
- onnxruntime/transformers/fusion_gpt_attention_megatron.py +355 -0
- onnxruntime/transformers/fusion_gpt_attention_no_past.py +260 -0
- onnxruntime/transformers/fusion_group_norm.py +179 -0
- onnxruntime/transformers/fusion_layernorm.py +465 -0
- onnxruntime/transformers/fusion_nhwc_conv.py +100 -0
- onnxruntime/transformers/fusion_options.py +340 -0
- onnxruntime/transformers/fusion_qordered_attention.py +421 -0
- onnxruntime/transformers/fusion_qordered_gelu.py +119 -0
- onnxruntime/transformers/fusion_qordered_layernorm.py +123 -0
- onnxruntime/transformers/fusion_qordered_matmul.py +217 -0
- onnxruntime/transformers/fusion_quickgelu.py +74 -0
- onnxruntime/transformers/fusion_reshape.py +173 -0
- onnxruntime/transformers/fusion_rotary_attention.py +1592 -0
- onnxruntime/transformers/fusion_shape.py +110 -0
- onnxruntime/transformers/fusion_simplified_layernorm.py +159 -0
- onnxruntime/transformers/fusion_skip_group_norm.py +255 -0
- onnxruntime/transformers/fusion_skiplayernorm.py +209 -0
- onnxruntime/transformers/fusion_transpose.py +168 -0
- onnxruntime/transformers/fusion_utils.py +307 -0
- onnxruntime/transformers/huggingface_models.py +167 -0
- onnxruntime/transformers/import_utils.py +20 -0
- onnxruntime/transformers/io_binding_helper.py +442 -0
- onnxruntime/transformers/large_model_exporter.py +395 -0
- onnxruntime/transformers/machine_info.py +221 -0
- onnxruntime/transformers/metrics.py +164 -0
- onnxruntime/transformers/models/bart/__init__.py +12 -0
- onnxruntime/transformers/models/bart/export.py +98 -0
- onnxruntime/transformers/models/bert/__init__.py +12 -0
- onnxruntime/transformers/models/bert/eval_squad.py +329 -0
- onnxruntime/transformers/models/gpt2/__init__.py +12 -0
- onnxruntime/transformers/models/gpt2/benchmark_gpt2.py +413 -0
- onnxruntime/transformers/models/gpt2/convert_to_onnx.py +561 -0
- onnxruntime/transformers/models/gpt2/gpt2_helper.py +1032 -0
- onnxruntime/transformers/models/gpt2/gpt2_parity.py +513 -0
- onnxruntime/transformers/models/gpt2/gpt2_tester.py +501 -0
- onnxruntime/transformers/models/gpt2/parity_check_helper.py +146 -0
- onnxruntime/transformers/models/llama/__init__.py +12 -0
- onnxruntime/transformers/models/llama/benchmark.py +703 -0
- onnxruntime/transformers/models/llama/benchmark_all.py +488 -0
- onnxruntime/transformers/models/llama/benchmark_e2e.py +606 -0
- onnxruntime/transformers/models/llama/convert_to_onnx.py +1027 -0
- onnxruntime/transformers/models/llama/dist_settings.py +57 -0
- onnxruntime/transformers/models/llama/llama_inputs.py +503 -0
- onnxruntime/transformers/models/llama/llama_parity.py +309 -0
- onnxruntime/transformers/models/llama/llama_torch.py +47 -0
- onnxruntime/transformers/models/llama/quant_kv_dataloader.py +108 -0
- onnxruntime/transformers/models/longformer/__init__.py +12 -0
- onnxruntime/transformers/models/longformer/benchmark_longformer.py +821 -0
- onnxruntime/transformers/models/longformer/convert_to_onnx.py +413 -0
- onnxruntime/transformers/models/longformer/generate_test_data.py +347 -0
- onnxruntime/transformers/models/longformer/longformer_helper.py +77 -0
- onnxruntime/transformers/models/phi2/__init__.py +12 -0
- onnxruntime/transformers/models/phi2/convert_to_onnx.py +576 -0
- onnxruntime/transformers/models/phi2/inference_example.py +414 -0
- onnxruntime/transformers/models/sam2/__init__.py +12 -0
- onnxruntime/transformers/models/sam2/benchmark_sam2.py +625 -0
- onnxruntime/transformers/models/sam2/convert_to_onnx.py +260 -0
- onnxruntime/transformers/models/sam2/image_decoder.py +273 -0
- onnxruntime/transformers/models/sam2/image_encoder.py +186 -0
- onnxruntime/transformers/models/sam2/mask_decoder.py +208 -0
- onnxruntime/transformers/models/sam2/nvtx_helper.py +33 -0
- onnxruntime/transformers/models/sam2/prompt_encoder.py +189 -0
- onnxruntime/transformers/models/sam2/sam2_demo.py +322 -0
- onnxruntime/transformers/models/sam2/sam2_image_onnx_predictor.py +280 -0
- onnxruntime/transformers/models/sam2/sam2_utils.py +147 -0
- onnxruntime/transformers/models/stable_diffusion/__init__.py +12 -0
- onnxruntime/transformers/models/stable_diffusion/benchmark.py +1429 -0
- onnxruntime/transformers/models/stable_diffusion/benchmark_controlnet.py +426 -0
- onnxruntime/transformers/models/stable_diffusion/demo_txt2img.py +102 -0
- onnxruntime/transformers/models/stable_diffusion/demo_txt2img_xl.py +268 -0
- onnxruntime/transformers/models/stable_diffusion/demo_utils.py +778 -0
- onnxruntime/transformers/models/stable_diffusion/diffusion_models.py +1319 -0
- onnxruntime/transformers/models/stable_diffusion/diffusion_schedulers.py +1181 -0
- onnxruntime/transformers/models/stable_diffusion/engine_builder.py +296 -0
- onnxruntime/transformers/models/stable_diffusion/engine_builder_ort_cuda.py +388 -0
- onnxruntime/transformers/models/stable_diffusion/engine_builder_ort_trt.py +288 -0
- onnxruntime/transformers/models/stable_diffusion/engine_builder_tensorrt.py +395 -0
- onnxruntime/transformers/models/stable_diffusion/engine_builder_torch.py +108 -0
- onnxruntime/transformers/models/stable_diffusion/optimize_pipeline.py +350 -0
- onnxruntime/transformers/models/stable_diffusion/ort_optimizer.py +136 -0
- onnxruntime/transformers/models/stable_diffusion/pipeline_stable_diffusion.py +831 -0
- onnxruntime/transformers/models/stable_diffusion/trt_utilities.py +12 -0
- onnxruntime/transformers/models/t5/__init__.py +12 -0
- onnxruntime/transformers/models/t5/convert_to_onnx.py +278 -0
- onnxruntime/transformers/models/t5/past_helper.py +150 -0
- onnxruntime/transformers/models/t5/t5_decoder.py +438 -0
- onnxruntime/transformers/models/t5/t5_encoder.py +171 -0
- onnxruntime/transformers/models/t5/t5_encoder_decoder_init.py +299 -0
- onnxruntime/transformers/models/t5/t5_helper.py +272 -0
- onnxruntime/transformers/models/whisper/__init__.py +12 -0
- onnxruntime/transformers/models/whisper/benchmark.py +610 -0
- onnxruntime/transformers/models/whisper/benchmark_all.py +528 -0
- onnxruntime/transformers/models/whisper/convert_to_onnx.py +536 -0
- onnxruntime/transformers/models/whisper/whisper_chain.py +329 -0
- onnxruntime/transformers/models/whisper/whisper_decoder.py +402 -0
- onnxruntime/transformers/models/whisper/whisper_encoder.py +164 -0
- onnxruntime/transformers/models/whisper/whisper_encoder_decoder_init.py +306 -0
- onnxruntime/transformers/models/whisper/whisper_helper.py +524 -0
- onnxruntime/transformers/models/whisper/whisper_openai_helper.py +84 -0
- onnxruntime/transformers/onnx_exporter.py +717 -0
- onnxruntime/transformers/onnx_model.py +1569 -0
- onnxruntime/transformers/onnx_model_bart.py +142 -0
- onnxruntime/transformers/onnx_model_bert.py +481 -0
- onnxruntime/transformers/onnx_model_bert_keras.py +475 -0
- onnxruntime/transformers/onnx_model_bert_tf.py +589 -0
- onnxruntime/transformers/onnx_model_clip.py +40 -0
- onnxruntime/transformers/onnx_model_conformer.py +33 -0
- onnxruntime/transformers/onnx_model_gpt2.py +101 -0
- onnxruntime/transformers/onnx_model_phi.py +930 -0
- onnxruntime/transformers/onnx_model_sam2.py +138 -0
- onnxruntime/transformers/onnx_model_t5.py +791 -0
- onnxruntime/transformers/onnx_model_tnlr.py +227 -0
- onnxruntime/transformers/onnx_model_unet.py +259 -0
- onnxruntime/transformers/onnx_model_vae.py +43 -0
- onnxruntime/transformers/onnx_utils.py +55 -0
- onnxruntime/transformers/optimizer.py +612 -0
- onnxruntime/transformers/profiler.py +725 -0
- onnxruntime/transformers/quantize_helper.py +76 -0
- onnxruntime/transformers/shape_infer_helper.py +122 -0
- onnxruntime/transformers/shape_optimizer.py +401 -0
- onnxruntime/transformers/torch_onnx_export_helper.py +74 -0
- onnxruntime_directml-1.20.0.dist-info/METADATA +187 -0
- onnxruntime_directml-1.20.0.dist-info/RECORD +305 -0
- onnxruntime_directml-1.20.0.dist-info/WHEEL +5 -0
- onnxruntime_directml-1.20.0.dist-info/entry_points.txt +2 -0
- onnxruntime_directml-1.20.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# -------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# Licensed under the MIT License.
|
|
4
|
+
# --------------------------------------------------------------------------
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import argparse
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
from timeit import default_timer as timer
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
import onnxruntime as onnxrt
|
|
15
|
+
|
|
16
|
+
# Lookup tables from ONNX Runtime tensor type strings to the numpy dtype names
# used when random test data is generated.
float_dict = {
    f"tensor({onnx_name})": numpy_name
    for onnx_name, numpy_name in (
        ("float16", "float16"),
        ("float", "float32"),
        ("double", "float64"),
    )
}

# For integer tensors the ONNX element name and the numpy dtype name coincide.
integer_dict = {
    f"tensor({dtype_name})": dtype_name
    for dtype_name in ("int32", "int8", "uint8", "int16", "uint16", "int64", "uint64")
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def generate_feeds(sess, symbolic_dims: dict | None = None):
    """Build random input data for every input reported by ``sess``.

    Args:
        sess: Session object whose ``get_inputs()`` yields entries exposing
            ``name``, ``shape`` and ``type``.
        symbolic_dims: Optional mapping from symbolic dimension name to the
            size to use for it (converted with ``int()``). Symbolic dimensions
            without an entry, and falsy dimensions, are given size 1.

    Returns:
        A dict mapping each input name to a randomly filled numpy array of the
        resolved shape and of the dtype matching the input's tensor type.

    Note:
        An input whose type is not a supported float, integer or bool tensor
        causes a message to be printed and the process to exit via
        ``sys.exit(-1)``.
    """
    feeds = {}
    symbolic_dims = symbolic_dims or {}
    for input_meta in sess.get_inputs():
        # replace any symbolic dimensions
        shape = []
        for dim in input_meta.shape:
            if not dim:
                # unknown dim
                shape.append(1)
            elif isinstance(dim, str):
                # symbolic dim. see if we have a value otherwise use 1
                shape.append(int(symbolic_dims.get(dim, 1)))
            else:
                shape.append(dim)

        if input_meta.type in float_dict:
            feeds[input_meta.name] = np.random.rand(*shape).astype(float_dict[input_meta.type])
        elif input_meta.type in integer_dict:
            dtype_name = integer_dict[input_meta.type]
            # Casting floats above the dtype's maximum (e.g. up to 1000 into
            # int8/uint8) is out of range, so cap the upper bound per dtype.
            high = min(1000, np.iinfo(np.dtype(dtype_name)).max)
            feeds[input_meta.name] = np.random.uniform(high=high, size=tuple(shape)).astype(dtype_name)
        elif input_meta.type == "tensor(bool)":
            feeds[input_meta.name] = np.random.randint(2, size=tuple(shape)).astype("bool")
        else:
            print(f"unsupported input type {input_meta.type} for input {input_meta.name}")
            sys.exit(-1)
    return feeds
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# simple test program for loading onnx model, feeding all inputs and running the model num_iters times.
|
|
67
|
+
def run_model(
    model_path,
    num_iters=1,
    debug=None,
    profile=None,
    symbolic_dims=None,
    feeds=None,
    override_initializers=True,
):
    """Load a model, feed every input and run it ``num_iters`` times.

    Args:
        model_path: Path of the model passed to ``onnxrt.InferenceSession``.
        num_iters: Number of times ``sess.run`` is invoked.
        debug: When truthy, print the process id and wait for a key press on
            stdin before creating the session so a debugger can be attached.
        profile: When truthy, enable session profiling (file prefix is the
            model's base name) and print the trace file name at the end.
        symbolic_dims: Optional mapping of symbolic dimension name to value,
            forwarded to ``generate_feeds`` when feeds are generated.
        feeds: Optional dict of input name to value. When empty or ``None``,
            random feeds are generated. A provided dict is updated in place
            when ``override_initializers`` adds entries.
        override_initializers: When true, add random values for every
            overridable initializer reported by the session.

    Returns:
        A tuple ``(0, feeds, outputs)`` where ``outputs`` is the result of the
        last run, or ``False`` when ``num_iters`` is not positive.

    Note:
        An overridable initializer of unsupported type causes a message to be
        printed and the process to exit via ``sys.exit(-1)``.
    """
    symbolic_dims = symbolic_dims or {}
    if debug:
        print(f"Pausing execution ready for debugger to attach to pid: {os.getpid()}")
        print("Press key to continue.")
        sys.stdin.read(1)

    sess_options = None
    if profile:
        sess_options = onnxrt.SessionOptions()
        sess_options.enable_profiling = True
        sess_options.profile_file_prefix = os.path.basename(model_path)

    sess = onnxrt.InferenceSession(
        model_path,
        sess_options=sess_options,
        providers=onnxrt.get_available_providers(),
    )
    meta = sess.get_modelmeta()

    if not feeds:
        feeds = generate_feeds(sess, symbolic_dims)

    if override_initializers:
        # Starting with IR4 some initializers provide default values
        # and can be overridden (available in IR4). For IR < 4 models
        # the list would be empty
        for initializer in sess.get_overridable_initializers():
            shape = [dim if dim else 1 for dim in initializer.shape]
            if initializer.type in float_dict:
                feeds[initializer.name] = np.random.rand(*shape).astype(float_dict[initializer.type])
            elif initializer.type in integer_dict:
                dtype_name = integer_dict[initializer.type]
                # Cap the upper bound so the float -> integer cast stays in
                # range for narrow types such as int8/uint8.
                high = min(1000, np.iinfo(np.dtype(dtype_name)).max)
                feeds[initializer.name] = np.random.uniform(high=high, size=tuple(shape)).astype(dtype_name)
            elif initializer.type == "tensor(bool)":
                feeds[initializer.name] = np.random.randint(2, size=tuple(shape)).astype("bool")
            else:
                print(f"unsupported initializer type {initializer.type} for initializer {initializer.name}")
                sys.exit(-1)

    start = timer()
    for _i in range(num_iters):
        outputs = sess.run([], feeds)  # fetch all outputs
    end = timer()

    print(f"model: {meta.graph_name}")
    print(f"version: {meta.version}")
    print(f"iterations: {num_iters}")
    if num_iters > 0:
        print(f"avg latency: {((end - start) * 1000) / num_iters} ms")
    else:
        # No run happened, so there is nothing to average (and dividing by
        # zero iterations would raise).
        print("avg latency: n/a")

    if profile:
        trace_file = sess.end_profiling()
        print(f"trace file written to: {trace_file}")

    # ``outputs`` is only bound when at least one iteration ran; the short
    # circuit keeps the zero-iteration case from touching it.
    return 0, feeds, num_iters > 0 and outputs
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def main():
    """Command line entry point: parse the arguments, run the model and exit with its status code."""
    cli = argparse.ArgumentParser(description="Simple ONNX Runtime Test Tool.")

    # Positional arguments.
    cli.add_argument("model_path", help="model path")
    cli.add_argument("num_iters", nargs="?", type=int, default=1000, help="model run iterations. default=1000")

    # Optional switches.
    cli.add_argument("--debug", action="store_true", help="pause execution to allow attaching a debugger.")
    cli.add_argument("--profile", action="store_true", help="enable chrome timeline trace profiling.")

    symbolic_dims_help = (
        "Comma separated name=value pairs for any symbolic dimensions in the model input. "
        "e.g. --symbolic_dims batch=1,seqlen=5. "
        "If not provided, the value of 1 will be used for all symbolic dimensions."
    )
    cli.add_argument(
        "--symbolic_dims",
        default={},
        # "a=1,b=2" -> {"a": "1", "b": "2"}
        type=lambda s: dict(x.split("=") for x in s.split(",")),
        help=symbolic_dims_help,
    )

    options = cli.parse_args()
    status, _feeds, _outputs = run_model(
        options.model_path,
        options.num_iters,
        options.debug,
        options.profile,
        options.symbolic_dims,
    )
    sys.exit(status)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# Run the command line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# Licensed under the MIT License.
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import os
|
|
7
|
+
import pathlib
|
|
8
|
+
|
|
9
|
+
from .onnx_model_utils import get_optimization_level, optimize_model
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def optimize_model_helper():
    """Command line entry point: parse the arguments and optimize an ONNX model via ``optimize_model``."""
    prog_name = f"{os.path.basename(__file__)}:{optimize_model_helper.__name__}"
    description = (
        "\n"
        "Optimize an ONNX model using ONNX Runtime to the specified level.\n"
        "See https://onnxruntime.ai/docs/performance/model-optimizations/graph-optimizations.html for more\n"
        "details of the optimization levels."
    )
    arg_parser = argparse.ArgumentParser(prog_name, description=description)

    arg_parser.add_argument(
        "--opt_level",
        default="basic",
        choices=["disable", "basic", "extended", "all"],
        help="Optimization level to use.",
    )
    log_level_help = (
        "Log level. Defaults to Error so we don't get output about unused initializers "
        "being removed. Warning or Info may be desirable in some scenarios."
    )
    arg_parser.add_argument(
        "--log_level",
        choices=["debug", "info", "warning", "error"],
        type=str,
        required=False,
        default="error",
        help=log_level_help,
    )

    arg_parser.add_argument("input_model", type=pathlib.Path, help="Provide path to ONNX model to update.")
    arg_parser.add_argument(
        "output_model", type=pathlib.Path, help="Provide path to write optimized ONNX model to."
    )

    options = arg_parser.parse_args()

    # Numeric level handed to optimize_model for each accepted --log_level
    # choice ("debug" is the ORT verbose level).
    numeric_log_levels = {"debug": 0, "info": 1, "warning": 2, "error": 3}
    log_level = numeric_log_levels[options.log_level]

    optimize_model(
        options.input_model,
        options.output_model,
        get_optimization_level(options.opt_level),
        log_level,
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Run the optimizer command line helper only when this file is executed as a script.
if __name__ == "__main__":
    optimize_model_helper()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
# The directory holding the ORT flatbuffers python module has to be on sys.path
# before the imports that follow. Two layouts are handled: the installed ORT
# python package and a checkout of the ORT repo.
script_dir = os.path.dirname(os.path.realpath(__file__))
fbs_py_schema_dirname = "ort_flatbuffers_py"
if not os.path.isdir(os.path.join(script_dir, fbs_py_schema_dirname)):
    # No bindings next to this file: running directly from the ORT repo, where
    # they live under onnxruntime/core/flatbuffers, four levels up from here.
    ort_root = os.path.abspath(os.path.join(script_dir, os.pardir, os.pardir, os.pardir, os.pardir))
    ort_fbs_py_parent_dir = os.path.join(ort_root, "onnxruntime", "core", "flatbuffers")
else:
    # Bindings sit alongside this file, so this is the ORT python package.
    ort_fbs_py_parent_dir = script_dir

sys.path.append(ort_fbs_py_parent_dir)
|
|
20
|
+
|
|
21
|
+
from .operator_type_usage_processors import GloballyAllowedTypesOpTypeImplFilter # noqa: E402, F401
|
|
22
|
+
from .operator_type_usage_processors import OperatorTypeUsageManager # noqa: E402, F401
|
|
23
|
+
from .operator_type_usage_processors import OpTypeImplFilterInterface # noqa: E402, F401
|
|
24
|
+
from .ort_model_processor import OrtFormatModelProcessor # noqa: E402, F401
|
|
25
|
+
from .utils import create_config_from_models # noqa: E402, F401
|