onnxruntime_directml-1.24.1-cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (322)
  1. onnxruntime/LICENSE +21 -0
  2. onnxruntime/Privacy.md +21 -0
  3. onnxruntime/ThirdPartyNotices.txt +6121 -0
  4. onnxruntime/__init__.py +418 -0
  5. onnxruntime/backend/__init__.py +6 -0
  6. onnxruntime/backend/backend.py +175 -0
  7. onnxruntime/backend/backend_rep.py +52 -0
  8. onnxruntime/capi/DirectML.dll +0 -0
  9. onnxruntime/capi/__init__.py +4 -0
  10. onnxruntime/capi/_ld_preload.py +7 -0
  11. onnxruntime/capi/_pybind_state.py +33 -0
  12. onnxruntime/capi/build_and_package_info.py +2 -0
  13. onnxruntime/capi/convert_npz_to_onnx_adapter.py +48 -0
  14. onnxruntime/capi/onnxruntime.dll +0 -0
  15. onnxruntime/capi/onnxruntime_collect_build_info.py +47 -0
  16. onnxruntime/capi/onnxruntime_inference_collection.py +1440 -0
  17. onnxruntime/capi/onnxruntime_providers_shared.dll +0 -0
  18. onnxruntime/capi/onnxruntime_pybind11_state.pyd +0 -0
  19. onnxruntime/capi/onnxruntime_validation.py +154 -0
  20. onnxruntime/capi/version_info.py +2 -0
  21. onnxruntime/datasets/__init__.py +18 -0
  22. onnxruntime/datasets/logreg_iris.onnx +0 -0
  23. onnxruntime/datasets/mul_1.onnx +0 -0
  24. onnxruntime/datasets/sigmoid.onnx +13 -0
  25. onnxruntime/quantization/CalTableFlatBuffers/KeyValue.py +78 -0
  26. onnxruntime/quantization/CalTableFlatBuffers/TrtTable.py +90 -0
  27. onnxruntime/quantization/CalTableFlatBuffers/__init__.py +0 -0
  28. onnxruntime/quantization/__init__.py +19 -0
  29. onnxruntime/quantization/base_quantizer.py +529 -0
  30. onnxruntime/quantization/calibrate.py +1267 -0
  31. onnxruntime/quantization/execution_providers/qnn/__init__.py +2 -0
  32. onnxruntime/quantization/execution_providers/qnn/fusion_lpnorm.py +132 -0
  33. onnxruntime/quantization/execution_providers/qnn/fusion_spacetodepth.py +162 -0
  34. onnxruntime/quantization/execution_providers/qnn/mixed_precision_overrides_utils.py +413 -0
  35. onnxruntime/quantization/execution_providers/qnn/preprocess.py +353 -0
  36. onnxruntime/quantization/execution_providers/qnn/quant_config.py +389 -0
  37. onnxruntime/quantization/fusions/__init__.py +4 -0
  38. onnxruntime/quantization/fusions/fusion.py +311 -0
  39. onnxruntime/quantization/fusions/fusion_gelu.py +272 -0
  40. onnxruntime/quantization/fusions/fusion_layernorm.py +146 -0
  41. onnxruntime/quantization/fusions/replace_upsample_with_resize.py +96 -0
  42. onnxruntime/quantization/matmul_bnb4_quantizer.py +239 -0
  43. onnxruntime/quantization/matmul_nbits_quantizer.py +1638 -0
  44. onnxruntime/quantization/neural_compressor/__init__.py +1 -0
  45. onnxruntime/quantization/neural_compressor/onnx_model.py +1251 -0
  46. onnxruntime/quantization/neural_compressor/util.py +80 -0
  47. onnxruntime/quantization/neural_compressor/weight_only.py +932 -0
  48. onnxruntime/quantization/onnx_model.py +600 -0
  49. onnxruntime/quantization/onnx_quantizer.py +1163 -0
  50. onnxruntime/quantization/operators/__init__.py +2 -0
  51. onnxruntime/quantization/operators/activation.py +119 -0
  52. onnxruntime/quantization/operators/argmax.py +18 -0
  53. onnxruntime/quantization/operators/attention.py +73 -0
  54. onnxruntime/quantization/operators/base_operator.py +26 -0
  55. onnxruntime/quantization/operators/binary_op.py +72 -0
  56. onnxruntime/quantization/operators/concat.py +62 -0
  57. onnxruntime/quantization/operators/conv.py +260 -0
  58. onnxruntime/quantization/operators/direct_q8.py +78 -0
  59. onnxruntime/quantization/operators/embed_layernorm.py +121 -0
  60. onnxruntime/quantization/operators/gather.py +64 -0
  61. onnxruntime/quantization/operators/gavgpool.py +62 -0
  62. onnxruntime/quantization/operators/gemm.py +172 -0
  63. onnxruntime/quantization/operators/lstm.py +121 -0
  64. onnxruntime/quantization/operators/matmul.py +231 -0
  65. onnxruntime/quantization/operators/maxpool.py +34 -0
  66. onnxruntime/quantization/operators/norm.py +40 -0
  67. onnxruntime/quantization/operators/pad.py +172 -0
  68. onnxruntime/quantization/operators/pooling.py +67 -0
  69. onnxruntime/quantization/operators/qdq_base_operator.py +22 -0
  70. onnxruntime/quantization/operators/resize.py +34 -0
  71. onnxruntime/quantization/operators/softmax.py +74 -0
  72. onnxruntime/quantization/operators/split.py +63 -0
  73. onnxruntime/quantization/operators/where.py +87 -0
  74. onnxruntime/quantization/preprocess.py +141 -0
  75. onnxruntime/quantization/qdq_loss_debug.py +389 -0
  76. onnxruntime/quantization/qdq_quantizer.py +1477 -0
  77. onnxruntime/quantization/quant_utils.py +1051 -0
  78. onnxruntime/quantization/quantize.py +953 -0
  79. onnxruntime/quantization/registry.py +110 -0
  80. onnxruntime/quantization/shape_inference.py +204 -0
  81. onnxruntime/quantization/static_quantize_runner.py +256 -0
  82. onnxruntime/quantization/tensor_quant_overrides.py +520 -0
  83. onnxruntime/tools/__init__.py +10 -0
  84. onnxruntime/tools/check_onnx_model_mobile_usability.py +47 -0
  85. onnxruntime/tools/convert_onnx_models_to_ort.py +380 -0
  86. onnxruntime/tools/file_utils.py +47 -0
  87. onnxruntime/tools/logger.py +11 -0
  88. onnxruntime/tools/make_dynamic_shape_fixed.py +73 -0
  89. onnxruntime/tools/mobile_helpers/__init__.py +0 -0
  90. onnxruntime/tools/mobile_helpers/coreml_supported_mlprogram_ops.md +53 -0
  91. onnxruntime/tools/mobile_helpers/coreml_supported_neuralnetwork_ops.md +43 -0
  92. onnxruntime/tools/mobile_helpers/nnapi_supported_ops.md +58 -0
  93. onnxruntime/tools/mobile_helpers/usability_checker.py +738 -0
  94. onnxruntime/tools/offline_tuning.py +169 -0
  95. onnxruntime/tools/onnx_model_utils.py +416 -0
  96. onnxruntime/tools/onnx_randomizer.py +85 -0
  97. onnxruntime/tools/onnxruntime_test.py +164 -0
  98. onnxruntime/tools/optimize_onnx_model.py +56 -0
  99. onnxruntime/tools/ort_format_model/__init__.py +27 -0
  100. onnxruntime/tools/ort_format_model/operator_type_usage_processors.py +653 -0
  101. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/__init__.py +0 -0
  102. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ArgType.py +7 -0
  103. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ArgTypeAndIndex.py +67 -0
  104. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Attribute.py +337 -0
  105. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/AttributeType.py +18 -0
  106. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Checkpoint.py +125 -0
  107. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DeprecatedKernelCreateInfos.py +120 -0
  108. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DeprecatedNodeIndexAndKernelDefHash.py +68 -0
  109. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DeprecatedSessionState.py +96 -0
  110. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DeprecatedSubGraphSessionState.py +72 -0
  111. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Dimension.py +71 -0
  112. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DimensionValue.py +80 -0
  113. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/DimensionValueType.py +8 -0
  114. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/EdgeEnd.py +32 -0
  115. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/FloatProperty.py +67 -0
  116. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Graph.py +320 -0
  117. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/InferenceSession.py +88 -0
  118. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/IntProperty.py +67 -0
  119. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/KernelTypeStrArgsEntry.py +91 -0
  120. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/KernelTypeStrResolver.py +78 -0
  121. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/MapType.py +71 -0
  122. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Model.py +223 -0
  123. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ModuleState.py +141 -0
  124. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Node.py +317 -0
  125. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/NodeEdge.py +126 -0
  126. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/NodeType.py +7 -0
  127. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/NodesToOptimizeIndices.py +160 -0
  128. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/OpIdKernelTypeStrArgsEntry.py +91 -0
  129. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/OperatorSetId.py +67 -0
  130. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/OptimizerGroup.py +117 -0
  131. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ParameterOptimizerState.py +91 -0
  132. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/PropertyBag.py +152 -0
  133. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/RuntimeOptimizationRecord.py +105 -0
  134. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/RuntimeOptimizationRecordContainerEntry.py +91 -0
  135. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/RuntimeOptimizations.py +79 -0
  136. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/SequenceType.py +58 -0
  137. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Shape.py +78 -0
  138. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/SparseTensor.py +114 -0
  139. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/StringProperty.py +67 -0
  140. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/StringStringEntry.py +67 -0
  141. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/Tensor.py +203 -0
  142. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/TensorDataType.py +26 -0
  143. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/TensorTypeAndShape.py +71 -0
  144. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/TypeInfo.py +83 -0
  145. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/TypeInfoValue.py +9 -0
  146. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/ValueInfo.py +84 -0
  147. onnxruntime/tools/ort_format_model/ort_flatbuffers_py/fbs/__init__.py +6 -0
  148. onnxruntime/tools/ort_format_model/ort_model_processor.py +86 -0
  149. onnxruntime/tools/ort_format_model/types.py +85 -0
  150. onnxruntime/tools/ort_format_model/utils.py +61 -0
  151. onnxruntime/tools/pytorch_export_contrib_ops.py +129 -0
  152. onnxruntime/tools/pytorch_export_helpers.py +131 -0
  153. onnxruntime/tools/qdq_helpers/__init__.py +0 -0
  154. onnxruntime/tools/qdq_helpers/optimize_qdq_model.py +37 -0
  155. onnxruntime/tools/qnn/add_trans_cast.py +292 -0
  156. onnxruntime/tools/qnn/gen_qnn_ctx_onnx_model.py +364 -0
  157. onnxruntime/tools/qnn/preprocess.py +165 -0
  158. onnxruntime/tools/reduced_build_config_parser.py +203 -0
  159. onnxruntime/tools/remove_initializer_from_input.py +37 -0
  160. onnxruntime/tools/symbolic_shape_infer.py +3094 -0
  161. onnxruntime/tools/update_onnx_opset.py +31 -0
  162. onnxruntime/transformers/__init__.py +8 -0
  163. onnxruntime/transformers/affinity_helper.py +40 -0
  164. onnxruntime/transformers/benchmark.py +942 -0
  165. onnxruntime/transformers/benchmark_helper.py +643 -0
  166. onnxruntime/transformers/bert_perf_test.py +629 -0
  167. onnxruntime/transformers/bert_test_data.py +641 -0
  168. onnxruntime/transformers/compare_bert_results.py +256 -0
  169. onnxruntime/transformers/constants.py +47 -0
  170. onnxruntime/transformers/convert_generation.py +3605 -0
  171. onnxruntime/transformers/convert_tf_models_to_pytorch.py +205 -0
  172. onnxruntime/transformers/convert_to_packing_mode.py +385 -0
  173. onnxruntime/transformers/dynamo_onnx_helper.py +205 -0
  174. onnxruntime/transformers/float16.py +501 -0
  175. onnxruntime/transformers/fusion_attention.py +1189 -0
  176. onnxruntime/transformers/fusion_attention_clip.py +340 -0
  177. onnxruntime/transformers/fusion_attention_sam2.py +533 -0
  178. onnxruntime/transformers/fusion_attention_unet.py +1307 -0
  179. onnxruntime/transformers/fusion_attention_vae.py +300 -0
  180. onnxruntime/transformers/fusion_bart_attention.py +435 -0
  181. onnxruntime/transformers/fusion_base.py +141 -0
  182. onnxruntime/transformers/fusion_bias_add.py +57 -0
  183. onnxruntime/transformers/fusion_biasgelu.py +66 -0
  184. onnxruntime/transformers/fusion_biassplitgelu.py +110 -0
  185. onnxruntime/transformers/fusion_conformer_attention.py +222 -0
  186. onnxruntime/transformers/fusion_constant_fold.py +144 -0
  187. onnxruntime/transformers/fusion_embedlayer.py +810 -0
  188. onnxruntime/transformers/fusion_fastgelu.py +492 -0
  189. onnxruntime/transformers/fusion_gelu.py +258 -0
  190. onnxruntime/transformers/fusion_gelu_approximation.py +25 -0
  191. onnxruntime/transformers/fusion_gemmfastgelu.py +121 -0
  192. onnxruntime/transformers/fusion_gpt_attention.py +546 -0
  193. onnxruntime/transformers/fusion_gpt_attention_megatron.py +355 -0
  194. onnxruntime/transformers/fusion_gpt_attention_no_past.py +260 -0
  195. onnxruntime/transformers/fusion_group_norm.py +180 -0
  196. onnxruntime/transformers/fusion_layernorm.py +489 -0
  197. onnxruntime/transformers/fusion_mha_mmdit.py +667 -0
  198. onnxruntime/transformers/fusion_nhwc_conv.py +99 -0
  199. onnxruntime/transformers/fusion_options.py +340 -0
  200. onnxruntime/transformers/fusion_qordered_attention.py +420 -0
  201. onnxruntime/transformers/fusion_qordered_gelu.py +118 -0
  202. onnxruntime/transformers/fusion_qordered_layernorm.py +122 -0
  203. onnxruntime/transformers/fusion_qordered_matmul.py +216 -0
  204. onnxruntime/transformers/fusion_quickgelu.py +74 -0
  205. onnxruntime/transformers/fusion_reshape.py +173 -0
  206. onnxruntime/transformers/fusion_rotary_attention.py +1591 -0
  207. onnxruntime/transformers/fusion_shape.py +109 -0
  208. onnxruntime/transformers/fusion_simplified_layernorm.py +165 -0
  209. onnxruntime/transformers/fusion_skip_group_norm.py +254 -0
  210. onnxruntime/transformers/fusion_skiplayernorm.py +209 -0
  211. onnxruntime/transformers/fusion_transpose.py +167 -0
  212. onnxruntime/transformers/fusion_utils.py +321 -0
  213. onnxruntime/transformers/huggingface_models.py +74 -0
  214. onnxruntime/transformers/import_utils.py +20 -0
  215. onnxruntime/transformers/io_binding_helper.py +487 -0
  216. onnxruntime/transformers/large_model_exporter.py +395 -0
  217. onnxruntime/transformers/machine_info.py +230 -0
  218. onnxruntime/transformers/metrics.py +163 -0
  219. onnxruntime/transformers/models/bart/__init__.py +12 -0
  220. onnxruntime/transformers/models/bart/export.py +98 -0
  221. onnxruntime/transformers/models/bert/__init__.py +12 -0
  222. onnxruntime/transformers/models/bert/eval_squad.py +329 -0
  223. onnxruntime/transformers/models/gpt2/__init__.py +12 -0
  224. onnxruntime/transformers/models/gpt2/benchmark_gpt2.py +413 -0
  225. onnxruntime/transformers/models/gpt2/convert_to_onnx.py +566 -0
  226. onnxruntime/transformers/models/gpt2/gpt2_helper.py +1031 -0
  227. onnxruntime/transformers/models/gpt2/gpt2_parity.py +513 -0
  228. onnxruntime/transformers/models/gpt2/gpt2_tester.py +501 -0
  229. onnxruntime/transformers/models/gpt2/parity_check_helper.py +146 -0
  230. onnxruntime/transformers/models/llama/__init__.py +12 -0
  231. onnxruntime/transformers/models/llama/benchmark.py +700 -0
  232. onnxruntime/transformers/models/llama/benchmark_all.py +488 -0
  233. onnxruntime/transformers/models/llama/benchmark_e2e.py +608 -0
  234. onnxruntime/transformers/models/llama/convert_to_onnx.py +1064 -0
  235. onnxruntime/transformers/models/llama/dist_settings.py +57 -0
  236. onnxruntime/transformers/models/llama/llama_inputs.py +504 -0
  237. onnxruntime/transformers/models/llama/llama_parity.py +343 -0
  238. onnxruntime/transformers/models/llama/llama_torch.py +47 -0
  239. onnxruntime/transformers/models/llama/quant_kv_dataloader.py +108 -0
  240. onnxruntime/transformers/models/longformer/__init__.py +12 -0
  241. onnxruntime/transformers/models/longformer/benchmark_longformer.py +821 -0
  242. onnxruntime/transformers/models/longformer/convert_to_onnx.py +413 -0
  243. onnxruntime/transformers/models/longformer/generate_test_data.py +347 -0
  244. onnxruntime/transformers/models/longformer/longformer_helper.py +76 -0
  245. onnxruntime/transformers/models/phi2/__init__.py +12 -0
  246. onnxruntime/transformers/models/phi2/convert_to_onnx.py +590 -0
  247. onnxruntime/transformers/models/phi2/inference_example.py +414 -0
  248. onnxruntime/transformers/models/sam2/__init__.py +12 -0
  249. onnxruntime/transformers/models/sam2/benchmark_sam2.py +638 -0
  250. onnxruntime/transformers/models/sam2/convert_to_onnx.py +270 -0
  251. onnxruntime/transformers/models/sam2/image_decoder.py +272 -0
  252. onnxruntime/transformers/models/sam2/image_encoder.py +236 -0
  253. onnxruntime/transformers/models/sam2/mask_decoder.py +208 -0
  254. onnxruntime/transformers/models/sam2/nvtx_helper.py +33 -0
  255. onnxruntime/transformers/models/sam2/prompt_encoder.py +189 -0
  256. onnxruntime/transformers/models/sam2/sam2_demo.py +321 -0
  257. onnxruntime/transformers/models/sam2/sam2_image_onnx_predictor.py +279 -0
  258. onnxruntime/transformers/models/sam2/sam2_utils.py +147 -0
  259. onnxruntime/transformers/models/stable_diffusion/__init__.py +12 -0
  260. onnxruntime/transformers/models/stable_diffusion/benchmark.py +1519 -0
  261. onnxruntime/transformers/models/stable_diffusion/benchmark_controlnet.py +426 -0
  262. onnxruntime/transformers/models/stable_diffusion/demo_txt2img.py +103 -0
  263. onnxruntime/transformers/models/stable_diffusion/demo_txt2img_xl.py +269 -0
  264. onnxruntime/transformers/models/stable_diffusion/demo_utils.py +778 -0
  265. onnxruntime/transformers/models/stable_diffusion/diffusion_models.py +1318 -0
  266. onnxruntime/transformers/models/stable_diffusion/diffusion_schedulers.py +1179 -0
  267. onnxruntime/transformers/models/stable_diffusion/engine_builder.py +295 -0
  268. onnxruntime/transformers/models/stable_diffusion/engine_builder_ort_cuda.py +387 -0
  269. onnxruntime/transformers/models/stable_diffusion/engine_builder_ort_trt.py +288 -0
  270. onnxruntime/transformers/models/stable_diffusion/engine_builder_tensorrt.py +395 -0
  271. onnxruntime/transformers/models/stable_diffusion/engine_builder_torch.py +108 -0
  272. onnxruntime/transformers/models/stable_diffusion/optimize_pipeline.py +590 -0
  273. onnxruntime/transformers/models/stable_diffusion/ort_optimizer.py +136 -0
  274. onnxruntime/transformers/models/stable_diffusion/pipeline_stable_diffusion.py +831 -0
  275. onnxruntime/transformers/models/stable_diffusion/trt_utilities.py +12 -0
  276. onnxruntime/transformers/models/t5/__init__.py +12 -0
  277. onnxruntime/transformers/models/t5/convert_to_onnx.py +318 -0
  278. onnxruntime/transformers/models/t5/t5_decoder.py +437 -0
  279. onnxruntime/transformers/models/t5/t5_encoder.py +70 -0
  280. onnxruntime/transformers/models/t5/t5_encoder_decoder_init.py +361 -0
  281. onnxruntime/transformers/models/t5/t5_helper.py +302 -0
  282. onnxruntime/transformers/models/whisper/__init__.py +12 -0
  283. onnxruntime/transformers/models/whisper/benchmark.py +585 -0
  284. onnxruntime/transformers/models/whisper/benchmark_all.py +526 -0
  285. onnxruntime/transformers/models/whisper/convert_to_onnx.py +609 -0
  286. onnxruntime/transformers/models/whisper/whisper_chain.py +334 -0
  287. onnxruntime/transformers/models/whisper/whisper_decoder.py +464 -0
  288. onnxruntime/transformers/models/whisper/whisper_encoder.py +164 -0
  289. onnxruntime/transformers/models/whisper/whisper_encoder_decoder_init.py +371 -0
  290. onnxruntime/transformers/models/whisper/whisper_helper.py +1035 -0
  291. onnxruntime/transformers/models/whisper/whisper_inputs.py +380 -0
  292. onnxruntime/transformers/models/whisper/whisper_jump_times.py +477 -0
  293. onnxruntime/transformers/onnx_exporter.py +719 -0
  294. onnxruntime/transformers/onnx_model.py +1636 -0
  295. onnxruntime/transformers/onnx_model_bart.py +141 -0
  296. onnxruntime/transformers/onnx_model_bert.py +488 -0
  297. onnxruntime/transformers/onnx_model_bert_keras.py +474 -0
  298. onnxruntime/transformers/onnx_model_bert_tf.py +588 -0
  299. onnxruntime/transformers/onnx_model_clip.py +42 -0
  300. onnxruntime/transformers/onnx_model_conformer.py +32 -0
  301. onnxruntime/transformers/onnx_model_gpt2.py +101 -0
  302. onnxruntime/transformers/onnx_model_mmdit.py +112 -0
  303. onnxruntime/transformers/onnx_model_phi.py +929 -0
  304. onnxruntime/transformers/onnx_model_sam2.py +137 -0
  305. onnxruntime/transformers/onnx_model_t5.py +985 -0
  306. onnxruntime/transformers/onnx_model_tnlr.py +226 -0
  307. onnxruntime/transformers/onnx_model_unet.py +258 -0
  308. onnxruntime/transformers/onnx_model_vae.py +42 -0
  309. onnxruntime/transformers/onnx_utils.py +55 -0
  310. onnxruntime/transformers/optimizer.py +620 -0
  311. onnxruntime/transformers/past_helper.py +149 -0
  312. onnxruntime/transformers/profile_result_processor.py +358 -0
  313. onnxruntime/transformers/profiler.py +434 -0
  314. onnxruntime/transformers/quantize_helper.py +76 -0
  315. onnxruntime/transformers/shape_infer_helper.py +121 -0
  316. onnxruntime/transformers/shape_optimizer.py +400 -0
  317. onnxruntime/transformers/torch_onnx_export_helper.py +74 -0
  318. onnxruntime_directml-1.24.1.dist-info/METADATA +216 -0
  319. onnxruntime_directml-1.24.1.dist-info/RECORD +322 -0
  320. onnxruntime_directml-1.24.1.dist-info/WHEEL +5 -0
  321. onnxruntime_directml-1.24.1.dist-info/entry_points.txt +2 -0
  322. onnxruntime_directml-1.24.1.dist-info/top_level.txt +1 -0
onnxruntime/transformers/bert_perf_test.py  @@ -0,0 +1,629 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

# This tool measures the inference performance of onnxruntime on a BERT-like model
# with inputs like input_ids, token_type_ids (optional), and attention_mask (optional).
#
# If the model does not have exactly the three inputs above, you might need to specify
# the input names with --input_ids_name, --segment_ids_name and --input_mask_name.

# Example command to run the test on batch sizes 1 and 2 for a model on GPU:
#   python bert_perf_test.py --model bert.onnx --batch_size 1 2 --sequence_length 128 --use_gpu --samples 1000 --test_times 1
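#
# For this DirectML build, a GPU run would select the DmlExecutionProvider
# (assuming a DirectML-capable device is available):
#   python bert_perf_test.py --model bert.onnx --batch_size 1 --sequence_length 128 --use_gpu --provider dml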

import argparse
import csv
import json
import multiprocessing
import os
import random
import statistics
import timeit
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path

import numpy as np
import psutil
import torch
from bert_test_data import generate_test_data, get_bert_inputs


@dataclass
class TestSetting:
    batch_size: int
    sequence_length: int
    test_cases: int
    test_times: int
    use_gpu: bool
    use_io_binding: bool
    provider: str
    intra_op_num_threads: int
    seed: int
    verbose: bool
    log_severity: int
    average_sequence_length: int
    random_sequence_length: bool


@dataclass
class ModelSetting:
    model_path: str
    input_ids_name: str
    segment_ids_name: str
    input_mask_name: str
    opt_level: int
    input_tuning_results: str | None
    output_tuning_results: str | None
    mask_type: int
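

# Illustrative only (not part of the original file): a ModelSetting for a model
# that uses the standard BERT input names, with the default opt_level (99) and
# mask_type (2):
#
#   ModelSetting("bert.onnx", "input_ids", "token_type_ids", "attention_mask",
#                99, None, None, 2)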


def create_session(
    model_path,
    use_gpu,
    provider,
    intra_op_num_threads,
    graph_optimization_level=None,
    log_severity=2,
    tuning_results_path=None,
):
    import onnxruntime  # noqa: PLC0415

    onnxruntime.set_default_logger_severity(log_severity)

    # Only warn about the missing CUDA provider when a CUDA-based provider is requested.
    if (
        use_gpu
        and provider in (None, "cuda", "tensorrt")
        and "CUDAExecutionProvider" not in onnxruntime.get_available_providers()
    ):
        print(
            "Warning: Please install the onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing GPU performance."
        )

    if use_gpu:
        if provider == "dml":
            execution_providers = ["DmlExecutionProvider", "CPUExecutionProvider"]
        elif provider == "migraphx":
            execution_providers = [
                "MIGraphXExecutionProvider",
                "CPUExecutionProvider",
            ]
        elif provider == "cuda":
            execution_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
        elif provider == "tensorrt":
            execution_providers = [
                "TensorrtExecutionProvider",
                "CUDAExecutionProvider",
                "CPUExecutionProvider",
            ]
        else:
            execution_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    else:
        execution_providers = ["CPUExecutionProvider"]

    sess_options = onnxruntime.SessionOptions()
    sess_options.log_severity_level = log_severity
    sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL

    if graph_optimization_level is None:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
    elif graph_optimization_level == 0:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
    elif graph_optimization_level == 1:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC
    elif graph_optimization_level == 2:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
    elif graph_optimization_level == 3:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_LAYOUT
    elif graph_optimization_level == 99:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
    else:
        sess_options.graph_optimization_level = graph_optimization_level

    if intra_op_num_threads is not None:
        sess_options.intra_op_num_threads = intra_op_num_threads

    session = onnxruntime.InferenceSession(model_path, sess_options, providers=execution_providers)

    if use_gpu:
        if provider == "dml":
            assert "DmlExecutionProvider" in session.get_providers()
        elif provider == "migraphx":
            assert "MIGraphXExecutionProvider" in session.get_providers()
        elif provider == "cuda":
            assert "CUDAExecutionProvider" in session.get_providers()
        elif provider == "tensorrt":
            assert "TensorrtExecutionProvider" in session.get_providers()
            assert "CUDAExecutionProvider" in session.get_providers()
        else:
            assert "CUDAExecutionProvider" in session.get_providers()
    else:
        assert "CPUExecutionProvider" in session.get_providers()

    if tuning_results_path is not None:
        with open(tuning_results_path) as f:
            session.set_tuning_results(json.load(f))

    return session
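

# A minimal sketch (added for illustration; not part of the original file) of
# calling create_session with this wheel's DirectML provider. "bert.onnx" is a
# placeholder path.
def _example_dml_session():
    session = create_session("bert.onnx", use_gpu=True, provider="dml", intra_op_num_threads=None)
    print(session.get_providers())  # DmlExecutionProvider should be listed first
    return session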


def numpy_type(torch_type):
    type_map = {
        torch.float32: np.float32,
        torch.float16: np.float16,
        torch.int32: np.int32,
        torch.int64: np.longlong,
    }
    return type_map[torch_type]


def create_input_output_tensors(inputs, outputs, device):
    input_tensors = {name: torch.from_numpy(array).to(device) for name, array in inputs.items()}
    output_tensors = {name: torch.from_numpy(array).to(device) for name, array in outputs.items()}
    return input_tensors, output_tensors


def create_io_binding(sess, input_tensors, output_tensors):
    io_binding = sess.io_binding()
    for name, tensor in input_tensors.items():
        io_binding.bind_input(
            name,
            tensor.device.type,
            0,
            numpy_type(tensor.dtype),
            tensor.shape,
            tensor.data_ptr(),
        )
    for name, tensor in output_tensors.items():
        io_binding.bind_output(
            name,
            tensor.device.type,
            0,
            numpy_type(tensor.dtype),
            tensor.shape,
            tensor.data_ptr(),
        )
    return io_binding
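

# Note (added for clarity): binding inputs and outputs to pre-allocated torch
# tensors lets onnxruntime read and write those buffers in place, so the timed
# run_with_iobinding calls below exclude per-run input/output copies.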


def onnxruntime_inference_with_io_binding(session, all_inputs, output_names, test_setting):
    results = []
    latency_list = []
    device = "cuda" if test_setting.use_gpu else "cpu"
    for _test_case_id, inputs in enumerate(all_inputs):
        result = session.run(output_names, inputs)
        results.append(result)
        outputs = {}
        for i in range(len(output_names)):
            outputs[output_names[i]] = result[i]

        input_tensors, output_tensors = create_input_output_tensors(inputs, outputs, device)
        io_binding = create_io_binding(session, input_tensors, output_tensors)

        # warm up once
        session.run_with_iobinding(io_binding)

        start_time = timeit.default_timer()
        session.run_with_iobinding(io_binding)
        latency = timeit.default_timer() - start_time
        latency_list.append(latency)

    return results, latency_list
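

# Note (added for clarity): the io_binding path above moves tensors to "cuda"
# whenever use_gpu is set, so --use_io_binding requires a CUDA-enabled torch
# build; it does not apply to the DirectML provider.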


def onnxruntime_inference(session, all_inputs, output_names):
    if len(all_inputs) > 0:
        # Use a random input as warm up.
        session.run(output_names, random.choice(all_inputs))

    results = []
    latency_list = []
    for _test_case_id, inputs in enumerate(all_inputs):
        start_time = timeit.default_timer()
        result = session.run(output_names, inputs)
        latency = timeit.default_timer() - start_time
        results.append(result)
        latency_list.append(latency)
    return results, latency_list


def to_string(model_path, session, test_setting):
    sess_options = session.get_session_options()
    option = f"model={os.path.basename(model_path)},"
    option += f"graph_optimization_level={sess_options.graph_optimization_level},intra_op_num_threads={sess_options.intra_op_num_threads},".replace(
        "GraphOptimizationLevel.ORT_", ""
    )

    option += f"batch_size={test_setting.batch_size},sequence_length={test_setting.sequence_length},"
    option += f"test_cases={test_setting.test_cases},test_times={test_setting.test_times},"
    option += f"use_gpu={test_setting.use_gpu},use_io_binding={test_setting.use_io_binding},"
    option += f"average_sequence_length={test_setting.average_sequence_length},"
    option += f"random_sequence_length={test_setting.random_sequence_length}"
    return option


def run_one_test(model_setting, test_setting, perf_results, all_inputs, intra_op_num_threads):
    session = create_session(
        model_setting.model_path,
        test_setting.use_gpu,
        test_setting.provider,
        intra_op_num_threads,
        model_setting.opt_level,
        log_severity=test_setting.log_severity,
        tuning_results_path=model_setting.input_tuning_results,
    )
    output_names = [output.name for output in session.get_outputs()]

    key = to_string(model_setting.model_path, session, test_setting)
    if key in perf_results:
        print("Skip duplicate test:", key)
        return

    print("Running test:", key)

    all_latency_list = []
    if test_setting.use_io_binding:
        for _i in range(test_setting.test_times):
            results, latency_list = onnxruntime_inference_with_io_binding(
                session, all_inputs, output_names, test_setting
            )
            all_latency_list.extend(latency_list)
    else:
        for _i in range(test_setting.test_times):
            results, latency_list = onnxruntime_inference(session, all_inputs, output_names)
            all_latency_list.extend(latency_list)

    # latency in milliseconds
    latency_ms = np.array(all_latency_list) * 1000

    average_latency = statistics.mean(latency_ms)
    latency_50 = np.percentile(latency_ms, 50)
    latency_75 = np.percentile(latency_ms, 75)
    latency_90 = np.percentile(latency_ms, 90)
    latency_95 = np.percentile(latency_ms, 95)
    latency_99 = np.percentile(latency_ms, 99)
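    # Throughput in queries per second: batch_size sequences per run, divided
    # by the average per-run latency converted from milliseconds to seconds.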
    throughput = test_setting.batch_size * (1000.0 / average_latency)

    perf_results[key] = (
        average_latency,
        latency_50,
        latency_75,
        latency_90,
        latency_95,
        latency_99,
        throughput,
    )

    print(f"Average latency = {average_latency:.2f} ms, Throughput = {throughput:.2f} QPS")

    if model_setting.output_tuning_results:
        output_path = os.path.abspath(model_setting.output_tuning_results)
        if os.path.exists(output_path):
            old_output_path = output_path
            output_path = f"""{output_path.rsplit(".json", 1)[0]}.{datetime.now().timestamp()}.json"""
            print("WARNING:", old_output_path, "exists, will write to", output_path, "instead.")

        trs = session.get_tuning_results()
        with open(output_path, "w") as f:
            json.dump(trs, f)
        print("Tuning results are saved to", output_path)
def launch_test(model_setting, test_setting, perf_results, all_inputs, intra_op_num_threads):
    process = multiprocessing.Process(
        target=run_one_test,
        args=(
            model_setting,
            test_setting,
            perf_results,
            all_inputs,
            intra_op_num_threads,
        ),
    )
    process.start()
    process.join()


def run_perf_tests(model_setting, test_setting, perf_results, all_inputs):
    if test_setting.intra_op_num_threads is not None:
        launch_test(
            model_setting,
            test_setting,
            perf_results,
            all_inputs,
            test_setting.intra_op_num_threads,
        )
        return

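    # No thread count was given: sweep intra_op_num_threads over the physical
    # and logical core counts plus 1 .. min(16, logical_cores) - 1, largest first.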
    cpu_count = psutil.cpu_count(logical=False)
    logical_cores = psutil.cpu_count(logical=True)

    candidate_threads = list({logical_cores, cpu_count})
    for i in range(1, min(16, logical_cores)):
        if i not in candidate_threads:
            candidate_threads.append(i)
    candidate_threads.sort(reverse=True)

    for intra_op_num_threads in candidate_threads:
        launch_test(model_setting, test_setting, perf_results, all_inputs, intra_op_num_threads)


def run_performance(model_setting, test_setting, perf_results):
    input_ids, segment_ids, input_mask = get_bert_inputs(
        model_setting.model_path,
        model_setting.input_ids_name,
        model_setting.segment_ids_name,
        model_setting.input_mask_name,
    )

    # Do not generate random mask for performance test.
    print(
        f"Generating {test_setting.test_cases} samples for batch_size={test_setting.batch_size} sequence_length={test_setting.sequence_length}"
    )
    all_inputs = generate_test_data(
        test_setting.batch_size,
        test_setting.sequence_length,
        test_setting.test_cases,
        test_setting.seed,
        test_setting.verbose,
        input_ids,
        segment_ids,
        input_mask,
        test_setting.average_sequence_length,
        test_setting.random_sequence_length,
        mask_type=model_setting.mask_type,
    )

    run_perf_tests(model_setting, test_setting, perf_results, all_inputs)


def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True, type=str, help="bert onnx model path")

    parser.add_argument(
        "-b",
        "--batch_size",
        required=True,
        type=int,
        nargs="+",
        help="batch size of input. Allows one or more values in the range [1, 128].",
    )

    parser.add_argument(
        "-s",
        "--sequence_length",
        required=True,
        type=int,
        help="maximum sequence length of input",
    )

    parser.add_argument(
        "--samples",
        required=False,
        type=int,
        default=10,
        help="number of samples to be generated",
    )

    parser.add_argument(
        "-t",
        "--test_times",
        required=False,
        type=int,
        default=0,
        help="number of times to run per sample. By default, the value is 1000 / samples",
    )

    parser.add_argument(
        "--opt_level",
        required=False,
        type=int,
        choices=[0, 1, 2, 3, 99],
        default=99,
        help="onnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 3 - layout, 99 - enable all.",
    )

    parser.add_argument(
        "--seed",
        required=False,
        type=int,
        default=3,
        help="random seed. Use the same seed to make sure test data is the same in multiple tests.",
    )

    parser.add_argument(
        "--verbose",
        required=False,
        action="store_true",
        help="print verbose information",
    )
    parser.set_defaults(verbose=False)

    parser.add_argument(
        "--log_severity",
        required=False,
        type=int,
        default=2,
        choices=[0, 1, 2, 3, 4],
        help="0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal",
    )

    parser.add_argument("--use_gpu", required=False, action="store_true", help="use GPU")
    parser.set_defaults(use_gpu=False)

    parser.add_argument("--use_io_binding", required=False, action="store_true", help="use io_binding")
    parser.set_defaults(use_io_binding=False)

    parser.add_argument(
        "--provider",
        required=False,
        type=str,
        default=None,
        help="Execution provider to use",
    )

    parser.add_argument(
        "-n",
        "--intra_op_num_threads",
        required=False,
        type=int,
        default=None,
        help=">=0, set intra_op_num_threads",
    )

    parser.add_argument(
        "--input_ids_name",
        required=False,
        type=str,
        default=None,
        help="input name for input ids",
    )

    parser.add_argument(
        "--segment_ids_name",
        required=False,
        type=str,
        default=None,
        help="input name for segment ids",
    )

    parser.add_argument(
        "--input_mask_name",
        required=False,
        type=str,
        default=None,
        help="input name for attention mask",
    )

    parser.add_argument(
        "--input_tuning_results",
        default=None,
        type=str,
        help="tuning results (json) to be loaded before benchmark",
    )

    parser.add_argument(
        "--output_tuning_results",
        default=None,
        type=str,
        help="tuning results (json) to be saved after benchmark",
    )

    parser.add_argument(
        "-a",
        "--average_sequence_length",
        default=-1,
        type=int,
        help="average sequence length excluding padding",
    )

    parser.add_argument(
        "-r",
        "--random_sequence_length",
        required=False,
        action="store_true",
        help="use uniform random instead of fixed sequence length",
    )
    parser.set_defaults(random_sequence_length=False)

    parser.add_argument(
        "--mask_type",
        required=False,
        type=int,
        default=2,
        help="mask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key)",
    )

    args = parser.parse_args()
    return args


def main():
    args = parse_arguments()

    if args.test_times == 0:
        args.test_times = max(1, int(1000 / args.samples))

    if args.average_sequence_length <= 0:
        args.average_sequence_length = args.sequence_length

    manager = multiprocessing.Manager()
    perf_results = manager.dict()

    batch_size_set = set(args.batch_size)
    if not (min(batch_size_set) >= 1 and max(batch_size_set) <= 128):
        raise Exception("batch_size not in range [1, 128]")

    model_setting = ModelSetting(
        args.model,
        args.input_ids_name,
        args.segment_ids_name,
        args.input_mask_name,
        args.opt_level,
        args.input_tuning_results,
        args.output_tuning_results,
        args.mask_type,
    )

    for batch_size in batch_size_set:
        test_setting = TestSetting(
            batch_size,
            args.sequence_length,
            args.samples,
            args.test_times,
            args.use_gpu,
            args.use_io_binding,
            args.provider,
            args.intra_op_num_threads,
            args.seed,
            args.verbose,
            args.log_severity,
            args.average_sequence_length,
            args.random_sequence_length,
        )

        print("test setting", test_setting)
        run_performance(model_setting, test_setting, perf_results)

    # Sort the results so that the first one has the smallest latency.
    sorted_results = sorted(perf_results.items(), reverse=False, key=lambda x: x[1])

    summary_file = os.path.join(
        Path(args.model).parent,
        "perf_results_{}_B{}_S{}_{}.txt".format(
            "GPU" if args.use_gpu else "CPU",
            "-".join([str(x) for x in sorted(batch_size_set)]),
            args.sequence_length,
            datetime.now().strftime("%Y%m%d-%H%M%S"),
        ),
    )
    with open(summary_file, "w+", newline="") as tsv_file:
        tsv_writer = csv.writer(tsv_file, delimiter="\t", lineterminator="\n")
        headers = None
        for key, perf_result in sorted_results:
            params = key.split(",")
            if headers is None:
                headers = [
                    "Latency(ms)",
                    "Latency_P50",
                    "Latency_P75",
                    "Latency_P90",
                    "Latency_P95",
                    "Latency_P99",
                    "Throughput(QPS)",
                ]
                headers.extend([x.split("=")[0] for x in params])
                tsv_writer.writerow(headers)

            values = [format(x, ".2f") for x in perf_result]
            values.extend([x.split("=")[1] for x in params])
            tsv_writer.writerow(values)

    print("Test summary is saved to", summary_file)


if __name__ == "__main__":
    # Workaround for Anaconda Jupyter. See https://stackoverflow.com/questions/45720153/python-multiprocessing-error-attributeerror-module-main-has-no-attribute
    __spec__ = None

    main()