mindstudio-probe 1.0.1__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/LICENSE +201 -201
  2. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/METADATA +36 -30
  3. mindstudio_probe-1.0.4.dist-info/RECORD +276 -0
  4. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/WHEEL +1 -1
  5. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/entry_points.txt +1 -0
  6. msprobe/README.md +101 -182
  7. msprobe/__init__.py +1 -0
  8. msprobe/{config/config.json → config.json} +49 -27
  9. msprobe/core/__init__.py +0 -0
  10. msprobe/{pytorch → core}/advisor/advisor.py +124 -124
  11. msprobe/{pytorch → core}/advisor/advisor_const.py +59 -59
  12. msprobe/{pytorch → core}/advisor/advisor_result.py +58 -58
  13. msprobe/core/common/const.py +341 -241
  14. msprobe/core/common/exceptions.py +100 -88
  15. msprobe/core/common/{file_check.py → file_utils.py} +478 -265
  16. msprobe/core/common/log.py +76 -55
  17. msprobe/core/common/utils.py +385 -516
  18. msprobe/core/common_config.py +85 -58
  19. msprobe/core/compare/acc_compare.py +300 -0
  20. msprobe/core/compare/check.py +95 -0
  21. msprobe/core/compare/compare_cli.py +49 -0
  22. msprobe/core/compare/highlight.py +223 -0
  23. msprobe/core/compare/multiprocessing_compute.py +149 -0
  24. msprobe/{pytorch → core}/compare/npy_compare.py +295 -244
  25. msprobe/core/compare/utils.py +430 -0
  26. msprobe/core/data_dump/data_collector.py +154 -140
  27. msprobe/core/data_dump/data_processor/base.py +314 -245
  28. msprobe/core/data_dump/data_processor/factory.py +59 -61
  29. msprobe/core/data_dump/data_processor/mindspore_processor.py +186 -0
  30. msprobe/core/data_dump/data_processor/pytorch_processor.py +366 -346
  31. msprobe/core/data_dump/json_writer.py +96 -116
  32. msprobe/core/data_dump/scope.py +178 -178
  33. msprobe/core/grad_probe/__init__.py +0 -0
  34. msprobe/core/grad_probe/constant.py +71 -0
  35. msprobe/core/grad_probe/grad_compare.py +171 -0
  36. msprobe/core/grad_probe/utils.py +64 -0
  37. msprobe/docs/01.installation.md +89 -0
  38. msprobe/docs/02.config_introduction.md +165 -0
  39. msprobe/docs/03.config_examples.md +247 -0
  40. msprobe/docs/04.acl_config_examples.md +76 -0
  41. msprobe/docs/05.data_dump_PyTorch.md +198 -0
  42. msprobe/docs/06.data_dump_MindSpore.md +243 -0
  43. msprobe/docs/07.accuracy_checker_PyTorch.md +274 -0
  44. msprobe/docs/08.accuracy_checker_online_PyTorch.md +198 -0
  45. msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
  46. msprobe/docs/10.accuracy_compare_PyTorch.md +245 -0
  47. msprobe/docs/11.accuracy_compare_MindSpore.md +202 -0
  48. msprobe/docs/12.overflow_check_PyTorch.md +79 -0
  49. msprobe/docs/13.overflow_check_MindSpore.md +31 -0
  50. msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
  51. msprobe/docs/15.free_benchmarking_PyTorch.md +164 -0
  52. msprobe/docs/17.grad_probe.md +207 -0
  53. msprobe/docs/FAQ_PyTorch.md +177 -0
  54. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
  55. msprobe/docs/img/free_benchmark_framework.png +0 -0
  56. msprobe/docs/img/grad_probe_image-1.png +0 -0
  57. msprobe/docs/img/grad_probe_image-2.png +0 -0
  58. msprobe/docs/img/grad_probe_image-3.png +0 -0
  59. msprobe/docs/img/grad_probe_image-4.png +0 -0
  60. msprobe/docs/img/grad_probe_image.png +0 -0
  61. msprobe/mindspore/__init__.py +1 -1
  62. msprobe/mindspore/api_accuracy_checker/__init__.py +0 -0
  63. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +255 -0
  64. msprobe/mindspore/api_accuracy_checker/api_info.py +69 -0
  65. msprobe/mindspore/api_accuracy_checker/api_runner.py +156 -0
  66. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +197 -0
  67. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
  68. msprobe/mindspore/api_accuracy_checker/compute_element.py +239 -0
  69. msprobe/mindspore/api_accuracy_checker/main.py +9 -0
  70. msprobe/mindspore/api_accuracy_checker/type_mapping.py +114 -0
  71. msprobe/mindspore/api_accuracy_checker/utils.py +80 -0
  72. msprobe/mindspore/cell_processor.py +34 -0
  73. msprobe/mindspore/common/const.py +106 -0
  74. msprobe/mindspore/common/log.py +38 -0
  75. msprobe/mindspore/common/utils.py +81 -0
  76. msprobe/mindspore/compare/distributed_compare.py +75 -0
  77. msprobe/mindspore/compare/ms_compare.py +219 -0
  78. msprobe/mindspore/compare/ms_graph_compare.py +348 -0
  79. msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -0
  80. msprobe/mindspore/debugger/debugger_config.py +66 -51
  81. msprobe/mindspore/debugger/precision_debugger.py +126 -32
  82. msprobe/mindspore/dump/dump_tool_factory.py +35 -38
  83. msprobe/mindspore/dump/hook_cell/api_registry.py +118 -0
  84. msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -0
  85. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +922 -0
  86. msprobe/mindspore/dump/hook_cell/wrap_api.py +113 -0
  87. msprobe/mindspore/dump/jit_dump.py +72 -0
  88. msprobe/mindspore/dump/kernel_graph_dump.py +59 -60
  89. msprobe/mindspore/dump/kernel_kbyk_dump.py +64 -0
  90. msprobe/mindspore/free_benchmark/__init__.py +0 -0
  91. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +116 -0
  92. msprobe/mindspore/free_benchmark/common/__init__.py +0 -0
  93. msprobe/mindspore/free_benchmark/common/config.py +12 -0
  94. msprobe/mindspore/free_benchmark/common/handler_params.py +17 -0
  95. msprobe/mindspore/free_benchmark/common/utils.py +71 -0
  96. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -0
  97. msprobe/mindspore/free_benchmark/decorator/__init__.py +0 -0
  98. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +43 -0
  99. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +107 -0
  100. msprobe/mindspore/free_benchmark/handler/__init__.py +0 -0
  101. msprobe/mindspore/free_benchmark/handler/base_handler.py +90 -0
  102. msprobe/mindspore/free_benchmark/handler/check_handler.py +41 -0
  103. msprobe/mindspore/free_benchmark/handler/fix_handler.py +36 -0
  104. msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -0
  105. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +67 -0
  106. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +21 -0
  107. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +63 -0
  108. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +51 -0
  109. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +35 -0
  110. msprobe/mindspore/free_benchmark/perturbation/no_change.py +12 -0
  111. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +29 -0
  112. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +33 -0
  113. msprobe/mindspore/grad_probe/__init__.py +0 -0
  114. msprobe/mindspore/grad_probe/global_context.py +90 -0
  115. msprobe/mindspore/grad_probe/grad_analyzer.py +231 -0
  116. msprobe/mindspore/grad_probe/grad_monitor.py +27 -0
  117. msprobe/mindspore/grad_probe/grad_stat_csv.py +132 -0
  118. msprobe/mindspore/grad_probe/hook.py +94 -0
  119. msprobe/mindspore/grad_probe/utils.py +30 -0
  120. msprobe/mindspore/ms_config.py +128 -78
  121. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +44 -45
  122. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +34 -32
  123. msprobe/mindspore/runtime.py +4 -0
  124. msprobe/mindspore/service.py +378 -0
  125. msprobe/mindspore/task_handler_factory.py +24 -21
  126. msprobe/msprobe.py +105 -67
  127. msprobe/pytorch/__init__.py +4 -4
  128. msprobe/pytorch/api_accuracy_checker/common/config.py +53 -50
  129. msprobe/pytorch/api_accuracy_checker/common/utils.py +214 -224
  130. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +213 -216
  131. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +606 -545
  132. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
  133. msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
  134. msprobe/pytorch/api_accuracy_checker/compare/compare.py +386 -345
  135. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +73 -73
  136. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +245 -248
  137. msprobe/pytorch/api_accuracy_checker/config.yaml +10 -4
  138. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +335 -328
  139. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +200 -203
  140. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +133 -127
  141. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +592 -493
  142. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +70 -7
  143. msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
  144. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/__init__.py +0 -0
  145. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +197 -0
  146. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +325 -0
  147. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +204 -0
  148. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +219 -0
  149. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +10 -0
  150. msprobe/pytorch/bench_functions/__init__.py +15 -0
  151. msprobe/pytorch/bench_functions/apply_adam_w.py +28 -0
  152. msprobe/pytorch/bench_functions/confusion_transpose.py +19 -0
  153. msprobe/pytorch/bench_functions/fast_gelu.py +55 -0
  154. msprobe/pytorch/bench_functions/layer_norm_eval.py +6 -0
  155. msprobe/pytorch/bench_functions/linear.py +12 -0
  156. msprobe/pytorch/bench_functions/matmul_backward.py +48 -0
  157. msprobe/pytorch/bench_functions/npu_fusion_attention.py +509 -0
  158. msprobe/pytorch/bench_functions/rms_norm.py +15 -0
  159. msprobe/pytorch/bench_functions/rotary_mul.py +52 -0
  160. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +26 -0
  161. msprobe/pytorch/bench_functions/swiglu.py +55 -0
  162. msprobe/pytorch/common/__init__.py +2 -2
  163. msprobe/pytorch/common/compare_script.template +14 -14
  164. msprobe/pytorch/common/log.py +20 -31
  165. msprobe/pytorch/common/parse_json.py +39 -37
  166. msprobe/pytorch/common/utils.py +305 -224
  167. msprobe/pytorch/compare/distributed_compare.py +66 -111
  168. msprobe/pytorch/compare/mapping.yaml +607 -607
  169. msprobe/pytorch/compare/match.py +34 -36
  170. msprobe/pytorch/compare/pt_compare.py +50 -0
  171. msprobe/pytorch/debugger/debugger_config.py +95 -86
  172. msprobe/pytorch/debugger/precision_debugger.py +125 -95
  173. msprobe/pytorch/free_benchmark/__init__.py +8 -8
  174. msprobe/pytorch/free_benchmark/common/constant.py +70 -67
  175. msprobe/pytorch/free_benchmark/common/counter.py +71 -71
  176. msprobe/pytorch/free_benchmark/common/enums.py +37 -37
  177. msprobe/pytorch/free_benchmark/common/params.py +129 -129
  178. msprobe/pytorch/free_benchmark/common/utils.py +102 -98
  179. msprobe/pytorch/free_benchmark/compare/grad_saver.py +179 -183
  180. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +104 -104
  181. msprobe/pytorch/free_benchmark/main.py +105 -102
  182. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +13 -13
  183. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +41 -41
  184. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +90 -90
  185. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +104 -104
  186. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +63 -63
  187. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +68 -68
  188. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +28 -28
  189. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +45 -45
  190. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +19 -19
  191. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +217 -203
  192. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +39 -39
  193. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +23 -23
  194. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +30 -31
  195. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +170 -170
  196. msprobe/pytorch/function_factory.py +76 -0
  197. msprobe/pytorch/functional/dump_module.py +39 -39
  198. msprobe/pytorch/grad_probe/__init__.py +0 -0
  199. msprobe/pytorch/grad_probe/grad_monitor.py +91 -0
  200. msprobe/pytorch/grad_probe/grad_stat_csv.py +129 -0
  201. msprobe/pytorch/hook_module/api_registry.py +161 -161
  202. msprobe/pytorch/hook_module/hook_module.py +120 -109
  203. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1876
  204. msprobe/pytorch/hook_module/utils.py +30 -29
  205. msprobe/pytorch/hook_module/wrap_aten.py +110 -100
  206. msprobe/pytorch/hook_module/wrap_distributed.py +78 -75
  207. msprobe/pytorch/hook_module/wrap_functional.py +105 -108
  208. msprobe/pytorch/hook_module/wrap_npu_custom.py +93 -73
  209. msprobe/pytorch/hook_module/wrap_tensor.py +71 -72
  210. msprobe/pytorch/hook_module/wrap_torch.py +86 -88
  211. msprobe/pytorch/hook_module/wrap_vf.py +62 -64
  212. msprobe/pytorch/module_processer.py +138 -98
  213. msprobe/pytorch/online_dispatch/__init__.py +20 -20
  214. msprobe/pytorch/online_dispatch/compare.py +236 -236
  215. msprobe/pytorch/online_dispatch/dispatch.py +271 -273
  216. msprobe/pytorch/online_dispatch/dump_compare.py +155 -186
  217. msprobe/pytorch/online_dispatch/single_compare.py +391 -391
  218. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +49 -49
  219. msprobe/pytorch/online_dispatch/utils.py +130 -187
  220. msprobe/pytorch/parse.py +4 -4
  221. msprobe/pytorch/parse_tool/cli.py +32 -32
  222. msprobe/pytorch/parse_tool/lib/compare.py +260 -259
  223. msprobe/pytorch/parse_tool/lib/config.py +52 -51
  224. msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
  225. msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
  226. msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
  227. msprobe/pytorch/parse_tool/lib/parse_tool.py +158 -158
  228. msprobe/pytorch/parse_tool/lib/utils.py +316 -367
  229. msprobe/pytorch/parse_tool/lib/visualization.py +85 -90
  230. msprobe/pytorch/pt_config.py +188 -93
  231. msprobe/pytorch/service.py +246 -167
  232. mindstudio_probe-1.0.1.dist-info/RECORD +0 -228
  233. msprobe/config/README.md +0 -397
  234. msprobe/mindspore/doc/dump.md +0 -65
  235. msprobe/mindspore/dump/api_kbk_dump.py +0 -55
  236. msprobe/pytorch/compare/acc_compare.py +0 -1024
  237. msprobe/pytorch/compare/highlight.py +0 -100
  238. msprobe/pytorch/doc/FAQ.md +0 -193
  239. msprobe/pytorch/doc/api_accuracy_checker.md +0 -269
  240. msprobe/pytorch/doc/atat精度工具数据dump标准性能基线报告.md +0 -182
  241. msprobe/pytorch/doc/dump.md +0 -207
  242. msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -176
  243. msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
  244. msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
  245. msprobe/pytorch/doc/run_overflow_check.md +0 -25
  246. msprobe/pytorch/doc/在线精度比对.md +0 -90
  247. msprobe/test/core_ut/common/test_utils.py +0 -345
  248. msprobe/test/core_ut/data_dump/test_data_collector.py +0 -47
  249. msprobe/test/core_ut/data_dump/test_json_writer.py +0 -183
  250. msprobe/test/core_ut/data_dump/test_scope.py +0 -151
  251. msprobe/test/core_ut/test_common_config.py +0 -152
  252. msprobe/test/core_ut/test_file_check.py +0 -218
  253. msprobe/test/core_ut/test_log.py +0 -109
  254. msprobe/test/mindspore_ut/test_api_kbk_dump.py +0 -51
  255. msprobe/test/mindspore_ut/test_debugger_config.py +0 -42
  256. msprobe/test/mindspore_ut/test_dump_tool_factory.py +0 -51
  257. msprobe/test/mindspore_ut/test_kernel_graph_dump.py +0 -66
  258. msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py +0 -63
  259. msprobe/test/mindspore_ut/test_ms_config.py +0 -69
  260. msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py +0 -51
  261. msprobe/test/mindspore_ut/test_precision_debugger.py +0 -56
  262. msprobe/test/mindspore_ut/test_task_handler_factory.py +0 -58
  263. msprobe/test/pytorch_ut/advisor/test_advisor.py +0 -83
  264. msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +0 -108
  265. msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py +0 -39
  266. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py +0 -112
  267. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py +0 -77
  268. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py +0 -125
  269. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py +0 -10
  270. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py +0 -43
  271. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json +0 -179
  272. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json +0 -63
  273. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py +0 -99
  274. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +0 -115
  275. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py +0 -72
  276. msprobe/test/pytorch_ut/compare/test_acc_compare.py +0 -17
  277. msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py +0 -105
  278. msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py +0 -121
  279. msprobe/test/pytorch_ut/free_benchmark/test_main.py +0 -101
  280. msprobe/test/pytorch_ut/functional/test_dump_module.py +0 -15
  281. msprobe/test/pytorch_ut/hook_module/test_api_registry.py +0 -130
  282. msprobe/test/pytorch_ut/hook_module/test_hook_module.py +0 -42
  283. msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py +0 -65
  284. msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py +0 -35
  285. msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py +0 -20
  286. msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py +0 -35
  287. msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py +0 -43
  288. msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py +0 -11
  289. msprobe/test/pytorch_ut/test_pt_config.py +0 -69
  290. msprobe/test/pytorch_ut/test_service.py +0 -59
  291. msprobe/test/resources/advisor.txt +0 -3
  292. msprobe/test/resources/compare_result_20230703104808.csv +0 -9
  293. msprobe/test/resources/compare_result_without_accuracy.csv +0 -9
  294. msprobe/test/resources/config.yaml +0 -3
  295. msprobe/test/resources/npu_test.pkl +0 -8
  296. msprobe/test/run_test.sh +0 -30
  297. msprobe/test/run_ut.py +0 -58
  298. msprobe/test/test_module_processer.py +0 -64
  299. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/top_level.txt +0 -0
  300. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
  301. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
  302. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
  303. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
  304. /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
  305. /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
  306. /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
  307. /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
  308. /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
  309. /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
  310. /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
  311. /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
  312. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
  313. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
  314. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
  315. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
  316. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
  317. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
  318. /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
  319. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
  320. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
  321. /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
  322. /msprobe/{config → docs}/img/free_benchmark.png +0 -0
  323. /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
@@ -1,216 +1,213 @@
1
- # 定义比对算法及比对标准
2
- import torch
3
- import numpy as np
4
-
5
- from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import ULP_PARAMETERS
6
- from msprobe.core.common.const import CompareConst
7
-
8
-
9
- DEFAULT_THRESHOLD = 1
10
-
11
-
12
- #cos
13
- def cosine_sim(bench_output, device_output):
14
- msg = ""
15
- n_value = device_output.reshape(-1)
16
- b_value = bench_output.reshape(-1)
17
- cos = CompareConst.SPACE
18
- np.seterr(divide="ignore", invalid="ignore")
19
- if n_value.shape != b_value.shape:
20
- msg = f"Shape of device and bench outputs don't match. device: {n_value.shape}, bench: {b_value.shape}."
21
- return -1, False, msg
22
- if len(n_value) == 1:
23
- msg = "All the data in device dump data is scalar. Please refer to other compare algorithms."
24
- return cos, True, msg
25
- n_value_max = np.max(np.abs(n_value))
26
- b_value_max = np.max(np.abs(b_value))
27
- if n_value_max <= np.finfo(float).eps and b_value_max <= np.finfo(float).eps:
28
- msg = "All the data in device and bench outputs are zero."
29
- return cos, True, msg
30
- elif n_value_max <= np.finfo(float).eps:
31
- msg = "All the data is zero in device dump data."
32
- return CompareConst.SPACE, False, msg
33
- elif b_value_max <= np.finfo(float).eps:
34
- msg = "All the data is zero in bench dump data."
35
- return CompareConst.SPACE, False, msg
36
- else:
37
- n_value = n_value.astype(float) / n_value_max
38
- b_value = b_value.astype(float) / b_value_max
39
- cos = np.dot(n_value, b_value) / (np.linalg.norm(n_value) * np.linalg.norm(b_value))
40
- if np.isnan(cos):
41
- msg = "Dump data has NaN when comparing with Cosine Similarity."
42
- cos = np.clip(cos, -1, 1)
43
- return cos, cos > 0.99, msg
44
-
45
-
46
- #rmse
47
- def get_rmse(abs_err, inf_nan_mask):
48
- masked_ae = np.where(inf_nan_mask, 0, abs_err)
49
- mse = np.mean(np.square(masked_ae))
50
- inf_nan_cnt = np.sum(inf_nan_mask)
51
- mse = mse * (abs_err.size / (abs_err.size - inf_nan_cnt + 0.0001) + 0.0001)
52
- rmse = np.sqrt(mse)
53
- return rmse
54
-
55
-
56
- #误差均衡性
57
- def get_error_balance(bench_data, device_data):
58
- larger_count = np.sum(np.greater(device_data - bench_data.astype(device_data.dtype), 0))
59
- smaller_count = np.sum(np.less(device_data - bench_data.astype(device_data.dtype), 0))
60
- total_count = bench_data.size
61
- error_balance = abs(larger_count - smaller_count) / total_count if total_count > 0 else 0
62
- return error_balance
63
-
64
-
65
- #小值域错误占比
66
- def get_small_value_err_ratio(small_value_mask, abs_err_greater_mask):
67
- err_mask = np.logical_and(small_value_mask, abs_err_greater_mask)
68
- small_value_err_num = np.sum(err_mask)
69
- small_value_num = np.sum(small_value_mask)
70
- return 0 if small_value_num == 0 else small_value_err_num / small_value_num
71
-
72
-
73
- def get_rel_err(abs_err, abs_bench_with_eps, small_value_mask, inf_nan_mask):
74
- rel_err_tmp = abs_err / abs_bench_with_eps
75
- rel_err_mask = np.logical_or(small_value_mask, inf_nan_mask)
76
- rel_err = np.where(rel_err_mask, -1, rel_err_tmp)
77
- return rel_err
78
-
79
-
80
- def get_abs_err(bench_data, device_data):
81
- abs_err = np.abs(device_data - bench_data)
82
- return abs_err
83
-
84
-
85
- def get_rel_err_origin(abs_err, b_value):
86
- rel_err_origin = np.abs(abs_err / b_value)
87
- return rel_err_origin
88
-
89
-
90
- def get_max_abs_err(abs_err):
91
- max_abs_err = abs_err.max()
92
- bool_result = max_abs_err < 0.001
93
- return max_abs_err, bool_result
94
-
95
-
96
- #相对误差最大值
97
- def get_max_rel_err(rel_err):
98
- return np.max(rel_err) if np.max(rel_err) >= 0 else 0
99
-
100
-
101
- #相对误差均值
102
- def get_mean_rel_err(rel_err):
103
- non_negative_rel_err = rel_err[rel_err >= 0]
104
- return np.mean(non_negative_rel_err) if non_negative_rel_err.size > 0 else 0
105
-
106
-
107
- def get_rel_err_ratio(rel_err, thresholding):
108
- if np.size(rel_err) == 0:
109
- ratio = 1
110
- else:
111
- ratio = np.divide(np.sum(rel_err < thresholding), np.size(rel_err))
112
- bool_result = ratio > (1 - thresholding)
113
- return ratio, bool_result
114
-
115
-
116
- def get_finite_and_infinite_mask(bench_output, device_output):
117
- device_finite_mask = np.isfinite(device_output)
118
- bench_finite_mask = np.isfinite(bench_output.astype(device_output.dtype))
119
- both_finite_mask = np.logical_and(device_finite_mask, bench_finite_mask)
120
- inf_nan_mask = np.logical_not(both_finite_mask)
121
- return both_finite_mask, inf_nan_mask
122
-
123
-
124
- def get_small_value_mask(abs_bench, both_finite_mask, small_value_threshold):
125
- small_value_mask = np.less_equal(abs_bench, small_value_threshold)
126
- small_value_mask = np.logical_and(small_value_mask, both_finite_mask)
127
- return small_value_mask
128
-
129
-
130
- def get_abs_bench_with_eps(bench, dtype):
131
- abs_bench = np.abs(bench)
132
- eps = np.finfo(bench.dtype).eps if dtype != torch.bfloat16 else CompareConst.BFLOAT16_EPS
133
- abs_bench_with_eps = abs_bench + eps
134
- return abs_bench, abs_bench_with_eps
135
-
136
-
137
- def check_inf_nan_value(inf_nan_mask, bench_output, device_output, dtype, rtol):
138
- '''
139
- 新精度标准的绝对阈值法中,检查npu和golden输出的inf、nan是否一致
140
- 输入:
141
- inf_nan_mask:npu输出和golden输出的inf、nan的mask
142
- bench_output:golden输出
143
- device_output:npu输出
144
- dtype:npu输出的dtype
145
- 输出:
146
- inf_nan_err_ratio:npu输出和golden输出的inf、nan不一致的比例
147
- '''
148
- abs_gpu, abs_gpu_with_eps = get_abs_bench_with_eps(bench_output, dtype)
149
- golden_same_dtype = bench_output.astype(device_output.dtype)
150
- a_min = np.finfo(device_output.dtype).min if dtype != torch.bfloat16 else CompareConst.BFLOAT16_MIN
151
- a_max = np.finfo(device_output.dtype).max if dtype != torch.bfloat16 else CompareConst.BFLOAT16_MAX
152
- golden_clip = np.clip(golden_same_dtype, a_min, a_max)
153
- npu_clip = np.clip(device_output, a_min, a_max)
154
- clipped_abs_ae = np.abs(npu_clip - golden_clip)
155
- clipped_re = clipped_abs_ae / abs_gpu_with_eps
156
- pass_mask = np.less_equal(clipped_re, rtol)
157
- both_nan_mask = np.logical_and(np.isnan(device_output), np.isnan(golden_clip))
158
- pass_mask = np.logical_or(pass_mask, both_nan_mask)
159
- not_pass_mask = np.logical_not(pass_mask)
160
- not_pass_mask = np.logical_and(not_pass_mask, inf_nan_mask)
161
-
162
- inf_nan_err_cnt = np.sum(not_pass_mask)
163
- return 0 if np.sum(inf_nan_mask) == 0 else inf_nan_err_cnt / np.sum(inf_nan_mask)
164
-
165
-
166
- def check_small_value(abs_err, small_value_mask, small_value_atol):
167
- '''
168
- 新精度标准的相对阈值法中,检查npugolden小值域输出的相对误差是否满足阈值
169
- 输入:
170
- rel_err:npu输出和golden输出的相对误差
171
- normal_value_mask:npu输出和golden输出的正常值mask
172
- rtol:相对误差的阈值
173
- 输出:
174
- rel_err_ratio:npu输出和golden输出的相对误差不满足阈值的比例
175
- '''
176
- greater_mask = np.greater(abs_err, small_value_atol)
177
- err_mask = np.logical_and(greater_mask, small_value_mask)
178
- err_cnt = np.sum(err_mask)
179
- return 0 if np.sum(small_value_mask) == 0 else err_cnt / np.sum(small_value_mask)
180
-
181
-
182
- def check_norm_value(normal_value_mask, rel_err, rtol):
183
- '''
184
- 新精度标准的绝对阈值法中,检查npugolden正常值输出的绝对误差是否满足阈值
185
- 输入:
186
- abs_err:npu输出和golden输出的绝对误差
187
- normal_value_mask:npu输出和golden输出的正常值mask
188
- atol:绝对误差的阈值
189
- 输出:
190
- abs_err_ratio:npu输出和golden输出的绝对误差不满足阈值的比例
191
- '''
192
- err_mask = np.greater(rel_err, rtol)
193
- err_mask = np.logical_and(err_mask, normal_value_mask)
194
- err_cnt = np.sum(err_mask)
195
- return 0 if np.sum(normal_value_mask) == 0 else err_cnt / np.sum(normal_value_mask)
196
-
197
-
198
- def get_ulp_err(bench_output, device_output, dtype):
199
- parameters = ULP_PARAMETERS.get(dtype)
200
- min_eb = parameters.get('min_eb', DEFAULT_THRESHOLD)[0]
201
- exponent_num = parameters.get('exponent_num', DEFAULT_THRESHOLD)[0]
202
- abs_bench = np.abs(bench_output)
203
- eb = np.where(abs_bench == 0, 0, np.floor(np.log2(abs_bench)))
204
- eb = np.maximum(eb, min_eb)
205
-
206
- if dtype == torch.float32:
207
- ulp_err = calc_ulp_err(bench_output, device_output, eb, exponent_num, np.float64)
208
- else:
209
- ulp_err = calc_ulp_err(bench_output, device_output, eb, exponent_num, np.float32)
210
- ulp_err = np.abs(ulp_err)
211
- return ulp_err
212
-
213
-
214
- def calc_ulp_err(bench_output, device_output, eb, exponent_num, data_type):
215
- return (device_output.astype(data_type) - bench_output).astype(data_type) * \
216
- np.exp2(-eb + exponent_num).astype(data_type)
1
+ # 定义比对算法及比对标准
2
+ import torch
3
+ import numpy as np
4
+
5
+ from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import ULP_PARAMETERS
6
+ from msprobe.core.common.const import CompareConst
7
+
8
+
9
+ #cos
10
+ def cosine_sim(bench_output, device_output):
11
+ msg = ""
12
+ n_value = device_output.reshape(-1)
13
+ b_value = bench_output.reshape(-1)
14
+ cos = CompareConst.SPACE
15
+ np.seterr(divide="ignore", invalid="ignore")
16
+ if n_value.shape != b_value.shape:
17
+ msg = f"Shape of device and bench outputs don't match. device: {n_value.shape}, bench: {b_value.shape}."
18
+ return -1, False, msg
19
+ if len(n_value) == 1:
20
+ msg = "All the data in device dump data is scalar. Please refer to other compare algorithms."
21
+ return cos, True, msg
22
+ n_value_max = np.max(np.abs(n_value))
23
+ b_value_max = np.max(np.abs(b_value))
24
+ if n_value_max <= np.finfo(float).eps and b_value_max <= np.finfo(float).eps:
25
+ msg = "All the data in device and bench outputs are zero."
26
+ return cos, True, msg
27
+ elif n_value_max <= np.finfo(float).eps:
28
+ msg = "All the data is zero in device dump data."
29
+ return CompareConst.SPACE, False, msg
30
+ elif b_value_max <= np.finfo(float).eps:
31
+ msg = "All the data is zero in bench dump data."
32
+ return CompareConst.SPACE, False, msg
33
+ else:
34
+ n_value = n_value.astype(float) / n_value_max
35
+ b_value = b_value.astype(float) / b_value_max
36
+ cos = np.dot(n_value, b_value) / (np.linalg.norm(n_value) * np.linalg.norm(b_value))
37
+ if np.isnan(cos):
38
+ msg = "Dump data has NaN when comparing with Cosine Similarity."
39
+ cos = np.clip(cos, -1, 1)
40
+ return cos, cos > 0.99, msg
41
+
42
+
43
+ #rmse
44
+ def get_rmse(abs_err, inf_nan_mask):
45
+ masked_ae = np.where(inf_nan_mask, 0, abs_err)
46
+ mse = np.mean(np.square(masked_ae))
47
+ inf_nan_cnt = np.sum(inf_nan_mask)
48
+ mse = mse * (abs_err.size / (abs_err.size - inf_nan_cnt + 0.0001) + 0.0001)
49
+ rmse = np.sqrt(mse)
50
+ return rmse
51
+
52
+
53
+ #误差均衡性
54
def get_error_balance(bench_data, device_data):
    """Return how unevenly the device errors are split between signs.

    The result is ``|#(device > bench) - #(device < bench)| / bench_data.size``,
    or ``0`` when ``bench_data`` is empty. ``bench_data`` is cast to the
    device dtype before the subtraction.
    """
    # Compute the signed error once; it was previously rebuilt for each count.
    signed_err = device_data - bench_data.astype(device_data.dtype)
    larger_count = np.sum(np.greater(signed_err, 0))
    smaller_count = np.sum(np.less(signed_err, 0))
    total_count = bench_data.size
    return abs(larger_count - smaller_count) / total_count if total_count > 0 else 0
60
+
61
+
62
+ #小值域错误占比
63
def get_small_value_err_ratio(small_value_mask, abs_err_greater_mask):
    """Return the fraction of small-value positions that are also flagged as errors.

    Returns ``0`` when ``small_value_mask`` selects no element.
    """
    flagged_cnt = np.sum(np.logical_and(small_value_mask, abs_err_greater_mask))
    small_cnt = np.sum(small_value_mask)
    if small_cnt == 0:
        return 0
    return flagged_cnt / small_cnt
68
+
69
+
70
def get_rel_err(abs_err, abs_bench_with_eps, small_value_mask, inf_nan_mask):
    """Return the element-wise relative error ``abs_err / abs_bench_with_eps``.

    Positions selected by ``small_value_mask`` or ``inf_nan_mask`` are
    replaced with the sentinel ``-1``.
    """
    excluded = np.logical_or(small_value_mask, inf_nan_mask)
    raw_rel_err = abs_err / abs_bench_with_eps
    return np.where(excluded, -1, raw_rel_err)
75
+
76
+
77
def get_abs_err(bench_data, device_data):
    """Return the element-wise absolute difference ``|device_data - bench_data|``."""
    return np.abs(device_data - bench_data)
80
+
81
+
82
def get_rel_err_origin(abs_err, b_value):
    """Return ``|abs_err / b_value|`` element-wise, with no epsilon or masking applied."""
    return np.abs(abs_err / b_value)
85
+
86
+
87
def get_max_abs_err(abs_err):
    """Return the largest absolute error and whether it is below ``0.001``."""
    peak = abs_err.max()
    return peak, peak < 0.001
91
+
92
+
93
+ #相对误差最大值
94
def get_max_rel_err(rel_err):
    """Return the maximum of ``rel_err``, or ``0`` when that maximum is not ``>= 0``.

    A maximum below zero means every entry is negative; a NaN maximum also
    fails the ``>= 0`` test, so both cases yield ``0``.
    """
    # Reduce once; the maximum was previously computed twice.
    max_rel_err = np.max(rel_err)
    return max_rel_err if max_rel_err >= 0 else 0
96
+
97
+
98
+ #相对误差均值
99
def get_mean_rel_err(rel_err):
    """Return the mean of the non-negative entries of ``rel_err``.

    Negative entries are skipped. Returns ``0`` when no entry is ``>= 0``.
    """
    valid = rel_err[rel_err >= 0]
    if valid.size > 0:
        return np.mean(valid)
    return 0
102
+
103
+
104
def get_rel_err_ratio(rel_err, thresholding):
    """Return the share of entries below ``thresholding`` and a pass flag.

    The ratio is ``1`` for empty input. The flag is true when the ratio
    exceeds ``1 - thresholding``.
    """
    total = np.size(rel_err)
    ratio = 1 if total == 0 else np.divide(np.sum(rel_err < thresholding), total)
    return ratio, ratio > (1 - thresholding)
111
+
112
+
113
def get_finite_and_infinite_mask(bench_output, device_output):
    """Return ``(both_finite_mask, inf_nan_mask)`` for the two outputs.

    ``both_finite_mask`` is true where the device value and the bench value
    (cast to the device dtype) are both finite; ``inf_nan_mask`` is its
    logical negation.
    """
    device_ok = np.isfinite(device_output)
    bench_ok = np.isfinite(bench_output.astype(device_output.dtype))
    both_finite_mask = np.logical_and(device_ok, bench_ok)
    return both_finite_mask, np.logical_not(both_finite_mask)
119
+
120
+
121
def get_small_value_mask(abs_bench, both_finite_mask, small_value_threshold):
    """Return a mask of finite positions where ``abs_bench <= small_value_threshold``."""
    at_or_below = np.less_equal(abs_bench, small_value_threshold)
    return np.logical_and(at_or_below, both_finite_mask)
125
+
126
+
127
def get_abs_bench_with_eps(bench, dtype):
    """Return ``(|bench|, |bench| + eps)``.

    ``eps`` is the machine epsilon of ``bench.dtype`` unless ``dtype`` is
    ``torch.bfloat16``, in which case ``CompareConst.BFLOAT16_EPS`` is used.
    """
    abs_bench = np.abs(bench)
    if dtype != torch.bfloat16:
        eps = np.finfo(bench.dtype).eps
    else:
        eps = CompareConst.BFLOAT16_EPS
    return abs_bench, abs_bench + eps
132
+
133
+
134
def check_inf_nan_value(inf_nan_mask, bench_output, device_output, dtype, rtol):
    """Check whether inf/nan values in the NPU and golden outputs agree.

    Both outputs are clipped to the representable range of the device dtype
    (``CompareConst.BFLOAT16_MIN``/``BFLOAT16_MAX`` when ``dtype`` is
    ``torch.bfloat16``). A position passes when the clipped relative error
    is at most ``rtol`` or when both values are NaN.

    Args:
        inf_nan_mask: mask of positions where the NPU or golden output is inf/nan.
        bench_output: golden output.
        device_output: NPU output.
        dtype: dtype of the NPU output.
        rtol: relative-error threshold applied to the clipped values.

    Returns:
        The fraction of ``inf_nan_mask`` positions that do not pass, or ``0``
        when the mask selects nothing.
    """
    # Only the epsilon-padded magnitude is needed as the denominator.
    _, abs_bench_with_eps = get_abs_bench_with_eps(bench_output, dtype)
    golden_same_dtype = bench_output.astype(device_output.dtype)
    a_min = np.finfo(device_output.dtype).min if dtype != torch.bfloat16 else CompareConst.BFLOAT16_MIN
    a_max = np.finfo(device_output.dtype).max if dtype != torch.bfloat16 else CompareConst.BFLOAT16_MAX
    # Clipping maps +/-inf onto the finite range limits so they can be compared.
    golden_clip = np.clip(golden_same_dtype, a_min, a_max)
    npu_clip = np.clip(device_output, a_min, a_max)
    clipped_abs_ae = np.abs(npu_clip - golden_clip)
    clipped_re = clipped_abs_ae / abs_bench_with_eps
    pass_mask = np.less_equal(clipped_re, rtol)
    both_nan_mask = np.logical_and(np.isnan(device_output), np.isnan(golden_clip))
    pass_mask = np.logical_or(pass_mask, both_nan_mask)
    not_pass_mask = np.logical_and(np.logical_not(pass_mask), inf_nan_mask)

    inf_nan_err_cnt = np.sum(not_pass_mask)
    inf_nan_cnt = np.sum(inf_nan_mask)
    return 0 if inf_nan_cnt == 0 else inf_nan_err_cnt / inf_nan_cnt
161
+
162
+
163
def check_small_value(abs_err, small_value_mask, small_value_atol):
    """Check the absolute error of small-value positions against a threshold.

    Args:
        abs_err: absolute error between the NPU and golden outputs.
        small_value_mask: mask selecting the small-value positions.
        small_value_atol: absolute-error threshold for small values.

    Returns:
        The fraction of small-value positions whose absolute error exceeds
        ``small_value_atol``, or ``0`` when the mask selects nothing.
    """
    greater_mask = np.greater(abs_err, small_value_atol)
    err_cnt = np.sum(np.logical_and(greater_mask, small_value_mask))
    small_value_cnt = np.sum(small_value_mask)
    return 0 if small_value_cnt == 0 else err_cnt / small_value_cnt
177
+
178
+
179
def check_norm_value(normal_value_mask, rel_err, rtol):
    """Check the relative error of normal-value positions against a threshold.

    Args:
        normal_value_mask: mask selecting the normal-value positions.
        rel_err: relative error between the NPU and golden outputs.
        rtol: relative-error threshold.

    Returns:
        The fraction of normal-value positions whose relative error exceeds
        ``rtol``, or ``0`` when the mask selects nothing.
    """
    err_mask = np.logical_and(np.greater(rel_err, rtol), normal_value_mask)
    err_cnt = np.sum(err_mask)
    normal_value_cnt = np.sum(normal_value_mask)
    return 0 if normal_value_cnt == 0 else err_cnt / normal_value_cnt
193
+
194
+
195
def get_ulp_err(bench_output, device_output, dtype):
    """Return the absolute device-vs-bench error scaled by per-element exponent.

    The ``min_eb`` and ``exponent_num`` settings are read from
    ``ULP_PARAMETERS`` for ``dtype``. The per-element exponent is
    ``floor(log2(|bench|))`` (``0`` where bench is zero), floored at
    ``min_eb``. The scaling is done in ``np.float64`` when ``dtype`` is
    ``torch.float32`` and in ``np.float32`` otherwise.
    """
    parameters = ULP_PARAMETERS.get(dtype)
    min_eb = parameters.get('min_eb')[0]
    exponent_num = parameters.get('exponent_num')[0]

    abs_bench = np.abs(bench_output)
    eb = np.where(abs_bench == 0, 0, np.floor(np.log2(abs_bench)))
    eb = np.maximum(eb, min_eb)

    compute_type = np.float64 if dtype == torch.float32 else np.float32
    signed_ulp_err = calc_ulp_err(bench_output, device_output, eb, exponent_num, compute_type)
    return np.abs(signed_ulp_err)
209
+
210
+
211
def calc_ulp_err(bench_output, device_output, eb, exponent_num, data_type):
    """Return ``(device - bench) * 2 ** (exponent_num - eb)`` computed in ``data_type``.

    The device output is cast to ``data_type`` before the subtraction, and
    both the difference and the scale factor are cast to ``data_type`` before
    they are multiplied.
    """
    signed_err = (device_output.astype(data_type) - bench_output).astype(data_type)
    scale = np.exp2(-eb + exponent_num).astype(data_type)
    return signed_err * scale