mindstudio-probe 1.0.1__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/LICENSE +201 -201
  2. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/METADATA +36 -30
  3. mindstudio_probe-1.0.4.dist-info/RECORD +276 -0
  4. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/WHEEL +1 -1
  5. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/entry_points.txt +1 -0
  6. msprobe/README.md +101 -182
  7. msprobe/__init__.py +1 -0
  8. msprobe/{config/config.json → config.json} +49 -27
  9. msprobe/core/__init__.py +0 -0
  10. msprobe/{pytorch → core}/advisor/advisor.py +124 -124
  11. msprobe/{pytorch → core}/advisor/advisor_const.py +59 -59
  12. msprobe/{pytorch → core}/advisor/advisor_result.py +58 -58
  13. msprobe/core/common/const.py +341 -241
  14. msprobe/core/common/exceptions.py +100 -88
  15. msprobe/core/common/{file_check.py → file_utils.py} +478 -265
  16. msprobe/core/common/log.py +76 -55
  17. msprobe/core/common/utils.py +385 -516
  18. msprobe/core/common_config.py +85 -58
  19. msprobe/core/compare/acc_compare.py +300 -0
  20. msprobe/core/compare/check.py +95 -0
  21. msprobe/core/compare/compare_cli.py +49 -0
  22. msprobe/core/compare/highlight.py +223 -0
  23. msprobe/core/compare/multiprocessing_compute.py +149 -0
  24. msprobe/{pytorch → core}/compare/npy_compare.py +295 -244
  25. msprobe/core/compare/utils.py +430 -0
  26. msprobe/core/data_dump/data_collector.py +154 -140
  27. msprobe/core/data_dump/data_processor/base.py +314 -245
  28. msprobe/core/data_dump/data_processor/factory.py +59 -61
  29. msprobe/core/data_dump/data_processor/mindspore_processor.py +186 -0
  30. msprobe/core/data_dump/data_processor/pytorch_processor.py +366 -346
  31. msprobe/core/data_dump/json_writer.py +96 -116
  32. msprobe/core/data_dump/scope.py +178 -178
  33. msprobe/core/grad_probe/__init__.py +0 -0
  34. msprobe/core/grad_probe/constant.py +71 -0
  35. msprobe/core/grad_probe/grad_compare.py +171 -0
  36. msprobe/core/grad_probe/utils.py +64 -0
  37. msprobe/docs/01.installation.md +89 -0
  38. msprobe/docs/02.config_introduction.md +165 -0
  39. msprobe/docs/03.config_examples.md +247 -0
  40. msprobe/docs/04.acl_config_examples.md +76 -0
  41. msprobe/docs/05.data_dump_PyTorch.md +198 -0
  42. msprobe/docs/06.data_dump_MindSpore.md +243 -0
  43. msprobe/docs/07.accuracy_checker_PyTorch.md +274 -0
  44. msprobe/docs/08.accuracy_checker_online_PyTorch.md +198 -0
  45. msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
  46. msprobe/docs/10.accuracy_compare_PyTorch.md +245 -0
  47. msprobe/docs/11.accuracy_compare_MindSpore.md +202 -0
  48. msprobe/docs/12.overflow_check_PyTorch.md +79 -0
  49. msprobe/docs/13.overflow_check_MindSpore.md +31 -0
  50. msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
  51. msprobe/docs/15.free_benchmarking_PyTorch.md +164 -0
  52. msprobe/docs/17.grad_probe.md +207 -0
  53. msprobe/docs/FAQ_PyTorch.md +177 -0
  54. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
  55. msprobe/docs/img/free_benchmark_framework.png +0 -0
  56. msprobe/docs/img/grad_probe_image-1.png +0 -0
  57. msprobe/docs/img/grad_probe_image-2.png +0 -0
  58. msprobe/docs/img/grad_probe_image-3.png +0 -0
  59. msprobe/docs/img/grad_probe_image-4.png +0 -0
  60. msprobe/docs/img/grad_probe_image.png +0 -0
  61. msprobe/mindspore/__init__.py +1 -1
  62. msprobe/mindspore/api_accuracy_checker/__init__.py +0 -0
  63. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +255 -0
  64. msprobe/mindspore/api_accuracy_checker/api_info.py +69 -0
  65. msprobe/mindspore/api_accuracy_checker/api_runner.py +156 -0
  66. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +197 -0
  67. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
  68. msprobe/mindspore/api_accuracy_checker/compute_element.py +239 -0
  69. msprobe/mindspore/api_accuracy_checker/main.py +9 -0
  70. msprobe/mindspore/api_accuracy_checker/type_mapping.py +114 -0
  71. msprobe/mindspore/api_accuracy_checker/utils.py +80 -0
  72. msprobe/mindspore/cell_processor.py +34 -0
  73. msprobe/mindspore/common/const.py +106 -0
  74. msprobe/mindspore/common/log.py +38 -0
  75. msprobe/mindspore/common/utils.py +81 -0
  76. msprobe/mindspore/compare/distributed_compare.py +75 -0
  77. msprobe/mindspore/compare/ms_compare.py +219 -0
  78. msprobe/mindspore/compare/ms_graph_compare.py +348 -0
  79. msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -0
  80. msprobe/mindspore/debugger/debugger_config.py +66 -51
  81. msprobe/mindspore/debugger/precision_debugger.py +126 -32
  82. msprobe/mindspore/dump/dump_tool_factory.py +35 -38
  83. msprobe/mindspore/dump/hook_cell/api_registry.py +118 -0
  84. msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -0
  85. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +922 -0
  86. msprobe/mindspore/dump/hook_cell/wrap_api.py +113 -0
  87. msprobe/mindspore/dump/jit_dump.py +72 -0
  88. msprobe/mindspore/dump/kernel_graph_dump.py +59 -60
  89. msprobe/mindspore/dump/kernel_kbyk_dump.py +64 -0
  90. msprobe/mindspore/free_benchmark/__init__.py +0 -0
  91. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +116 -0
  92. msprobe/mindspore/free_benchmark/common/__init__.py +0 -0
  93. msprobe/mindspore/free_benchmark/common/config.py +12 -0
  94. msprobe/mindspore/free_benchmark/common/handler_params.py +17 -0
  95. msprobe/mindspore/free_benchmark/common/utils.py +71 -0
  96. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -0
  97. msprobe/mindspore/free_benchmark/decorator/__init__.py +0 -0
  98. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +43 -0
  99. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +107 -0
  100. msprobe/mindspore/free_benchmark/handler/__init__.py +0 -0
  101. msprobe/mindspore/free_benchmark/handler/base_handler.py +90 -0
  102. msprobe/mindspore/free_benchmark/handler/check_handler.py +41 -0
  103. msprobe/mindspore/free_benchmark/handler/fix_handler.py +36 -0
  104. msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -0
  105. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +67 -0
  106. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +21 -0
  107. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +63 -0
  108. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +51 -0
  109. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +35 -0
  110. msprobe/mindspore/free_benchmark/perturbation/no_change.py +12 -0
  111. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +29 -0
  112. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +33 -0
  113. msprobe/mindspore/grad_probe/__init__.py +0 -0
  114. msprobe/mindspore/grad_probe/global_context.py +90 -0
  115. msprobe/mindspore/grad_probe/grad_analyzer.py +231 -0
  116. msprobe/mindspore/grad_probe/grad_monitor.py +27 -0
  117. msprobe/mindspore/grad_probe/grad_stat_csv.py +132 -0
  118. msprobe/mindspore/grad_probe/hook.py +94 -0
  119. msprobe/mindspore/grad_probe/utils.py +30 -0
  120. msprobe/mindspore/ms_config.py +128 -78
  121. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +44 -45
  122. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +34 -32
  123. msprobe/mindspore/runtime.py +4 -0
  124. msprobe/mindspore/service.py +378 -0
  125. msprobe/mindspore/task_handler_factory.py +24 -21
  126. msprobe/msprobe.py +105 -67
  127. msprobe/pytorch/__init__.py +4 -4
  128. msprobe/pytorch/api_accuracy_checker/common/config.py +53 -50
  129. msprobe/pytorch/api_accuracy_checker/common/utils.py +214 -224
  130. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +213 -216
  131. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +606 -545
  132. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
  133. msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
  134. msprobe/pytorch/api_accuracy_checker/compare/compare.py +386 -345
  135. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +73 -73
  136. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +245 -248
  137. msprobe/pytorch/api_accuracy_checker/config.yaml +10 -4
  138. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +335 -328
  139. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +200 -203
  140. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +133 -127
  141. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +592 -493
  142. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +70 -7
  143. msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
  144. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/__init__.py +0 -0
  145. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +197 -0
  146. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +325 -0
  147. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +204 -0
  148. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +219 -0
  149. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +10 -0
  150. msprobe/pytorch/bench_functions/__init__.py +15 -0
  151. msprobe/pytorch/bench_functions/apply_adam_w.py +28 -0
  152. msprobe/pytorch/bench_functions/confusion_transpose.py +19 -0
  153. msprobe/pytorch/bench_functions/fast_gelu.py +55 -0
  154. msprobe/pytorch/bench_functions/layer_norm_eval.py +6 -0
  155. msprobe/pytorch/bench_functions/linear.py +12 -0
  156. msprobe/pytorch/bench_functions/matmul_backward.py +48 -0
  157. msprobe/pytorch/bench_functions/npu_fusion_attention.py +509 -0
  158. msprobe/pytorch/bench_functions/rms_norm.py +15 -0
  159. msprobe/pytorch/bench_functions/rotary_mul.py +52 -0
  160. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +26 -0
  161. msprobe/pytorch/bench_functions/swiglu.py +55 -0
  162. msprobe/pytorch/common/__init__.py +2 -2
  163. msprobe/pytorch/common/compare_script.template +14 -14
  164. msprobe/pytorch/common/log.py +20 -31
  165. msprobe/pytorch/common/parse_json.py +39 -37
  166. msprobe/pytorch/common/utils.py +305 -224
  167. msprobe/pytorch/compare/distributed_compare.py +66 -111
  168. msprobe/pytorch/compare/mapping.yaml +607 -607
  169. msprobe/pytorch/compare/match.py +34 -36
  170. msprobe/pytorch/compare/pt_compare.py +50 -0
  171. msprobe/pytorch/debugger/debugger_config.py +95 -86
  172. msprobe/pytorch/debugger/precision_debugger.py +125 -95
  173. msprobe/pytorch/free_benchmark/__init__.py +8 -8
  174. msprobe/pytorch/free_benchmark/common/constant.py +70 -67
  175. msprobe/pytorch/free_benchmark/common/counter.py +71 -71
  176. msprobe/pytorch/free_benchmark/common/enums.py +37 -37
  177. msprobe/pytorch/free_benchmark/common/params.py +129 -129
  178. msprobe/pytorch/free_benchmark/common/utils.py +102 -98
  179. msprobe/pytorch/free_benchmark/compare/grad_saver.py +179 -183
  180. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +104 -104
  181. msprobe/pytorch/free_benchmark/main.py +105 -102
  182. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +13 -13
  183. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +41 -41
  184. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +90 -90
  185. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +104 -104
  186. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +63 -63
  187. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +68 -68
  188. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +28 -28
  189. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +45 -45
  190. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +19 -19
  191. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +217 -203
  192. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +39 -39
  193. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +23 -23
  194. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +30 -31
  195. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +170 -170
  196. msprobe/pytorch/function_factory.py +76 -0
  197. msprobe/pytorch/functional/dump_module.py +39 -39
  198. msprobe/pytorch/grad_probe/__init__.py +0 -0
  199. msprobe/pytorch/grad_probe/grad_monitor.py +91 -0
  200. msprobe/pytorch/grad_probe/grad_stat_csv.py +129 -0
  201. msprobe/pytorch/hook_module/api_registry.py +161 -161
  202. msprobe/pytorch/hook_module/hook_module.py +120 -109
  203. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1876
  204. msprobe/pytorch/hook_module/utils.py +30 -29
  205. msprobe/pytorch/hook_module/wrap_aten.py +110 -100
  206. msprobe/pytorch/hook_module/wrap_distributed.py +78 -75
  207. msprobe/pytorch/hook_module/wrap_functional.py +105 -108
  208. msprobe/pytorch/hook_module/wrap_npu_custom.py +93 -73
  209. msprobe/pytorch/hook_module/wrap_tensor.py +71 -72
  210. msprobe/pytorch/hook_module/wrap_torch.py +86 -88
  211. msprobe/pytorch/hook_module/wrap_vf.py +62 -64
  212. msprobe/pytorch/module_processer.py +138 -98
  213. msprobe/pytorch/online_dispatch/__init__.py +20 -20
  214. msprobe/pytorch/online_dispatch/compare.py +236 -236
  215. msprobe/pytorch/online_dispatch/dispatch.py +271 -273
  216. msprobe/pytorch/online_dispatch/dump_compare.py +155 -186
  217. msprobe/pytorch/online_dispatch/single_compare.py +391 -391
  218. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +49 -49
  219. msprobe/pytorch/online_dispatch/utils.py +130 -187
  220. msprobe/pytorch/parse.py +4 -4
  221. msprobe/pytorch/parse_tool/cli.py +32 -32
  222. msprobe/pytorch/parse_tool/lib/compare.py +260 -259
  223. msprobe/pytorch/parse_tool/lib/config.py +52 -51
  224. msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
  225. msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
  226. msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
  227. msprobe/pytorch/parse_tool/lib/parse_tool.py +158 -158
  228. msprobe/pytorch/parse_tool/lib/utils.py +316 -367
  229. msprobe/pytorch/parse_tool/lib/visualization.py +85 -90
  230. msprobe/pytorch/pt_config.py +188 -93
  231. msprobe/pytorch/service.py +246 -167
  232. mindstudio_probe-1.0.1.dist-info/RECORD +0 -228
  233. msprobe/config/README.md +0 -397
  234. msprobe/mindspore/doc/dump.md +0 -65
  235. msprobe/mindspore/dump/api_kbk_dump.py +0 -55
  236. msprobe/pytorch/compare/acc_compare.py +0 -1024
  237. msprobe/pytorch/compare/highlight.py +0 -100
  238. msprobe/pytorch/doc/FAQ.md +0 -193
  239. msprobe/pytorch/doc/api_accuracy_checker.md +0 -269
  240. msprobe/pytorch/doc/atat/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/206/320/245/342/226/221/321/206/320/235/320/276dump/321/206/320/260/320/227/321/205/320/227/320/226/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -182
  241. msprobe/pytorch/doc/dump.md +0 -207
  242. msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -176
  243. msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
  244. msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
  245. msprobe/pytorch/doc/run_overflow_check.md +0 -25
  246. msprobe/pytorch/doc//321/205/320/254/320/270/321/207/342/225/221/342/224/220/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/206/320/277/320/244/321/205/320/277/342/225/243.md +0 -90
  247. msprobe/test/core_ut/common/test_utils.py +0 -345
  248. msprobe/test/core_ut/data_dump/test_data_collector.py +0 -47
  249. msprobe/test/core_ut/data_dump/test_json_writer.py +0 -183
  250. msprobe/test/core_ut/data_dump/test_scope.py +0 -151
  251. msprobe/test/core_ut/test_common_config.py +0 -152
  252. msprobe/test/core_ut/test_file_check.py +0 -218
  253. msprobe/test/core_ut/test_log.py +0 -109
  254. msprobe/test/mindspore_ut/test_api_kbk_dump.py +0 -51
  255. msprobe/test/mindspore_ut/test_debugger_config.py +0 -42
  256. msprobe/test/mindspore_ut/test_dump_tool_factory.py +0 -51
  257. msprobe/test/mindspore_ut/test_kernel_graph_dump.py +0 -66
  258. msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py +0 -63
  259. msprobe/test/mindspore_ut/test_ms_config.py +0 -69
  260. msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py +0 -51
  261. msprobe/test/mindspore_ut/test_precision_debugger.py +0 -56
  262. msprobe/test/mindspore_ut/test_task_handler_factory.py +0 -58
  263. msprobe/test/pytorch_ut/advisor/test_advisor.py +0 -83
  264. msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +0 -108
  265. msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py +0 -39
  266. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py +0 -112
  267. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py +0 -77
  268. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py +0 -125
  269. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py +0 -10
  270. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py +0 -43
  271. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json +0 -179
  272. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json +0 -63
  273. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py +0 -99
  274. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +0 -115
  275. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py +0 -72
  276. msprobe/test/pytorch_ut/compare/test_acc_compare.py +0 -17
  277. msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py +0 -105
  278. msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py +0 -121
  279. msprobe/test/pytorch_ut/free_benchmark/test_main.py +0 -101
  280. msprobe/test/pytorch_ut/functional/test_dump_module.py +0 -15
  281. msprobe/test/pytorch_ut/hook_module/test_api_registry.py +0 -130
  282. msprobe/test/pytorch_ut/hook_module/test_hook_module.py +0 -42
  283. msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py +0 -65
  284. msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py +0 -35
  285. msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py +0 -20
  286. msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py +0 -35
  287. msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py +0 -43
  288. msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py +0 -11
  289. msprobe/test/pytorch_ut/test_pt_config.py +0 -69
  290. msprobe/test/pytorch_ut/test_service.py +0 -59
  291. msprobe/test/resources/advisor.txt +0 -3
  292. msprobe/test/resources/compare_result_20230703104808.csv +0 -9
  293. msprobe/test/resources/compare_result_without_accuracy.csv +0 -9
  294. msprobe/test/resources/config.yaml +0 -3
  295. msprobe/test/resources/npu_test.pkl +0 -8
  296. msprobe/test/run_test.sh +0 -30
  297. msprobe/test/run_ut.py +0 -58
  298. msprobe/test/test_module_processer.py +0 -64
  299. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/top_level.txt +0 -0
  300. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
  301. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
  302. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
  303. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
  304. /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
  305. /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
  306. /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
  307. /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
  308. /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
  309. /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
  310. /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
  311. /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
  312. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
  313. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
  314. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
  315. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
  316. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
  317. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
  318. /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
  319. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
  320. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
  321. /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
  322. /msprobe/{config → docs}/img/free_benchmark.png +0 -0
  323. /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
@@ -1,346 +1,366 @@
1
- import os
2
- import zlib
3
- from dataclasses import asdict
4
- from typing import List
5
-
6
- import numpy as np
7
- import torch
8
- from msprobe.core.common.exceptions import MsaccException
9
- from msprobe.core.common.file_check import path_len_exceeds_limit, change_mode
10
- from msprobe.core.common.log import logger
11
- from msprobe.core.common.const import Const, OverflowConst, FileCheckConst
12
- from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
13
- ModuleForwardInputsOutputs, TensorStatInfo
14
- from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
15
-
16
- try:
17
- import torch_npu
18
- except ImportError:
19
- pass
20
-
21
-
22
class PytorchDataProcessor(BaseDataProcessor):
    """Data processor that serializes PyTorch-specific values (tensors,
    dtypes, devices, sizes) into JSON-friendly statistic records."""

    pytorch_special_type = (torch.device, torch.dtype, torch.Size, torch.Tensor)

    def __init__(self, config, data_writer):
        super().__init__(config, data_writer)
        # Dispatch table for kwargs whose value needs torch-aware handling.
        self.torch_object_key = {
            "device": self.analyze_device_in_kwargs,
            "dtype": self.analyze_dtype_in_kwargs
        }

    @staticmethod
    def get_md5_for_tensor(x):
        """Return a CRC32 hex digest (8 hex chars) of the tensor's raw bytes.

        bfloat16 has no numpy equivalent, so it is widened to float32 first.
        """
        if x.dtype == torch.bfloat16:
            x = x.float()
        raw_bytes = x.cpu().detach().numpy().tobytes()
        return f"{zlib.crc32(raw_bytes):08x}"

    @staticmethod
    def analyze_device_in_kwargs(element):
        """Describe a ``device`` keyword argument as a type/value dict."""
        if isinstance(element, str):
            return {'type': "torch.device", "value": element}
        # Non-string: a torch.device-like object with .type and maybe .index.
        if hasattr(element, "index"):
            device_value = element.type + ":" + str(element.index)
        else:
            device_value = element.type
        return {'type': "torch.device", "value": device_value}

    @staticmethod
    def analyze_dtype_in_kwargs(element):
        """Describe a ``dtype`` keyword argument as a type/value dict."""
        return {"type": "torch.dtype", "value": str(element)}

    @staticmethod
    def get_stat_info(data):
        """Compute max/min/mean/norm statistics for a tensor.

        Meta tensors and empty tensors yield an all-None TensorStatInfo;
        bool tensors report containment of True/False; 0-dim tensors report
        their scalar item for every statistic.
        """
        stat = TensorStatInfo()
        if data.is_meta:
            return stat
        detached = data.detach()
        if detached.numel() == 0:
            return stat
        if detached.dtype == torch.bool:
            stat.max = True in detached
            stat.min = False not in detached
        elif not detached.shape:
            stat.max = stat.min = stat.mean = stat.norm = detached.item()
        else:
            if not detached.is_floating_point() or detached.dtype == torch.float64:
                detached = detached.float()
            # NOTE(review): the private VariableFunctions entry points are
            # used instead of torch.max/min/... — presumably to bypass the
            # wrapped torch.* APIs during dumping; confirm before changing.
            ops = torch._C._VariableFunctionsClass
            stat.max = ops.max(detached).item()
            stat.min = ops.min(detached).item()
            stat.mean = ops.mean(detached).item()
            stat.norm = ops.norm(detached).item()
        return stat

    @staticmethod
    def _analyze_torch_size(arg):
        """Describe a ``torch.Size`` as a plain list of ints."""
        return {"type": "torch.Size", "value": list(arg)}

    @classmethod
    def get_special_types(cls):
        """Extend the base special types with the torch-specific ones."""
        return super().get_special_types() + cls.pytorch_special_type

    def analyze_single_element(self, element, suffix_stack):
        """Convert one leaf element into its JSON description.

        Returns None for types this processor does not recognize.
        """
        if suffix_stack and suffix_stack[-1] in self.torch_object_key:
            return self.torch_object_key[suffix_stack[-1]](element)
        if isinstance(element, torch.Size):
            return self._analyze_torch_size(element)
        converted, numpy_type = self._convert_numpy_to_builtin(element)
        if converted is not element:
            return self._analyze_numpy(converted, numpy_type)
        if isinstance(element, torch.Tensor):
            return self._analyze_tensor(element, Const.SEP.join(suffix_stack))
        if isinstance(element, (bool, int, float, str, slice)):
            return self._analyze_builtin(element)
        return None

    def analyze_element(self, element):
        """Recursively describe a (possibly nested) element."""
        return self.recursive_apply_transform(element, self.analyze_single_element)

    def _analyze_tensor(self, tensor, suffix):
        """Build the JSON record for a tensor: dtype, shape, statistics and,
        in md5 summary mode, a CRC32 digest."""
        stat = self.get_stat_info(tensor)
        tensor_json = {
            'type': 'torch.Tensor',
            'dtype': str(tensor.dtype),
            "shape": tensor.shape,
            "Max": stat.max,
            "Min": stat.min,
            "Mean": stat.mean,
            "Norm": stat.norm,
            "requires_grad": tensor.requires_grad,
        }
        if self.config.summary_mode == "md5":
            tensor_json["md5"] = self.get_md5_for_tensor(tensor)
        return tensor_json
120
-
121
-
122
class StatisticsDataProcessor(PytorchDataProcessor):
    """Statistics-only dump mode: inherits PytorchDataProcessor unchanged,
    since the base _analyze_tensor already records only summary statistics."""
124
-
125
-
126
class TensorDataProcessor(PytorchDataProcessor):
    """Dump processor that also saves each tensor's full data to disk."""

    def _analyze_tensor(self, tensor, suffix):
        """Persist the tensor to its dump file, then return the base record
        extended with the saved file's name."""
        dump_data_name, file_path = self.get_save_file_path(suffix)
        if path_len_exceeds_limit(file_path):
            # Too-long paths are skipped (with a warning) rather than failing.
            logger.warning(f'The file path {file_path} length exceeds limit.')
        else:
            torch.save(tensor, file_path)
            change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY)
        tensor_record = super()._analyze_tensor(tensor, suffix)
        tensor_record.update({"data_name": dump_data_name})
        return tensor_record
137
-
138
-
139
class OverflowCheckDataProcessor(PytorchDataProcessor):
    """Dump processor that keeps data only for calls where an overflow
    (inf/nan in stats, or the NPU overflow flag) is detected.

    Bug fixed: ``check_overflow_npu`` called the misspelled method
    ``overflow_debug_mode_enalbe`` (defined as ``overflow_debug_mode_enable``),
    which raised AttributeError whenever the non-inf-nan NPU path ran.
    """

    __slots__ = ["cached_tensors_and_file_paths"]

    def __init__(self, config, data_writer):
        super().__init__(config, data_writer)
        # Tensors staged per API call; flushed to disk only on overflow.
        self.cached_tensors_and_file_paths = {}
        self.real_overflow_dump_times = 0
        # -1 means "no limit" (see inc_and_check_overflow_times).
        self.overflow_nums = config.overflow_num
        self.bits_for_overflow = 8

    @staticmethod
    def overflow_debug_mode_enable():
        """Return True when the overflow-debug-mode env switch is enabled."""
        overflow_mode = os.getenv(OverflowConst.OVERFLOW_DEBUG_MODE_ENABLE, Const.ENV_DISABLE)
        return overflow_mode == Const.ENV_ENABLE

    @staticmethod
    def handle_tensor_extremum_nan_inf(data_clone, operator):
        """Return the max/min of *data_clone* ignoring inf/nan where possible.

        All-nan tensors yield nan; tensors with no finite values fall back to
        the extremum over non-nan (i.e. +/-inf) elements.
        """
        data_nan = torch._C._VariableFunctionsClass.isnan(data_clone)
        if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel():
            return float('nan')
        finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone)
        if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0:
            finite_values = data_clone[finite_mask]
            return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \
                torch._C._VariableFunctionsClass.min(finite_values).item()
        else:
            data_no_nan = data_clone[~data_nan]
            return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \
                torch._C._VariableFunctionsClass.min(data_no_nan).item()

    def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
        """Run the base forward analysis; return its result only when an
        overflow was observed while analyzing the tensors, else None."""
        self.has_overflow = False
        api_info_struct = super().analyze_forward(name, module, module_input_output)
        self.maybe_save_overflow_data_and_check_overflow_times()
        return api_info_struct if self.has_overflow else None

    def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs):
        """Backward counterpart of analyze_forward."""
        self.has_overflow = False
        api_info_struct = super().analyze_backward(name, module, module_input_output)
        self.maybe_save_overflow_data_and_check_overflow_times()
        return api_info_struct if self.has_overflow else None

    def maybe_save_overflow_data_and_check_overflow_times(self):
        """Flush staged tensors to disk when an overflow occurred, then clear
        the staging cache either way."""
        if self.has_overflow:
            for file_path, tensor in self.cached_tensors_and_file_paths.items():
                torch.save(tensor, file_path)
                change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY)
            self.inc_and_check_overflow_times()
        self.cached_tensors_and_file_paths = {}

    def inc_and_check_overflow_times(self):
        """Count one dumped overflow and raise once the configured limit is
        reached; a limit of -1 disables the check."""
        self.real_overflow_dump_times += 1
        if self.overflow_nums == -1:
            return
        if self.real_overflow_dump_times >= self.overflow_nums:
            raise MsaccException(MsaccException.OVERFLOW_NUMS_ERROR, str(self.real_overflow_dump_times))

    def check_overflow_npu(self):
        """Query the NPU overflow flag (debug-mode register, or native API).

        Fix: previously called ``self.overflow_debug_mode_enalbe()`` (typo),
        raising AttributeError on this path.
        """
        if self.overflow_debug_mode_enable():
            float_status = torch.zeros(self.bits_for_overflow).npu()
            result = torch_npu.npu_get_float_status(float_status, OverflowConst.OVERFLOW_DEBUG_MODE)
            if result.cpu()[0] != 0:
                return True
            else:
                return False
        else:
            return torch_npu._C._check_overflow_npu()

    def clear_overflow_npu(self):
        """Reset the NPU overflow flag so later checks see fresh state."""
        if self.overflow_debug_mode_enable():
            float_status = torch.zeros(self.bits_for_overflow).npu()
            torch_npu.npu_clear_float_status(float_status, OverflowConst.OVERFLOW_DEBUG_MODE)
        else:
            torch_npu._C._clear_overflow_npu()

    def _analyze_maybe_overflow_tensor(self, tensor_json, tensor):
        """Set ``self.has_overflow`` from the already-computed Max/Min stats
        (inf/nan-capable devices) or from the NPU overflow flag otherwise.

        On the inf/nan path the record also gets Max/Min recomputed with
        inf/nan excluded, for easier triage.
        """
        data_clone = tensor.detach()
        if hasattr(torch_npu._C, '_npu_is_support_inf_nan') and torch_npu._C._npu_is_support_inf_nan():
            if tensor_json['Max'] is None:
                return
            if np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']):
                tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(data_clone, "max")
                self.has_overflow = True
            if np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min']):
                tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(data_clone, "min")
                self.has_overflow = True
        else:
            self.has_overflow = self.check_overflow_npu()
            if self.has_overflow:
                self.clear_overflow_npu()

    def _analyze_tensor(self, tensor, suffix):
        """Stage the tensor for a possible overflow dump and return the base
        record extended with overflow info and the dump file name."""
        dump_data_name, file_path = self.get_save_file_path(suffix)
        if not path_len_exceeds_limit(file_path):
            self.cached_tensors_and_file_paths.update({file_path: tensor})
        else:
            logger.warning(f'The file path {file_path} length exceeds limit.')
        single_arg = super()._analyze_tensor(tensor, suffix)
        self._analyze_maybe_overflow_tensor(single_arg, tensor)
        single_arg.update({"data_name": dump_data_name})
        return single_arg
240
-
241
-
242
class FreeBenchmarkDataProcessor(PytorchDataProcessor):
    """Processor that runs free-benchmark perturbation checks around each
    API call and records unequal results to a CSV file."""

    def __init__(self, config, data_writer):
        super().__init__(config, data_writer)
        self.checker = FreeBenchmarkCheck(config=config)
        self._return_forward_new_output = None
        self._forward_new_output = None

    def update_iter(self, current_iter):
        """Propagate the current iteration number to the benchmark checker."""
        super().update_iter(current_iter)
        self.checker.update_iter(current_iter)

    def update_unequal_rows(self, unequal_rows: List[UnequalRow]):
        """Append each unequal-result row to the free-benchmark CSV."""
        if not unequal_rows:
            return
        for row in unequal_rows:
            row_fields = asdict(row)
            self.data_writer.write_data_to_csv(
                row_fields.values(),
                row_fields.keys(),
                self.data_writer.free_benchmark_file_path
            )

    def analyze_pre_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
        """Let the checker observe the inputs before the real forward runs."""
        self.checker.pre_forward(name, module, self, module_input_output.args, module_input_output.kwargs)

    def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
        """Run the perturbed-forward comparison, record unequal rows, and
        stash a replacement output when the checker is configured to fix."""
        new_output, unequal_rows = self.checker.forward(
            name,
            module,
            module_input_output.args,
            module_input_output.kwargs,
            module_input_output.output,
        )
        self.update_unequal_rows(unequal_rows)
        if self.checker.if_fix():
            self._return_forward_new_output = True
            self._forward_new_output = new_output

    def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs):
        """Let the checker observe gradients on backward."""
        self.checker.backward(name, module, module_input_output.grad_output)
284
-
285
-
286
- class KernelDumpDataProcessor(PytorchDataProcessor):
287
- forward_init_status = False
288
- multi_output_apis = ["_sort_", "npu_flash_attention"]
289
-
290
- def __init__(self, config, data_writer):
291
- super().__init__(config, data_writer)
292
-
293
- def analyze_forward(self, name, module, module_input_output):
294
- if self.config.is_forward_acl_dump:
295
- self.forward_acl_dump(name, module, module_input_output)
296
- else:
297
- self.dump_mode_backward_acl_dump(name, module, module_input_output)
298
-
299
- def forward_acl_dump(self, name, module, module_input_output):
300
- if not KernelDumpDataProcessor.forward_init_status:
301
- KernelDumpDataProcessor.forward_init_status = True
302
- torch_npu.npu.synchronize()
303
- torch_npu.npu.init_dump()
304
- torch_npu.npu.set_dump(self.config.acl_config)
305
- torch_npu.npu.synchronize()
306
- if self.op_need_trigger(name):
307
- module.forward(*module_input_output.args, **module_input_output.kwargs).cpu()
308
- else:
309
- module.forward(*module_input_output.args, **module_input_output.kwargs)
310
- torch_npu.npu.synchronize()
311
- torch_npu.npu.finalize_dump()
312
- torch_npu.npu.synchronize()
313
- KernelDumpDataProcessor.forward_init_status = False
314
- logger.info("Dump %s op file." % name)
315
-
316
- def acl_backward_dump_status(self, output, grad, module_name):
317
- if isinstance(output, torch.Tensor):
318
- output.backward(grad, retain_graph=True)
319
- return True
320
-
321
- for api_name in KernelDumpDataProcessor.multi_output_apis:
322
- if api_name in module_name:
323
- output[0].backward(grad, retain_graph=True)
324
- return True
325
- return False
326
-
327
- def dump_mode_backward_acl_dump(self, name, module, module_input_output):
328
- grad_path = self.config.backward_input.get(name)
329
- if not KernelDumpDataProcessor.forward_init_status:
330
- KernelDumpDataProcessor.forward_init_status = True
331
- output = module.forward(*module_input_output.args, **module_input_output.kwargs)
332
- grad = torch.load(grad_path).to("npu").requires_grad_()
333
- torch_npu.npu.init_dump()
334
- torch_npu.npu.set_dump(self.config.acl_config)
335
- torch_npu.npu.synchronize()
336
- if not self.acl_backward_dump_status(output, grad, name):
337
- logger.warning("The output of {} is not of tensor type and cannot be automatically derived. "
338
- "you can manually construct a single API backward case for ACL dump.".format(
339
- name))
340
- torch_npu.npu.synchronize()
341
- torch_npu.npu.finalize_dump()
342
- KernelDumpDataProcessor.forward_init_status = False
343
- logger.info("Dump %s op file." % name)
344
-
345
- def op_need_trigger(self, module_name):
346
- return 'Tensor.__getitem__.' in module_name
1
+ import zlib
2
+ from dataclasses import asdict
3
+ from typing import List
4
+
5
+ import numpy as np
6
+ import torch
7
+ from msprobe.core.common.file_utils import path_len_exceeds_limit, change_mode
8
+ from msprobe.core.common.log import logger
9
+ from msprobe.core.common.const import Const, OverflowConst, FileCheckConst
10
+ from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
11
+ ModuleForwardInputsOutputs, TensorStatInfo
12
+ from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
13
+ from msprobe.pytorch.common.utils import save_pt, load_pt
14
+
15
+ try:
16
+ import torch_npu
17
+ is_gpu = False
18
+ except ImportError:
19
+ is_gpu = True
20
+
21
+
22
class PytorchDataProcessor(BaseDataProcessor):
    """Base processor for PyTorch dumps: serializes tensors and other torch
    objects (device, dtype, Size) into JSON-friendly statistic dicts."""

    # torch types that receive dedicated handling in analyze_single_element.
    pytorch_special_type = (torch.device, torch.dtype, torch.Size, torch.Tensor)

    def __init__(self, config, data_writer):
        super().__init__(config, data_writer)
        # Keyword arguments named 'device'/'dtype' are serialized via these
        # dedicated analyzers instead of the generic element path.
        self.torch_object_key = {
            "device": self.analyze_device_in_kwargs,
            "dtype": self.analyze_dtype_in_kwargs
        }

    @staticmethod
    def get_md5_for_tensor(x):
        """Return an 8-hex-char checksum of the tensor's raw bytes.

        NOTE(review): despite the name this is zlib CRC32, not MD5 — it is
        stored under the dump format's Const.MD5 key by _analyze_tensor.
        """
        if x.dtype == torch.bfloat16:
            # numpy cannot represent bfloat16; widen to float32 first.
            x = x.float()
        tensor_bytes = x.cpu().detach().numpy().tobytes()
        crc32_hash = zlib.crc32(tensor_bytes)
        return f"{crc32_hash:08x}"

    @staticmethod
    def analyze_device_in_kwargs(element):
        """Serialize a device kwarg (torch.device or str) as a type/value dict.

        NOTE(review): torch.device exposes .index even when it is None, so
        hasattr(element, "index") is likely always True and an index-less
        device would serialize as e.g. "npu:None" — confirm whether callers
        only ever pass indexed devices, or check `element.index is not None`.
        """
        single_arg = {}
        single_arg.update({'type': "torch.device"})
        if not isinstance(element, str):
            if hasattr(element, "index"):
                device_value = element.type + ":" + str(element.index)
            else:
                device_value = element.type
            single_arg.update({"value": device_value})
        else:
            single_arg.update({"value": element})
        return single_arg

    @staticmethod
    def analyze_dtype_in_kwargs(element):
        """Serialize a dtype kwarg as its string representation."""
        return {"type": "torch.dtype", "value": str(element)}

    @staticmethod
    def get_stat_info(data):
        """Compute max/min/mean/norm statistics for a tensor.

        Meta and empty tensors yield an all-None TensorStatInfo. Bool tensors
        report max = "any element True" and min = "all elements True". 0-dim
        tensors report their scalar item for every field. Complex tensors use
        statistics over element magnitudes (mean/max/min only, no norm).
        """
        tensor_stat = TensorStatInfo()
        if data.is_meta:
            return tensor_stat
        data_clone = data.detach()
        if data_clone.numel() == 0:
            return tensor_stat
        elif data_clone.dtype == torch.bool:
            # `True in t` <=> any element equals True; `False not in t` <=> all True.
            tensor_stat.max = True in data_clone
            tensor_stat.min = False not in data_clone
        elif not data_clone.shape:
            tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data_clone.item()
        elif torch.is_complex(data_clone):
            # Reduce over magnitudes, computed on CPU via numpy.
            data_np = data_clone.cpu().numpy()
            data_abs = np.abs(data_np)
            tensor_stat.max = np.max(data_abs).item()
            tensor_stat.min = np.min(data_abs).item()
            tensor_stat.mean = np.mean(data_abs).item()
        else:
            if not data_clone.is_floating_point() or data_clone.dtype == torch.float64:
                # Integer and float64 tensors are cast to float32 before the
                # reductions — presumably for device-op support; confirm.
                data_clone = data_clone.float()
            # torch._C._VariableFunctionsClass is used instead of the public
            # torch.* API — presumably to bypass the dump hooks wrapping the
            # public functions; confirm before "simplifying" to torch.max etc.
            tensor_stat.max = torch._C._VariableFunctionsClass.max(data_clone).item()
            tensor_stat.min = torch._C._VariableFunctionsClass.min(data_clone).item()
            tensor_stat.mean = torch._C._VariableFunctionsClass.mean(data_clone).item()
            tensor_stat.norm = torch._C._VariableFunctionsClass.norm(data_clone).item()
        return tensor_stat

    @staticmethod
    def handle_tensor_extremum_nan_inf(tensor, operator):
        """Return the tensor's extremum over values that are not inf/nan.

        operator is 'max' or 'min'. If every element is nan, returns nan.
        When no finite value exists at all, falls back to the extremum over
        the non-nan values (i.e. +/-inf).
        """
        data_clone = tensor.detach()
        data_nan = torch._C._VariableFunctionsClass.isnan(data_clone)
        if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel():
            return float('nan')
        finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone)
        if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0:
            finite_values = data_clone[finite_mask]
            return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \
                torch._C._VariableFunctionsClass.min(finite_values).item()
        else:
            data_no_nan = data_clone[~data_nan]
            return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \
                torch._C._VariableFunctionsClass.min(data_no_nan).item()

    @staticmethod
    def _analyze_torch_size(arg):
        """Serialize a torch.Size as a plain list of ints."""
        return {"type": "torch.Size", "value": list(arg)}

    @classmethod
    def get_special_types(cls):
        """Extend the base processor's special types with the torch ones."""
        return super().get_special_types() + cls.pytorch_special_type

    def analyze_single_element(self, element, suffix_stack):
        """Serialize one leaf element of an API call's args/kwargs/output.

        suffix_stack holds the path of keys/indices to the element; its last
        entry selects the device/dtype special-casing. Elements of types not
        recognized here serialize to an empty dict.
        """
        if suffix_stack and suffix_stack[-1] in self.torch_object_key:
            return self.torch_object_key[suffix_stack[-1]](element)
        if isinstance(element, torch.Size):
            return self._analyze_torch_size(element)
        converted_numpy, numpy_type = self._convert_numpy_to_builtin(element)
        if converted_numpy is not element:
            # element was a numpy scalar; record it via the numpy path.
            return self._analyze_numpy(converted_numpy, numpy_type)
        if isinstance(element, torch.Tensor):
            return self._analyze_tensor(element, Const.SEP.join(suffix_stack))
        if isinstance(element, (bool, int, float, str, slice, type(Ellipsis))):
            return self._analyze_builtin(element)
        return {}

    def _analyze_tensor(self, tensor, suffix):
        """Build the per-tensor JSON entry.

        Includes type/dtype/shape, the stats from get_stat_info, the inf/nan-
        excluded extrema when Max/Min are non-finite, and a checksum when
        summary_mode is md5.
        """
        tensor_stat = self.get_stat_info(tensor)
        tensor_json = {}
        tensor_json.update({'type': 'torch.Tensor'})
        tensor_json.update({'dtype': str(tensor.dtype)})
        tensor_json.update({"shape": tensor.shape})
        tensor_json.update({"Max": tensor_stat.max})
        tensor_json.update({"Min": tensor_stat.min})
        tensor_json.update({"Mean": tensor_stat.mean})
        tensor_json.update({"Norm": tensor_stat.norm})
        tensor_json.update({"requires_grad": tensor.requires_grad})

        if tensor_stat.max is not None:
            if np.isinf(tensor_stat.max) or np.isnan(tensor_stat.max):
                tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "max")
        if tensor_stat.min is not None:
            if np.isinf(tensor_stat.min) or np.isnan(tensor_stat.min):
                tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "min")

        if self.config.summary_mode == Const.MD5:
            tensor_md5 = self.get_md5_for_tensor(tensor)
            tensor_json.update({Const.MD5: tensor_md5})
        return tensor_json
147
+
148
+
149
class StatisticsDataProcessor(PytorchDataProcessor):
    """Statistics-only dump mode: records the per-tensor summaries produced
    by PytorchDataProcessor without writing tensor data to disk."""
    pass
151
+
152
+
153
class TensorDataProcessor(PytorchDataProcessor):
    """Full-tensor dump mode: saves every analyzed tensor to disk in addition
    to the statistics recorded by the base class."""

    def _analyze_tensor(self, tensor, suffix):
        """Persist the tensor and return its stats entry with "data_name" set.

        Fix: guard the save with path_len_exceeds_limit, consistent with
        OverflowCheckDataProcessor._analyze_tensor, so an over-long dump path
        is reported with a warning instead of attempting a write that the
        filesystem may reject.
        """
        dump_data_name, file_path = self.get_save_file_path(suffix)
        if not path_len_exceeds_limit(file_path):
            # contiguous() so the file holds the actual element layout;
            # detach() drops the autograd graph before serialization.
            saved_tensor = tensor.contiguous().detach()
            save_pt(saved_tensor, file_path)
        else:
            logger.warning(f'The file path {file_path} length exceeds limit.')
        single_arg = super()._analyze_tensor(tensor, suffix)
        single_arg.update({"data_name": dump_data_name})
        return single_arg
161
+
162
+
163
class OverflowCheckDataProcessor(PytorchDataProcessor):
    """Processor that dumps tensors only for API calls that overflow.

    On devices supporting inf/nan mode, overflow means a tensor's Max/Min is
    inf or nan; otherwise the NPU hardware overflow flag is consulted.
    Tensors are staged in memory per API call and written to disk only when
    that call is judged to have overflowed.
    """

    # NOTE(review): __slots__ is ineffective here unless every base class also
    # defines __slots__, and __init__ assigns several attributes not listed —
    # instances almost certainly still carry a __dict__; confirm intent.
    __slots__ = ["cached_tensors_and_file_paths"]

    def __init__(self, config, data_writer):
        super().__init__(config, data_writer)
        self.has_overflow = False                # per-API-call overflow verdict
        self.support_inf_nan = None              # lazily detected; None = unknown
        self.cached_inplace_api_info = {}        # pre-forward info for inplace APIs
        self.cached_tensors_and_file_paths = {}  # tensors staged for dump on overflow
        self.bits_for_overflow = 8
        self.real_overflow_nums = 0              # overflows observed so far
        self.overflow_nums = config.overflow_nums  # -1 means unlimited

    @property
    def is_terminated(self):
        """Return True once the configured overflow count has been reached."""
        if self.overflow_nums == -1:
            return False
        if self.real_overflow_nums >= self.overflow_nums:
            logger.info(f"[msprobe] 超过预设溢出次数 当前溢出次数: {self.real_overflow_nums}")
            return True
        return False

    def analyze_pre_forward_inplace(self, name, module_input_output: ModuleForwardInputsOutputs):
        """Reset the per-call state and cache the inplace API's input info.

        Always returns None: nothing is emitted until the post-call check in
        analyze_forward_inplace decides whether an overflow occurred.
        """
        self.has_overflow = False
        self._is_support_inf_nan()
        self.cached_inplace_api_info = super().analyze_pre_forward_inplace(name, module_input_output)
        return None

    def analyze_forward_inplace(self, name, module_input_output: ModuleForwardInputsOutputs):
        """Merge post-call info into the cached pre-call info for an inplace
        API and return the combined struct only if the call overflowed."""
        self._is_support_inf_nan()
        api_info_struct = super().analyze_forward_inplace(name, module_input_output)
        if name in self.cached_inplace_api_info and name in api_info_struct:
            self.cached_inplace_api_info[name].update(api_info_struct[name])
        elif name in api_info_struct:
            self.cached_inplace_api_info = api_info_struct
        self.handle_overflow()
        return self.cached_inplace_api_info if self.has_overflow else None

    def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
        """Analyze a forward call; return its info only if it overflowed."""
        self.has_overflow = False
        self._is_support_inf_nan()
        api_info_struct = super().analyze_forward(name, module, module_input_output)
        self.handle_overflow()
        return api_info_struct if self.has_overflow else None

    def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs):
        """Analyze a backward call; return its info only if it overflowed."""
        self.has_overflow = False
        self._is_support_inf_nan()
        api_info_struct = super().analyze_backward(name, module, module_input_output)
        self.handle_overflow()
        return api_info_struct if self.has_overflow else None

    def handle_overflow(self):
        """Persist the staged tensors if the just-analyzed call overflowed.

        On devices without inf/nan support the hardware overflow flag decides
        (and is cleared). The staging cache is reset in every case.
        """
        if not self.support_inf_nan:
            self._analyze_maybe_overflow_flag()
        if self.has_overflow:
            for file_path, tensor in self.cached_tensors_and_file_paths.items():
                save_pt(tensor, file_path)
            self.real_overflow_nums += 1
        self.cached_tensors_and_file_paths = {}

    def _is_support_inf_nan(self):
        """Lazily detect inf/nan-mode support (GPU: always supported)."""
        if self.support_inf_nan is not None:
            return
        try:
            self.support_inf_nan = is_gpu or torch_npu.npu.utils.is_support_inf_nan()
        except Exception:
            logger.warning(f"Unable to determine if the current device supports inf/nan mode, default not supported.")
            self.support_inf_nan = False

    def _analyze_maybe_overflow_flag(self):
        """Read (and, if set, clear) the NPU hardware overflow flag."""
        try:
            self.has_overflow = torch_npu.npu.utils.get_npu_overflow_flag()
            if self.has_overflow:
                torch_npu.npu.utils.clear_npu_overflow_flag()
        except Exception as e:
            logger.error(f"Overflow check failed, the current environment may be abnormal.")
            raise RuntimeError(f"overflow check failed") from e

    def _analyze_maybe_overflow_tensor(self, tensor_json):
        """Set has_overflow when the entry's Max or Min is inf/nan."""
        if tensor_json['Max'] is None or tensor_json['Min'] is None:
            return
        self.has_overflow = np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']) or \
            np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min'])

    def _analyze_tensor(self, tensor, suffix):
        """Stage the tensor for a possible dump and check it for overflow.

        The tensor is only written to disk later, in handle_overflow, and only
        if an overflow is detected for the current API call. The inf/nan check
        runs only until the first overflow is found and only on devices that
        support inf/nan mode (otherwise the hardware flag path is used).
        """
        dump_data_name, file_path = self.get_save_file_path(suffix)
        if not path_len_exceeds_limit(file_path):
            self.cached_tensors_and_file_paths.update({file_path: tensor})
        else:
            logger.warning(f'The file path {file_path} length exceeds limit.')
        single_arg = super()._analyze_tensor(tensor, suffix)
        single_arg.update({"data_name": dump_data_name})
        if not self.has_overflow and self.support_inf_nan:
            self._analyze_maybe_overflow_tensor(single_arg)
        return single_arg
259
+
260
+
261
class FreeBenchmarkDataProcessor(PytorchDataProcessor):
    """Processor that runs free-benchmark perturbation checks around each
    hooked API call and records any unequal-result rows to CSV."""

    def __init__(self, config, data_writer):
        super().__init__(config, data_writer)
        self.checker = FreeBenchmarkCheck(config=config)
        self._return_forward_new_output = None
        self._forward_new_output = None

    def update_iter(self, current_iter):
        """Advance the iteration counter for both base state and checker."""
        super().update_iter(current_iter)
        self.checker.update_iter(current_iter)

    def update_unequal_rows(self, unequal_rows: List[UnequalRow]):
        """Write one CSV line per unequal-result row; no-op for empty input."""
        if not unequal_rows:
            return
        writer = self.data_writer
        target_path = writer.free_benchmark_file_path
        for unequal_row in unequal_rows:
            row_fields = asdict(unequal_row)
            writer.write_data_to_csv(row_fields.values(), row_fields.keys(), target_path)

    def analyze_pre_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
        """Let the checker observe the call before the real forward runs."""
        self.checker.pre_forward(
            name, module, self, module_input_output.args, module_input_output.kwargs
        )

    def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
        """Run the checker's perturbed forward, record mismatches, and — when
        the checker requests a fix — stage its output as the replacement."""
        new_output, unequal_rows = self.checker.forward(
            name,
            module,
            module_input_output.args,
            module_input_output.kwargs,
            module_input_output.output,
        )
        self.update_unequal_rows(unequal_rows)
        if self.checker.if_fix():
            self._return_forward_new_output = True
            self._forward_new_output = new_output

    def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs):
        """Hand the backward pass's incoming gradients to the checker."""
        self.checker.backward(name, module, module_input_output.grad_input)
303
+
304
+
305
class KernelDumpDataProcessor(PytorchDataProcessor):
    """Processor that triggers kernel-level (ACL) dumps on Ascend NPU by
    re-executing the hooked API between torch_npu init_dump/finalize_dump."""

    # Class-level re-entrancy guard: True while an ACL dump is in progress.
    forward_init_status = False
    # APIs whose forward returns a tuple; backward is driven via output[0].
    multi_output_apis = ["_sort_", "npu_flash_attention"]

    def __init__(self, config, data_writer):
        super().__init__(config, data_writer)

    def analyze_forward(self, name, module, module_input_output):
        """Dispatch to forward- or backward-mode ACL dump per config."""
        if self.config.is_forward_acl_dump:
            self.forward_acl_dump(name, module, module_input_output)
        else:
            self.dump_mode_backward_acl_dump(name, module, module_input_output)

    def forward_acl_dump(self, name, module, module_input_output):
        """Re-run the forward pass with ACL dumping enabled.

        NOTE(review): if any call in the guarded section raises, the guard is
        never reset and later dumps are silently skipped — consider
        try/finally. Indentation was reconstructed from a flattened view; the
        guard reset and the log are assumed to sit outside the `if`, matching
        the sibling dump_mode_backward_acl_dump — confirm against upstream.
        """
        if not KernelDumpDataProcessor.forward_init_status:
            KernelDumpDataProcessor.forward_init_status = True
            torch_npu.npu.synchronize()
            torch_npu.npu.init_dump()
            torch_npu.npu.set_dump(self.config.acl_config)
            torch_npu.npu.synchronize()
            if self.op_need_trigger(name):
                # .cpu() on the result — presumably to force device execution
                # before finalize_dump for this API; confirm.
                module.forward(*module_input_output.args, **module_input_output.kwargs).cpu()
            else:
                module.forward(*module_input_output.args, **module_input_output.kwargs)
            torch_npu.npu.synchronize()
            torch_npu.npu.finalize_dump()
            torch_npu.npu.synchronize()
        KernelDumpDataProcessor.forward_init_status = False
        logger.info("Dump %s op file." % name)

    def acl_backward_dump_status(self, output, grad, module_name):
        """Drive backward on the forward output; return True if it ran.

        Tensor outputs call backward directly; for known multi-output APIs the
        first element of the output tuple is used. Returns False when the
        output shape is not understood (caller logs a warning).
        """
        if isinstance(output, torch.Tensor):
            output.backward(grad, retain_graph=True)
            return True

        for api_name in KernelDumpDataProcessor.multi_output_apis:
            if api_name in module_name:
                output[0].backward(grad, retain_graph=True)
                return True
        return False

    def dump_mode_backward_acl_dump(self, name, module, module_input_output):
        """Re-run forward, then drive backward with a pre-saved gradient while
        ACL dumping is enabled, so the backward kernels are captured.

        The gradient tensor is loaded from the path configured for this API in
        config.backward_input.
        """
        grad_path = self.config.backward_input.get(name)
        if not KernelDumpDataProcessor.forward_init_status:
            KernelDumpDataProcessor.forward_init_status = True
            output = module.forward(*module_input_output.args, **module_input_output.kwargs)
            pt = load_pt(grad_path)
            grad = pt.to("npu").requires_grad_()
            torch_npu.npu.init_dump()
            torch_npu.npu.set_dump(self.config.acl_config)
            torch_npu.npu.synchronize()
            if not self.acl_backward_dump_status(output, grad, name):
                logger.warning("The output of {} is not of tensor type and cannot be automatically derived. "
                               "you can manually construct a single API backward case for ACL dump.".format(
                                   name))
            torch_npu.npu.synchronize()
            torch_npu.npu.finalize_dump()
        KernelDumpDataProcessor.forward_init_status = False
        logger.info("Dump %s op file." % name)

    def op_need_trigger(self, module_name):
        """Return True for Tensor.__getitem__ calls, which need their result
        pulled to CPU in forward_acl_dump (see the .cpu() branch there)."""
        return 'Tensor.__getitem__.' in module_name