mindstudio-probe 1.0.1__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323) hide show
  1. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/LICENSE +201 -201
  2. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/METADATA +36 -30
  3. mindstudio_probe-1.0.4.dist-info/RECORD +276 -0
  4. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/WHEEL +1 -1
  5. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/entry_points.txt +1 -0
  6. msprobe/README.md +101 -182
  7. msprobe/__init__.py +1 -0
  8. msprobe/{config/config.json → config.json} +49 -27
  9. msprobe/core/__init__.py +0 -0
  10. msprobe/{pytorch → core}/advisor/advisor.py +124 -124
  11. msprobe/{pytorch → core}/advisor/advisor_const.py +59 -59
  12. msprobe/{pytorch → core}/advisor/advisor_result.py +58 -58
  13. msprobe/core/common/const.py +341 -241
  14. msprobe/core/common/exceptions.py +100 -88
  15. msprobe/core/common/{file_check.py → file_utils.py} +478 -265
  16. msprobe/core/common/log.py +76 -55
  17. msprobe/core/common/utils.py +385 -516
  18. msprobe/core/common_config.py +85 -58
  19. msprobe/core/compare/acc_compare.py +300 -0
  20. msprobe/core/compare/check.py +95 -0
  21. msprobe/core/compare/compare_cli.py +49 -0
  22. msprobe/core/compare/highlight.py +223 -0
  23. msprobe/core/compare/multiprocessing_compute.py +149 -0
  24. msprobe/{pytorch → core}/compare/npy_compare.py +295 -244
  25. msprobe/core/compare/utils.py +430 -0
  26. msprobe/core/data_dump/data_collector.py +154 -140
  27. msprobe/core/data_dump/data_processor/base.py +314 -245
  28. msprobe/core/data_dump/data_processor/factory.py +59 -61
  29. msprobe/core/data_dump/data_processor/mindspore_processor.py +186 -0
  30. msprobe/core/data_dump/data_processor/pytorch_processor.py +366 -346
  31. msprobe/core/data_dump/json_writer.py +96 -116
  32. msprobe/core/data_dump/scope.py +178 -178
  33. msprobe/core/grad_probe/__init__.py +0 -0
  34. msprobe/core/grad_probe/constant.py +71 -0
  35. msprobe/core/grad_probe/grad_compare.py +171 -0
  36. msprobe/core/grad_probe/utils.py +64 -0
  37. msprobe/docs/01.installation.md +89 -0
  38. msprobe/docs/02.config_introduction.md +165 -0
  39. msprobe/docs/03.config_examples.md +247 -0
  40. msprobe/docs/04.acl_config_examples.md +76 -0
  41. msprobe/docs/05.data_dump_PyTorch.md +198 -0
  42. msprobe/docs/06.data_dump_MindSpore.md +243 -0
  43. msprobe/docs/07.accuracy_checker_PyTorch.md +274 -0
  44. msprobe/docs/08.accuracy_checker_online_PyTorch.md +198 -0
  45. msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
  46. msprobe/docs/10.accuracy_compare_PyTorch.md +245 -0
  47. msprobe/docs/11.accuracy_compare_MindSpore.md +202 -0
  48. msprobe/docs/12.overflow_check_PyTorch.md +79 -0
  49. msprobe/docs/13.overflow_check_MindSpore.md +31 -0
  50. msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
  51. msprobe/docs/15.free_benchmarking_PyTorch.md +164 -0
  52. msprobe/docs/17.grad_probe.md +207 -0
  53. msprobe/docs/FAQ_PyTorch.md +177 -0
  54. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
  55. msprobe/docs/img/free_benchmark_framework.png +0 -0
  56. msprobe/docs/img/grad_probe_image-1.png +0 -0
  57. msprobe/docs/img/grad_probe_image-2.png +0 -0
  58. msprobe/docs/img/grad_probe_image-3.png +0 -0
  59. msprobe/docs/img/grad_probe_image-4.png +0 -0
  60. msprobe/docs/img/grad_probe_image.png +0 -0
  61. msprobe/mindspore/__init__.py +1 -1
  62. msprobe/mindspore/api_accuracy_checker/__init__.py +0 -0
  63. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +255 -0
  64. msprobe/mindspore/api_accuracy_checker/api_info.py +69 -0
  65. msprobe/mindspore/api_accuracy_checker/api_runner.py +156 -0
  66. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +197 -0
  67. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
  68. msprobe/mindspore/api_accuracy_checker/compute_element.py +239 -0
  69. msprobe/mindspore/api_accuracy_checker/main.py +9 -0
  70. msprobe/mindspore/api_accuracy_checker/type_mapping.py +114 -0
  71. msprobe/mindspore/api_accuracy_checker/utils.py +80 -0
  72. msprobe/mindspore/cell_processor.py +34 -0
  73. msprobe/mindspore/common/const.py +106 -0
  74. msprobe/mindspore/common/log.py +38 -0
  75. msprobe/mindspore/common/utils.py +81 -0
  76. msprobe/mindspore/compare/distributed_compare.py +75 -0
  77. msprobe/mindspore/compare/ms_compare.py +219 -0
  78. msprobe/mindspore/compare/ms_graph_compare.py +348 -0
  79. msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -0
  80. msprobe/mindspore/debugger/debugger_config.py +66 -51
  81. msprobe/mindspore/debugger/precision_debugger.py +126 -32
  82. msprobe/mindspore/dump/dump_tool_factory.py +35 -38
  83. msprobe/mindspore/dump/hook_cell/api_registry.py +118 -0
  84. msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -0
  85. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +922 -0
  86. msprobe/mindspore/dump/hook_cell/wrap_api.py +113 -0
  87. msprobe/mindspore/dump/jit_dump.py +72 -0
  88. msprobe/mindspore/dump/kernel_graph_dump.py +59 -60
  89. msprobe/mindspore/dump/kernel_kbyk_dump.py +64 -0
  90. msprobe/mindspore/free_benchmark/__init__.py +0 -0
  91. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +116 -0
  92. msprobe/mindspore/free_benchmark/common/__init__.py +0 -0
  93. msprobe/mindspore/free_benchmark/common/config.py +12 -0
  94. msprobe/mindspore/free_benchmark/common/handler_params.py +17 -0
  95. msprobe/mindspore/free_benchmark/common/utils.py +71 -0
  96. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -0
  97. msprobe/mindspore/free_benchmark/decorator/__init__.py +0 -0
  98. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +43 -0
  99. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +107 -0
  100. msprobe/mindspore/free_benchmark/handler/__init__.py +0 -0
  101. msprobe/mindspore/free_benchmark/handler/base_handler.py +90 -0
  102. msprobe/mindspore/free_benchmark/handler/check_handler.py +41 -0
  103. msprobe/mindspore/free_benchmark/handler/fix_handler.py +36 -0
  104. msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -0
  105. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +67 -0
  106. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +21 -0
  107. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +63 -0
  108. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +51 -0
  109. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +35 -0
  110. msprobe/mindspore/free_benchmark/perturbation/no_change.py +12 -0
  111. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +29 -0
  112. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +33 -0
  113. msprobe/mindspore/grad_probe/__init__.py +0 -0
  114. msprobe/mindspore/grad_probe/global_context.py +90 -0
  115. msprobe/mindspore/grad_probe/grad_analyzer.py +231 -0
  116. msprobe/mindspore/grad_probe/grad_monitor.py +27 -0
  117. msprobe/mindspore/grad_probe/grad_stat_csv.py +132 -0
  118. msprobe/mindspore/grad_probe/hook.py +94 -0
  119. msprobe/mindspore/grad_probe/utils.py +30 -0
  120. msprobe/mindspore/ms_config.py +128 -78
  121. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +44 -45
  122. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +34 -32
  123. msprobe/mindspore/runtime.py +4 -0
  124. msprobe/mindspore/service.py +378 -0
  125. msprobe/mindspore/task_handler_factory.py +24 -21
  126. msprobe/msprobe.py +105 -67
  127. msprobe/pytorch/__init__.py +4 -4
  128. msprobe/pytorch/api_accuracy_checker/common/config.py +53 -50
  129. msprobe/pytorch/api_accuracy_checker/common/utils.py +214 -224
  130. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +213 -216
  131. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +606 -545
  132. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
  133. msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
  134. msprobe/pytorch/api_accuracy_checker/compare/compare.py +386 -345
  135. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +73 -73
  136. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +245 -248
  137. msprobe/pytorch/api_accuracy_checker/config.yaml +10 -4
  138. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +335 -328
  139. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +200 -203
  140. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +133 -127
  141. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +592 -493
  142. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +70 -7
  143. msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
  144. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/__init__.py +0 -0
  145. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +197 -0
  146. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +325 -0
  147. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +204 -0
  148. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +219 -0
  149. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +10 -0
  150. msprobe/pytorch/bench_functions/__init__.py +15 -0
  151. msprobe/pytorch/bench_functions/apply_adam_w.py +28 -0
  152. msprobe/pytorch/bench_functions/confusion_transpose.py +19 -0
  153. msprobe/pytorch/bench_functions/fast_gelu.py +55 -0
  154. msprobe/pytorch/bench_functions/layer_norm_eval.py +6 -0
  155. msprobe/pytorch/bench_functions/linear.py +12 -0
  156. msprobe/pytorch/bench_functions/matmul_backward.py +48 -0
  157. msprobe/pytorch/bench_functions/npu_fusion_attention.py +509 -0
  158. msprobe/pytorch/bench_functions/rms_norm.py +15 -0
  159. msprobe/pytorch/bench_functions/rotary_mul.py +52 -0
  160. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +26 -0
  161. msprobe/pytorch/bench_functions/swiglu.py +55 -0
  162. msprobe/pytorch/common/__init__.py +2 -2
  163. msprobe/pytorch/common/compare_script.template +14 -14
  164. msprobe/pytorch/common/log.py +20 -31
  165. msprobe/pytorch/common/parse_json.py +39 -37
  166. msprobe/pytorch/common/utils.py +305 -224
  167. msprobe/pytorch/compare/distributed_compare.py +66 -111
  168. msprobe/pytorch/compare/mapping.yaml +607 -607
  169. msprobe/pytorch/compare/match.py +34 -36
  170. msprobe/pytorch/compare/pt_compare.py +50 -0
  171. msprobe/pytorch/debugger/debugger_config.py +95 -86
  172. msprobe/pytorch/debugger/precision_debugger.py +125 -95
  173. msprobe/pytorch/free_benchmark/__init__.py +8 -8
  174. msprobe/pytorch/free_benchmark/common/constant.py +70 -67
  175. msprobe/pytorch/free_benchmark/common/counter.py +71 -71
  176. msprobe/pytorch/free_benchmark/common/enums.py +37 -37
  177. msprobe/pytorch/free_benchmark/common/params.py +129 -129
  178. msprobe/pytorch/free_benchmark/common/utils.py +102 -98
  179. msprobe/pytorch/free_benchmark/compare/grad_saver.py +179 -183
  180. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +104 -104
  181. msprobe/pytorch/free_benchmark/main.py +105 -102
  182. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +13 -13
  183. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +41 -41
  184. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +90 -90
  185. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +104 -104
  186. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +63 -63
  187. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +68 -68
  188. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +28 -28
  189. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +45 -45
  190. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +19 -19
  191. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +217 -203
  192. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +39 -39
  193. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +23 -23
  194. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +30 -31
  195. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +170 -170
  196. msprobe/pytorch/function_factory.py +76 -0
  197. msprobe/pytorch/functional/dump_module.py +39 -39
  198. msprobe/pytorch/grad_probe/__init__.py +0 -0
  199. msprobe/pytorch/grad_probe/grad_monitor.py +91 -0
  200. msprobe/pytorch/grad_probe/grad_stat_csv.py +129 -0
  201. msprobe/pytorch/hook_module/api_registry.py +161 -161
  202. msprobe/pytorch/hook_module/hook_module.py +120 -109
  203. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1876
  204. msprobe/pytorch/hook_module/utils.py +30 -29
  205. msprobe/pytorch/hook_module/wrap_aten.py +110 -100
  206. msprobe/pytorch/hook_module/wrap_distributed.py +78 -75
  207. msprobe/pytorch/hook_module/wrap_functional.py +105 -108
  208. msprobe/pytorch/hook_module/wrap_npu_custom.py +93 -73
  209. msprobe/pytorch/hook_module/wrap_tensor.py +71 -72
  210. msprobe/pytorch/hook_module/wrap_torch.py +86 -88
  211. msprobe/pytorch/hook_module/wrap_vf.py +62 -64
  212. msprobe/pytorch/module_processer.py +138 -98
  213. msprobe/pytorch/online_dispatch/__init__.py +20 -20
  214. msprobe/pytorch/online_dispatch/compare.py +236 -236
  215. msprobe/pytorch/online_dispatch/dispatch.py +271 -273
  216. msprobe/pytorch/online_dispatch/dump_compare.py +155 -186
  217. msprobe/pytorch/online_dispatch/single_compare.py +391 -391
  218. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +49 -49
  219. msprobe/pytorch/online_dispatch/utils.py +130 -187
  220. msprobe/pytorch/parse.py +4 -4
  221. msprobe/pytorch/parse_tool/cli.py +32 -32
  222. msprobe/pytorch/parse_tool/lib/compare.py +260 -259
  223. msprobe/pytorch/parse_tool/lib/config.py +52 -51
  224. msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
  225. msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
  226. msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
  227. msprobe/pytorch/parse_tool/lib/parse_tool.py +158 -158
  228. msprobe/pytorch/parse_tool/lib/utils.py +316 -367
  229. msprobe/pytorch/parse_tool/lib/visualization.py +85 -90
  230. msprobe/pytorch/pt_config.py +188 -93
  231. msprobe/pytorch/service.py +246 -167
  232. mindstudio_probe-1.0.1.dist-info/RECORD +0 -228
  233. msprobe/config/README.md +0 -397
  234. msprobe/mindspore/doc/dump.md +0 -65
  235. msprobe/mindspore/dump/api_kbk_dump.py +0 -55
  236. msprobe/pytorch/compare/acc_compare.py +0 -1024
  237. msprobe/pytorch/compare/highlight.py +0 -100
  238. msprobe/pytorch/doc/FAQ.md +0 -193
  239. msprobe/pytorch/doc/api_accuracy_checker.md +0 -269
  240. msprobe/pytorch/doc/atat/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/206/320/245/342/226/221/321/206/320/235/320/276dump/321/206/320/260/320/227/321/205/320/227/320/226/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -182
  241. msprobe/pytorch/doc/dump.md +0 -207
  242. msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -176
  243. msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
  244. msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
  245. msprobe/pytorch/doc/run_overflow_check.md +0 -25
  246. msprobe/pytorch/doc//321/205/320/254/320/270/321/207/342/225/221/342/224/220/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/206/320/277/320/244/321/205/320/277/342/225/243.md +0 -90
  247. msprobe/test/core_ut/common/test_utils.py +0 -345
  248. msprobe/test/core_ut/data_dump/test_data_collector.py +0 -47
  249. msprobe/test/core_ut/data_dump/test_json_writer.py +0 -183
  250. msprobe/test/core_ut/data_dump/test_scope.py +0 -151
  251. msprobe/test/core_ut/test_common_config.py +0 -152
  252. msprobe/test/core_ut/test_file_check.py +0 -218
  253. msprobe/test/core_ut/test_log.py +0 -109
  254. msprobe/test/mindspore_ut/test_api_kbk_dump.py +0 -51
  255. msprobe/test/mindspore_ut/test_debugger_config.py +0 -42
  256. msprobe/test/mindspore_ut/test_dump_tool_factory.py +0 -51
  257. msprobe/test/mindspore_ut/test_kernel_graph_dump.py +0 -66
  258. msprobe/test/mindspore_ut/test_kernel_graph_overflow_check.py +0 -63
  259. msprobe/test/mindspore_ut/test_ms_config.py +0 -69
  260. msprobe/test/mindspore_ut/test_overflow_check_tool_factory.py +0 -51
  261. msprobe/test/mindspore_ut/test_precision_debugger.py +0 -56
  262. msprobe/test/mindspore_ut/test_task_handler_factory.py +0 -58
  263. msprobe/test/pytorch_ut/advisor/test_advisor.py +0 -83
  264. msprobe/test/pytorch_ut/api_accuracy_checker/common/test_common_utils.py +0 -108
  265. msprobe/test/pytorch_ut/api_accuracy_checker/common/test_config.py +0 -39
  266. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py +0 -112
  267. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_api_precision_compare.py +0 -77
  268. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare.py +0 -125
  269. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_column.py +0 -10
  270. msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_compare_utils.py +0 -43
  271. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/dump.json +0 -179
  272. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/forward.json +0 -63
  273. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_data_generate.py +0 -99
  274. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +0 -115
  275. msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut.py +0 -72
  276. msprobe/test/pytorch_ut/compare/test_acc_compare.py +0 -17
  277. msprobe/test/pytorch_ut/free_benchmark/perturbed_layers/test_perturbed_layser.py +0 -105
  278. msprobe/test/pytorch_ut/free_benchmark/result_handlers/test_result_handler.py +0 -121
  279. msprobe/test/pytorch_ut/free_benchmark/test_main.py +0 -101
  280. msprobe/test/pytorch_ut/functional/test_dump_module.py +0 -15
  281. msprobe/test/pytorch_ut/hook_module/test_api_registry.py +0 -130
  282. msprobe/test/pytorch_ut/hook_module/test_hook_module.py +0 -42
  283. msprobe/test/pytorch_ut/hook_module/test_wrap_aten.py +0 -65
  284. msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py +0 -35
  285. msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py +0 -20
  286. msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py +0 -35
  287. msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py +0 -43
  288. msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py +0 -11
  289. msprobe/test/pytorch_ut/test_pt_config.py +0 -69
  290. msprobe/test/pytorch_ut/test_service.py +0 -59
  291. msprobe/test/resources/advisor.txt +0 -3
  292. msprobe/test/resources/compare_result_20230703104808.csv +0 -9
  293. msprobe/test/resources/compare_result_without_accuracy.csv +0 -9
  294. msprobe/test/resources/config.yaml +0 -3
  295. msprobe/test/resources/npu_test.pkl +0 -8
  296. msprobe/test/run_test.sh +0 -30
  297. msprobe/test/run_ut.py +0 -58
  298. msprobe/test/test_module_processer.py +0 -64
  299. {mindstudio_probe-1.0.1.dist-info → mindstudio_probe-1.0.4.dist-info}/top_level.txt +0 -0
  300. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
  301. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
  302. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
  303. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
  304. /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
  305. /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
  306. /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
  307. /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
  308. /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
  309. /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
  310. /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
  311. /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
  312. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
  313. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
  314. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
  315. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
  316. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
  317. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
  318. /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
  319. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
  320. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
  321. /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
  322. /msprobe/{config → docs}/img/free_benchmark.png +0 -0
  323. /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
@@ -0,0 +1,348 @@
1
+ import copy
2
+ import csv
3
+ import glob
4
+ import os
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from msprobe.core.common.const import CompareConst, GraphMode, Const, FileCheckConst
9
+ from msprobe.core.common.file_utils import FileOpen, check_path_before_create, change_mode, load_npy
10
+ from msprobe.core.common.log import logger
11
+ from msprobe.core.common.utils import add_time_with_xlsx, CompareException
12
+ from msprobe.core.compare.multiprocessing_compute import _ms_graph_handle_multi_process, check_accuracy
13
+ from msprobe.core.compare.npy_compare import npy_data_check, statistics_data_check, reshape_value, compare_ops_apply
14
+ from msprobe.mindspore.common.utils import convert_to_int, list_lowest_level_directories
15
+
16
+
17
class row_data:
    """Column template for a single row of the graph-mode comparison result.

    Private deep copies of the ``CompareConst`` templates are kept on the
    instance, and ``data`` merges the base columns with the columns of the
    requested mode. Calling the instance returns that merged dict.
    """

    def __init__(self, mode):
        # Deep copies keep the shared CompareConst templates untouched when a
        # row is filled in later.
        self.basic_data = copy.deepcopy(CompareConst.MS_GRAPH_BASE)
        self.npy_data = copy.deepcopy(CompareConst.MS_GRAPH_NPY)
        self.statistic_data = copy.deepcopy(CompareConst.MS_GRAPH_STATISTIC)

        # Every mode other than NPY_MODE gets the statistic columns.
        mode_columns = self.npy_data if mode == GraphMode.NPY_MODE else self.statistic_data
        merged = dict(self.basic_data)
        merged.update(mode_columns)
        self.data = merged

    def __call__(self):
        """Return the merged column dict built in ``__init__``."""
        return self.data
29
+
30
+
31
def npy_data_read(data_path, npy_file_list, mapping_dict):
    """Build comparison records for the ``.npy`` files of one dump directory.

    Entries of ``npy_file_list`` may be bare file names or paths; the caller
    in this module passes ``glob.glob`` results, which include the directory.
    The name fields are parsed from the base name only, so dots in directory
    names cannot shift the field positions, and ``mapping_dict`` is consulted
    by base name as well as by the entry exactly as given.

    Args:
        data_path: Directory that holds the npy files.
        npy_file_list: File names or paths of the npy files to index.
        mapping_dict: Maps a dumped file name to the full name it stands for.

    Returns:
        A list of ``[file_path, compare_key, timestamp]`` records. Names with
        fewer than seven ``Const.SEP``-separated fields are skipped. The key
        joins fields 1, 2, 3, 5 and 6; field 4 is passed to ``convert_to_int``
        and stored as the timestamp.
    """
    data_list = []
    for data in npy_file_list:
        file_name = os.path.basename(data)
        # Keep honouring a mapping keyed by the entry as given, then fall
        # back to the base name (what a path from glob needs).
        if data in mapping_dict:
            full_name = mapping_dict[data]
        else:
            full_name = mapping_dict.get(file_name, file_name)

        split_list = full_name.split(Const.SEP)
        if len(split_list) < 7:
            continue
        compare_key = f"{split_list[1]}.{split_list[2]}.{split_list[3]}.{split_list[5]}.{split_list[6]}"
        timestamp = convert_to_int(split_list[4])

        # An entry that already carries a directory must not be joined with
        # data_path again: for relative paths that would duplicate the prefix.
        file_path = data if os.path.dirname(data) else os.path.join(data_path, data)
        data_list.append([file_path, compare_key, timestamp])
    return data_list
45
+
46
+
47
def statistic_data_read(statistic_file_list, statistic_file_path):
    """Read statistic CSV files into comparison records.

    Args:
        statistic_file_list: Paths of the CSV files to read; the first row of
            each file is treated as its header.
        statistic_file_path: Path stored as the first element of every record
            and quoted in the missing-column warning.

    Returns:
        A list of records ``[statistic_file_path, compare_key, timestamp,
        data type, shape, max, min, avg, l2norm]``. The key joins CSV columns
        1, 2, 3 and 5, the timestamp is ``int`` of column 4, and a statistic
        whose header title was never found is filled with ``np.nan``.
    """
    wanted_titles = ('Data Type', 'Shape', 'Max Value', 'Min Value', 'Avg Value', 'L2Norm Value')
    header_index = dict.fromkeys(wanted_titles)
    raw_rows = []

    for statistic_file in statistic_file_list:
        with FileOpen(statistic_file, "r") as f:
            csv_reader = csv.reader(f, delimiter=",")
            header = next(csv_reader)
            # When a title occurs more than once the last occurrence wins.
            title_position = {title: position for position, title in enumerate(header)}
            for title in wanted_titles:
                if title in title_position:
                    header_index[title] = title_position[title]
            raw_rows.extend(csv_reader)

    for key, column in header_index.items():
        if column is None:
            logger.warning(f"Data_path {statistic_file_path} has no key {key}.")

    data_list = []
    for fields in raw_rows:
        compare_key = f"{fields[1]}.{fields[2]}.{fields[3]}.{fields[5]}"
        timestamp = int(fields[4])
        record = [statistic_file_path, compare_key, timestamp]
        record.extend(np.nan if column is None else fields[column] for column in header_index.values())
        data_list.append(record)
    return data_list
77
+
78
+
79
def generate_data_name(data_path):
    """Detect the dump mode of a directory and index its data files.

    ``*.npy`` files take precedence over ``statistic.csv``; when neither is
    present the error mode is returned with an empty list. ``mapping.csv``,
    if present, is read (skipping its first row) into a dict built from the
    first two columns and is handed to ``npy_data_read``.

    Args:
        data_path: Directory to inspect.

    Returns:
        A ``(mode, data_list)`` tuple, where ``mode`` is one of
        ``GraphMode.NPY_MODE``, ``GraphMode.STATISTIC_MODE`` or
        ``GraphMode.ERROR_MODE``.
    """
    mapping_path = os.path.join(data_path, "mapping.csv")
    statistic_path = os.path.join(data_path, "statistic.csv")
    npy_path = os.path.join(data_path, "*.npy")

    mapping_file_list = glob.glob(mapping_path)
    statistic_file_list = glob.glob(statistic_path)
    npy_file_list = glob.glob(npy_path)

    mapping_dict = {}
    for mapping_file in mapping_file_list:
        with FileOpen(mapping_file, "r") as f:
            csv_reader = csv.reader(f, delimiter=",")
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration.
            next(csv_reader, None)
            for row in csv_reader:
                # Blank or truncated rows carry no usable mapping.
                if len(row) < 2:
                    continue
                mapping_dict[row[0]] = row[1]

    if npy_file_list:
        return GraphMode.NPY_MODE, npy_data_read(data_path, npy_file_list, mapping_dict)
    if statistic_file_list:
        # statistic_path already contains data_path; joining it with
        # data_path again would duplicate the prefix for relative paths.
        return GraphMode.STATISTIC_MODE, statistic_data_read(statistic_file_list, statistic_path)

    logger.error("Error mode.")
    return GraphMode.ERROR_MODE, []
117
+
118
+
119
+ class GraphMSComparator:
120
def __init__(self, input_param, output_path):
    """Store the comparison inputs and find the (rank, step) keys to compare.

    Args:
        input_param: Mapping read for the keys ``npu_path``, ``bench_path``,
            ``rank_id`` and ``step_id``; the two id entries default to an
            empty list and each element is passed through ``convert_to_int``.
        output_path: Value stored unchanged as ``self.output_path``.
    """
    self.output_path = output_path
    self.base_npu_path = input_param.get('npu_path')
    self.base_bench_path = input_param.get('bench_path')
    self.rank_list = [convert_to_int(rank) for rank in input_param.get('rank_id', [])]
    self.step_list = [convert_to_int(step) for step in input_param.get('step_id', [])]

    # Index both dump trees, then keep only the keys present on both sides.
    self.npu_rank_step_dict = self.generate_rank_step_path(self.base_npu_path)
    self.bench_rank_step_dict = self.generate_rank_step_path(self.base_bench_path)
    shared_keys = set(self.npu_rank_step_dict) & set(self.bench_rank_step_dict)
    self.common_rank_step = sorted(shared_keys)
131
+
132
@staticmethod
def compare_ops(compare_result_db, mode):
    """Compute the comparison columns for every row of ``compare_result_db``.

    Args:
        compare_result_db: DataFrame whose rows are processed one at a time
            through ``DataFrame.apply(..., axis=1)``.
        mode: ``GraphMode.NPY_MODE`` selects the npy comparison; any other
            value selects the statistic comparison.

    Returns:
        The object returned by ``DataFrame.apply``; each row handler returns
        a ``pd.Series`` built from a ``row_data`` template.
    """

    def load_side(file_path, side, result):
        """Load one npy file and record its name, dtype, shape and statistics.

        Returns the loaded array, or ``""`` when the file does not exist.
        """
        if not os.path.exists(file_path):
            return ""
        tensor = load_npy(file_path)
        result[f'{side} Name'] = file_path
        result[f'{side} Dtype'] = tensor.dtype
        result[f'{side} Tensor Shape'] = tensor.shape
        result[f'{side} max'] = np.max(tensor)
        result[f'{side} min'] = np.min(tensor)
        result[f'{side} mean'] = np.mean(tensor)
        result[f'{side} l2norm'] = np.linalg.norm(tensor)
        return tensor

    def compare_npy_row(row):
        """Compare the NPU and bench tensors referenced by one row."""
        result = row_data(GraphMode.NPY_MODE)()
        npu_value = load_side(row[CompareConst.NPU_NAME], 'NPU', result)
        bench_value = load_side(row[CompareConst.BENCH_NAME], 'Bench', result)

        has_error, message = npy_data_check(npu_value, bench_value)
        result[CompareConst.ERROR_MESSAGE] = message
        if has_error:
            return pd.Series(result)

        npu_value, bench_value = reshape_value(npu_value, bench_value)
        metrics, err_msg = compare_ops_apply(npu_value, bench_value, False, "")
        metric_columns = (
            CompareConst.COSINE,
            CompareConst.MAX_ABS_ERR,
            CompareConst.MAX_RELATIVE_ERR,
            CompareConst.ONE_THOUSANDTH_ERR_RATIO,
            CompareConst.FIVE_THOUSANDTHS_ERR_RATIO,
        )
        for position, column in enumerate(metric_columns):
            result[column] = metrics[position]
        result[CompareConst.ACCURACY] = check_accuracy(metrics[0], metrics[1])
        result[CompareConst.ERROR_MESSAGE] = err_msg
        return pd.Series(result)

    def compare_statistic_row(row):
        """Compare the NPU and bench statistics carried by one row."""
        result = row_data('STATISTIC')()

        # Copy both sides into the template; the numeric statistics are
        # converted to float32.
        for side in ('NPU', 'Bench'):
            for field in ('Name', 'Dtype', 'Tensor Shape'):
                result[f'{side} {field}'] = row[f'{side} {field}']
            for field in ('max', 'min', 'mean', 'l2norm'):
                result[f'{side} {field}'] = np.float32(row[f'{side} {field}'])

        has_error, message = statistics_data_check(result)
        result[CompareConst.ERROR_MESSAGE] += message
        if has_error:
            return pd.Series(result)

        # (npu column, bench column, abs-diff column, relative-error column)
        stat_columns = (
            (CompareConst.NPU_MAX, CompareConst.BENCH_MAX,
             CompareConst.MAX_DIFF, CompareConst.MAX_RELATIVE_ERR),
            (CompareConst.NPU_MIN, CompareConst.BENCH_MIN,
             CompareConst.MIN_DIFF, CompareConst.MIN_RELATIVE_ERR),
            (CompareConst.NPU_MEAN, CompareConst.BENCH_MEAN,
             CompareConst.MEAN_DIFF, CompareConst.MEAN_RELATIVE_ERR),
            (CompareConst.NPU_NORM, CompareConst.BENCH_NORM,
             CompareConst.NORM_DIFF, CompareConst.NORM_RELATIVE_ERR),
        )
        for npu_col, bench_col, diff_col, _ in stat_columns:
            result[diff_col] = np.abs(result[npu_col] - result[bench_col])
        for _, bench_col, diff_col, rel_col in stat_columns:
            # The relative error is reported as 0 unless the bench value is
            # strictly positive.
            ratio = result[diff_col] / result[bench_col] if result[bench_col] > 0 else 0
            result[rel_col] = str(ratio * 100) + "%"

        larger_max = max(result[CompareConst.NPU_MAX], result[CompareConst.BENCH_MAX])
        magnitude_diff = result[CompareConst.MAX_DIFF] / (larger_max + 1e-10)
        result[CompareConst.ACCURACY] = 'No' if magnitude_diff > CompareConst.MAGNITUDE else 'Yes'
        return pd.Series(result)

    row_handler = compare_npy_row if mode == GraphMode.NPY_MODE else compare_statistic_row
    return compare_result_db.apply(row_handler, axis=1)
224
+
225
def compare_core(self):
    """Compare every common (rank, step) pair and write one xlsx per pair.

    A pair is skipped when ``compare_process`` yields an empty list or
    DataFrame, a value of any other type, or a falsy mode. Otherwise the
    rows are processed by ``_do_multi_process`` and written to Excel under
    ``self.output_path``, after which the file mode is set to
    ``FileCheckConst.DATA_FILE_AUTHORITY``.
    """
    logger.info("Please check whether the input data belongs to you. If not, there may be security risks.")

    for rank_id, step_id in self.common_rank_step:
        compare_result_df, mode = self.compare_process(rank_id, step_id)

        if isinstance(compare_result_df, list):
            has_rows = bool(compare_result_df)
        elif isinstance(compare_result_df, pd.DataFrame):
            has_rows = not compare_result_df.empty
        else:
            has_rows = False
        if not (has_rows and mode):
            continue

        compare_result_df = self._do_multi_process(compare_result_df, mode)

        base_name = "compare_result_" + str(rank_id) + "_" + str(step_id)
        compare_result_name = add_time_with_xlsx(base_name)
        output_dir = os.path.realpath(self.output_path)
        compare_result_path = os.path.join(output_dir, str(compare_result_name))

        check_path_before_create(compare_result_path)
        compare_result_df.to_excel(compare_result_path, index=False)
        change_mode(compare_result_path, FileCheckConst.DATA_FILE_AUTHORITY)
        logger.info(f"Compare rank: {rank_id} step: {step_id} finish. Compare result: {compare_result_path}.")
245
+
246
def compare_process(self, rank_id, step_id):
    """Collect the NPU and bench dump entries for one (rank, step) and pair them up.

    Args:
        rank_id: Rank part of the key into the rank/step path dictionaries.
        step_id: Step part of the same key.

    Returns:
        ``(compare_result_df, mode)`` on success, where ``compare_result_df`` is
        the outer merge of the NPU and bench tables on ``'Compare Key'`` and
        ``'Local Index'``. ``([], '')`` is returned when either side has no
        paths for this key, when a mode is still ``'ERROR_MODE'`` after
        scanning the paths, or when the two modes differ.
    """
    # generate data_path
    npu_data_path_list = self.npu_rank_step_dict.get((rank_id, step_id))
    bench_data_path_list = self.bench_rank_step_dict.get((rank_id, step_id))
    # Both sides must be present. Checking the bench list here prevents
    # iterating over None below when only the NPU side has this key.
    if not npu_data_path_list or not bench_data_path_list:
        return [], ''

    # generate file name
    npu_mode = 'ERROR_MODE'
    bench_mode = 'ERROR_MODE'
    npu_data_list = []
    bench_data_list = []
    # The mode reported by the last scanned path on each side is the one kept.
    for npu_data_path in npu_data_path_list:
        npu_mode, data_list = generate_data_name(npu_data_path)
        npu_data_list.extend(data_list)
    for bench_data_path in bench_data_path_list:
        bench_mode, data_list = generate_data_name(bench_data_path)
        bench_data_list.extend(data_list)

    if npu_mode == "ERROR_MODE" or bench_mode == "ERROR_MODE":
        logger.warning(f"Data_path {npu_data_path} or {bench_data_path} is not exist.")
        return [], ''
    if npu_mode != bench_mode:
        logger.error(f"NPU mode {npu_mode} not equal to MATCH mode {bench_mode}.")
        return [], ''

    if npu_mode == 'NPY_MODE':
        npu_data_df = pd.DataFrame(npu_data_list, columns=[CompareConst.NPU_NAME, 'Compare Key', 'TimeStamp'])
        bench_data_df = pd.DataFrame(bench_data_list, columns=[CompareConst.BENCH_NAME, 'Compare Key', 'TimeStamp'])
    else:
        npu_data_df = pd.DataFrame(npu_data_list,
                                   columns=[CompareConst.NPU_NAME, 'Compare Key', 'TimeStamp',
                                            CompareConst.NPU_DTYPE, CompareConst.NPU_SHAPE,
                                            CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN,
                                            CompareConst.NPU_NORM])
        bench_data_df = pd.DataFrame(bench_data_list,
                                     columns=[CompareConst.BENCH_NAME, 'Compare Key', 'TimeStamp',
                                              CompareConst.BENCH_DTYPE,
                                              CompareConst.BENCH_SHAPE, CompareConst.BENCH_MAX,
                                              CompareConst.BENCH_MIN, CompareConst.BENCH_MEAN,
                                              CompareConst.BENCH_NORM])

        # Only the statistics tables carry max/min/mean/norm columns, so the
        # float32 casts belong to this branch.
        npu_float_type = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, CompareConst.NPU_NORM]
        npu_data_df[npu_float_type] = npu_data_df[npu_float_type].astype(np.float32)

        bench_float_type = [CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, CompareConst.BENCH_MEAN,
                            CompareConst.BENCH_NORM]
        bench_data_df[bench_float_type] = bench_data_df[bench_float_type].astype(np.float32)

    # Number repeated compare keys in timestamp order so the n-th occurrence
    # on the NPU side is matched with the n-th occurrence on the bench side.
    npu_data_df['Local Index'] = npu_data_df.sort_values('TimeStamp').groupby('Compare Key').cumcount()
    bench_data_df['Local Index'] = bench_data_df.sort_values('TimeStamp').groupby('Compare Key').cumcount()

    compare_result_df = pd.merge(npu_data_df, bench_data_df, on=['Compare Key', 'Local Index'], how='outer')

    # Rows present on one side only get an empty name instead of NaN.
    compare_result_df[CompareConst.NPU_NAME] = compare_result_df[CompareConst.NPU_NAME].fillna('')
    compare_result_df[CompareConst.BENCH_NAME] = compare_result_df[CompareConst.BENCH_NAME].fillna('')

    return compare_result_df, npu_mode
304
+
305
def generate_rank_step_path(self, base_path):
    """Group the lowest-level directories under ``base_path`` by (rank_id, step_id).

    Directories whose rank or step id resolves to -1 are dropped, as are
    those excluded by a non-empty ``self.rank_list`` or ``self.step_list``.

    Returns:
        A dict mapping ``(rank_id, step_id)`` to the list of matching
        directory paths, with keys in ascending order.
    """

    def _ids_from_path(path_with_rank_step):
        # Both ids are read from fixed positions in the "/"-separated path;
        # rank stays -1 when the path is too short to hold one.
        parts = path_with_rank_step.split("/")
        depth = len(parts)
        rank = -1
        if "rank_" in path_with_rank_step:
            # KBK mode
            if depth > 4:
                rank = convert_to_int(parts[-4].split("_")[-1])
        else:
            if depth > 4:
                rank = convert_to_int(parts[-4])
            # Fall back to one level closer to the leaf when the first
            # candidate did not produce a rank.
            if rank == -1 and depth > 3:
                rank = convert_to_int(parts[-3])
        step = convert_to_int(parts[-1])
        return rank, step

    root = os.path.abspath(base_path)

    grouped = {}
    for dir_path in list_lowest_level_directories(root):
        rank_id, step_id = _ids_from_path(dir_path)
        if rank_id == -1 or step_id == -1:
            continue
        if (self.rank_list and rank_id not in self.rank_list) or \
                (self.step_list and step_id not in self.step_list):
            continue
        grouped.setdefault((rank_id, step_id), []).append(dir_path)
    return dict(sorted(grouped.items()))
341
+
342
def _do_multi_process(self, result_df, mode):
    """Run ``self.compare_ops`` over ``result_df`` via ``_ms_graph_handle_multi_process``.

    Returns:
        The value returned by ``_ms_graph_handle_multi_process``.

    Raises:
        CompareException: with ``INVALID_DATA_ERROR`` when the handler raises
            ``ValueError``; the original error is chained as the cause.
    """
    try:
        return _ms_graph_handle_multi_process(self.compare_ops, result_df, mode)
    except ValueError as err:
        logger.error('result dataframe is not found.')
        raise CompareException(CompareException.INVALID_DATA_ERROR) from err