mindstudio-probe 8.3.3__py3-none-any.whl → 26.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (689) hide show
  1. {mindstudio_probe-8.3.3.dist-info → mindstudio_probe-26.0.0a1.dist-info}/METADATA +26 -14
  2. mindstudio_probe-26.0.0a1.dist-info/RECORD +498 -0
  3. {mindstudio_probe-8.3.3.dist-info → mindstudio_probe-26.0.0a1.dist-info}/WHEEL +1 -1
  4. mindstudio_probe-26.0.0a1.dist-info/entry_points.txt +5 -0
  5. mindstudio_probe-26.0.0a1.dist-info/licenses/LICENSE +124 -0
  6. mindstudio_probe-26.0.0a1.dist-info/top_level.txt +2 -0
  7. msprobe/__init__.py +12 -13
  8. msprobe/config.json +9 -31
  9. msprobe/core/__init__.py +12 -11
  10. msprobe/core/acc_check/acc_check_cli.py +145 -0
  11. msprobe/core/common/const.py +97 -38
  12. msprobe/core/common/db_manager.py +133 -12
  13. msprobe/core/common/decorator.py +12 -11
  14. msprobe/core/common/exceptions.py +12 -11
  15. msprobe/core/common/file_utils.py +101 -25
  16. msprobe/core/common/framework_adapter.py +36 -25
  17. msprobe/core/common/global_lock.py +12 -11
  18. msprobe/core/common/inplace_op_checker.py +12 -11
  19. msprobe/core/common/log.py +22 -11
  20. msprobe/core/common/megatron_utils.py +566 -11
  21. msprobe/core/common/parallel_state.py +12 -11
  22. msprobe/core/common/runtime.py +12 -11
  23. msprobe/core/common/utils.py +41 -41
  24. msprobe/core/compare/acc_compare.py +361 -104
  25. msprobe/core/compare/atb_data_compare.py +422 -0
  26. msprobe/core/compare/auto_compare.py +134 -0
  27. msprobe/core/compare/check.py +14 -17
  28. msprobe/core/compare/compare_cli.py +72 -149
  29. msprobe/core/compare/config.py +12 -13
  30. msprobe/core/compare/diff_analyze/first_diff_analyze.py +28 -15
  31. msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
  32. msprobe/core/compare/find_first/analyzer.py +18 -18
  33. msprobe/core/compare/find_first/graph.py +12 -11
  34. msprobe/core/compare/find_first/utils.py +13 -12
  35. msprobe/core/compare/indicator_analysis/__init__.py +15 -0
  36. msprobe/core/compare/indicator_analysis/algorithm.py +363 -0
  37. msprobe/core/compare/indicator_analysis/api_data.py +141 -0
  38. msprobe/core/compare/indicator_analysis/calculator.py +181 -0
  39. msprobe/core/compare/indicator_analysis/utils.py +116 -0
  40. msprobe/core/compare/layer_mapping/__init__.py +12 -11
  41. msprobe/core/compare/layer_mapping/data_scope_parser.py +20 -11
  42. msprobe/core/compare/layer_mapping/layer_mapping.py +14 -13
  43. msprobe/core/compare/layer_mapping/postprocess_pass.py +13 -11
  44. msprobe/core/compare/merge_result/merge_result.py +12 -11
  45. msprobe/core/compare/merge_result/merge_result_cli.py +12 -11
  46. msprobe/core/compare/merge_result/utils.py +12 -11
  47. msprobe/core/compare/multiprocessing_compute.py +13 -14
  48. msprobe/core/compare/npy_compare.py +13 -11
  49. msprobe/core/compare/offline_data_compare.py +160 -0
  50. msprobe/core/compare/stats_diff_calc.py +39 -0
  51. msprobe/core/compare/torchair_acc_cmp.py +764 -0
  52. msprobe/core/compare/torchair_cmp_utils.py +338 -0
  53. msprobe/core/compare/utils.py +140 -49
  54. msprobe/core/config_check/__init__.py +12 -11
  55. msprobe/core/config_check/checkers/__init__.py +12 -11
  56. msprobe/core/config_check/checkers/base_checker.py +15 -14
  57. msprobe/core/config_check/checkers/dataset_checker.py +13 -12
  58. msprobe/core/config_check/checkers/env_args_checker.py +13 -12
  59. msprobe/core/config_check/checkers/hyperparameter_checker.py +16 -15
  60. msprobe/core/config_check/checkers/pip_checker.py +15 -15
  61. msprobe/core/config_check/checkers/random_checker.py +13 -12
  62. msprobe/core/config_check/checkers/weights_checker.py +14 -12
  63. msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +13 -17
  64. msprobe/core/config_check/ckpt_compare/megatron_loader.py +13 -12
  65. msprobe/core/config_check/ckpt_compare/metrics.py +12 -11
  66. msprobe/core/config_check/config_check_cli.py +18 -17
  67. msprobe/core/config_check/config_checker.py +16 -14
  68. msprobe/core/config_check/resource/dependency.yaml +15 -12
  69. msprobe/core/config_check/resource/env.yaml +12 -11
  70. msprobe/core/config_check/utils/hyperparameter_parser.py +12 -11
  71. msprobe/core/config_check/utils/utils.py +12 -11
  72. msprobe/core/{data_dump → dump/api_dump}/api_registry.py +12 -11
  73. msprobe/core/{common_config.py → dump/common_config.py} +13 -24
  74. msprobe/core/dump/data_dump/data_collector.py +257 -0
  75. msprobe/core/{data_dump → dump/data_dump}/data_processor/base.py +45 -36
  76. msprobe/core/{data_dump → dump/data_dump}/data_processor/factory.py +33 -25
  77. msprobe/core/{data_dump → dump/data_dump}/data_processor/mindspore_processor.py +37 -113
  78. msprobe/core/{data_dump → dump/data_dump}/data_processor/pytorch_processor.py +364 -131
  79. msprobe/core/{data_dump → dump/data_dump}/json_writer.py +24 -31
  80. msprobe/core/{data_dump → dump/data_dump}/scope.py +12 -13
  81. msprobe/core/{debugger → dump/debugger}/precision_debugger.py +15 -23
  82. msprobe/core/dump/dump2db/db_utils.py +215 -0
  83. msprobe/core/dump/dump2db/dump2db.py +409 -0
  84. msprobe/core/{hook_manager.py → dump/hook_manager.py} +38 -87
  85. msprobe/core/dump/kernel_dump/kernel_config.py +34 -0
  86. msprobe/core/{service.py → dump/service.py} +43 -27
  87. msprobe/core/install_deps/install_deps.py +51 -0
  88. msprobe/core/monitor/anomaly_processor.py +13 -11
  89. msprobe/core/monitor/csv2db.py +73 -93
  90. msprobe/core/monitor/db_utils.py +140 -205
  91. msprobe/core/monitor/utils.py +18 -17
  92. msprobe/core/monitor_v2/__init__.py +20 -0
  93. msprobe/core/monitor_v2/base.py +83 -0
  94. msprobe/core/monitor_v2/cc.py +287 -0
  95. msprobe/core/monitor_v2/factory.py +81 -0
  96. msprobe/core/monitor_v2/module.py +201 -0
  97. msprobe/core/monitor_v2/optimizer.py +245 -0
  98. msprobe/core/monitor_v2/param.py +154 -0
  99. msprobe/core/monitor_v2/trainer.py +326 -0
  100. msprobe/core/monitor_v2/utils.py +122 -0
  101. msprobe/core/monitor_v2/weight_grad.py +419 -0
  102. msprobe/core/monitor_v2/writer.py +162 -0
  103. msprobe/core/overflow_check/abnormal_scene.py +12 -11
  104. msprobe/core/overflow_check/api_info.py +12 -11
  105. msprobe/core/overflow_check/checker.py +12 -11
  106. msprobe/core/overflow_check/filter.py +13 -11
  107. msprobe/core/overflow_check/level.py +12 -11
  108. msprobe/core/overflow_check/utils.py +12 -11
  109. msprobe/core/single_save/single_comparator.py +12 -11
  110. msprobe/core/single_save/single_saver.py +12 -11
  111. msprobe/infer/__init__.py +16 -0
  112. msprobe/infer/offline/__init__.py +16 -0
  113. msprobe/infer/offline/compare/__init__.py +16 -0
  114. msprobe/infer/offline/compare/msquickcmp/__init__.py +16 -0
  115. msprobe/infer/offline/compare/msquickcmp/adapter_cli/__init__.py +16 -0
  116. msprobe/infer/offline/compare/msquickcmp/adapter_cli/args_adapter.py +46 -0
  117. msprobe/infer/offline/compare/msquickcmp/atc/__init__.py +16 -0
  118. msprobe/infer/offline/compare/msquickcmp/atc/atc_utils.py +98 -0
  119. msprobe/infer/offline/compare/msquickcmp/cmp_process.py +328 -0
  120. msprobe/infer/offline/compare/msquickcmp/common/__init__.py +16 -0
  121. msprobe/infer/offline/compare/msquickcmp/common/args_check.py +112 -0
  122. msprobe/infer/offline/compare/msquickcmp/common/convert.py +74 -0
  123. msprobe/infer/offline/compare/msquickcmp/common/dump_data.py +121 -0
  124. msprobe/infer/offline/compare/msquickcmp/common/dynamic_argument_bean.py +39 -0
  125. msprobe/infer/offline/compare/msquickcmp/common/utils.py +669 -0
  126. msprobe/infer/offline/compare/msquickcmp/config.ini +6 -0
  127. msprobe/infer/offline/compare/msquickcmp/dump/__init__.py +16 -0
  128. msprobe/infer/offline/compare/msquickcmp/dump/args_adapter.py +50 -0
  129. msprobe/infer/offline/compare/msquickcmp/dump/dump_process.py +91 -0
  130. msprobe/infer/offline/compare/msquickcmp/install_aclruntime_aisbench.sh +180 -0
  131. msprobe/infer/offline/compare/msquickcmp/main.py +199 -0
  132. msprobe/infer/offline/compare/msquickcmp/net_compare/__init__.py +16 -0
  133. msprobe/infer/offline/compare/msquickcmp/net_compare/net_compare.py +277 -0
  134. msprobe/infer/offline/compare/msquickcmp/npu/__init__.py +16 -0
  135. msprobe/infer/offline/compare/msquickcmp/npu/npu_dump_data.py +558 -0
  136. msprobe/infer/offline/compare/msquickcmp/npu/om_parser.py +416 -0
  137. msprobe/infer/offline/compare/msquickcmp/onnx_model/__init__.py +16 -0
  138. msprobe/infer/offline/compare/msquickcmp/onnx_model/onnx_dump_data.py +374 -0
  139. msprobe/infer/utils/__init__.py +15 -0
  140. msprobe/infer/utils/acc_cmp.py +94 -0
  141. msprobe/infer/utils/check/__init__.py +37 -0
  142. msprobe/infer/utils/check/args_checker.py +35 -0
  143. msprobe/infer/utils/check/checker.py +227 -0
  144. msprobe/infer/utils/check/dict_checker.py +78 -0
  145. msprobe/infer/utils/check/func_wrapper.py +96 -0
  146. msprobe/infer/utils/check/list_checker.py +56 -0
  147. msprobe/infer/utils/check/number_checker.py +64 -0
  148. msprobe/infer/utils/check/obj_checker.py +41 -0
  149. msprobe/infer/utils/check/path_checker.py +249 -0
  150. msprobe/infer/utils/check/rule.py +126 -0
  151. msprobe/infer/utils/check/string_checker.py +66 -0
  152. msprobe/infer/utils/cmp_algorithm.py +261 -0
  153. msprobe/infer/utils/constants.py +112 -0
  154. msprobe/infer/utils/file_open_check.py +337 -0
  155. msprobe/infer/utils/util.py +177 -0
  156. msprobe/mindspore/__init__.py +14 -13
  157. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +14 -13
  158. msprobe/mindspore/api_accuracy_checker/api_info.py +12 -11
  159. msprobe/mindspore/api_accuracy_checker/api_runner.py +12 -11
  160. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +12 -11
  161. msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +12 -11
  162. msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +12 -11
  163. msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +12 -11
  164. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +15 -14
  165. msprobe/mindspore/api_accuracy_checker/compute_element.py +12 -11
  166. msprobe/mindspore/api_accuracy_checker/data_manager.py +13 -11
  167. msprobe/mindspore/api_accuracy_checker/main.py +12 -11
  168. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +14 -12
  169. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +13 -11
  170. msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +12 -11
  171. msprobe/mindspore/api_accuracy_checker/type_mapping.py +12 -11
  172. msprobe/mindspore/api_accuracy_checker/utils.py +12 -11
  173. msprobe/mindspore/common/const.py +15 -74
  174. msprobe/mindspore/common/log.py +12 -11
  175. msprobe/mindspore/common/utils.py +30 -15
  176. msprobe/mindspore/compare/common_dir_compare.py +21 -23
  177. msprobe/mindspore/compare/distributed_compare.py +18 -16
  178. msprobe/mindspore/compare/ms_compare.py +14 -14
  179. msprobe/mindspore/compare/ms_graph_compare.py +26 -20
  180. msprobe/mindspore/compare/utils.py +14 -12
  181. msprobe/mindspore/{cell_processor.py → dump/cell_processor.py} +15 -14
  182. msprobe/mindspore/{debugger → dump/debugger}/debugger_config.py +12 -30
  183. msprobe/mindspore/{debugger → dump/debugger}/precision_debugger.py +43 -45
  184. msprobe/mindspore/dump/{cell_dump_process.py → dump_processor/cell_dump_process.py} +31 -17
  185. msprobe/mindspore/dump/{cell_dump_with_insert_gradient.py → dump_processor/cell_dump_with_insert_gradient.py} +18 -14
  186. msprobe/mindspore/dump/{dump_tool_factory.py → dump_processor/dump_tool_factory.py} +16 -15
  187. msprobe/mindspore/dump/{graph_mode_cell_dump.py → dump_processor/graph_mode_cell_dump.py} +16 -15
  188. msprobe/mindspore/dump/{graph_tensor_dump.py → dump_processor/graph_tensor_dump.py} +134 -133
  189. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/api_register.py +15 -14
  190. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/hook_cell.py +12 -11
  191. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/ms_hook_manager.py +47 -20
  192. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/primitive_hooks.py +14 -13
  193. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/support_wrap_ops.yaml +13 -11
  194. msprobe/mindspore/dump/{jit_dump.py → dump_processor/jit_dump.py} +14 -13
  195. msprobe/mindspore/dump/{kernel_graph_dump.py → dump_processor/kernel_graph_dump.py} +13 -12
  196. msprobe/mindspore/dump/{kernel_kbyk_dump.py → dump_processor/kernel_kbyk_dump.py} +13 -12
  197. msprobe/mindspore/{exception_dump → dump/exception_dump}/exception_dump_tool_factory.py +14 -13
  198. msprobe/mindspore/{exception_dump → dump/exception_dump}/kernel_graph_exception_dump.py +13 -12
  199. msprobe/mindspore/{mindspore_service.py → dump/mindspore_service.py} +18 -17
  200. msprobe/mindspore/dump/mindtorch/__init__.py +19 -0
  201. msprobe/mindspore/dump/ms_config.py +105 -0
  202. msprobe/mindspore/{overflow_check → dump/overflow_check}/kernel_graph_overflow_check.py +13 -12
  203. msprobe/mindspore/{overflow_check → dump/overflow_check}/overflow_check_tool_factory.py +14 -13
  204. msprobe/mindspore/dump/task_handler_factory.py +43 -0
  205. msprobe/mindspore/monitor/common_func.py +12 -11
  206. msprobe/mindspore/monitor/data_writers.py +12 -11
  207. msprobe/mindspore/monitor/distributed/wrap_distributed.py +93 -39
  208. msprobe/mindspore/monitor/features.py +12 -11
  209. msprobe/mindspore/monitor/module_hook.py +19 -22
  210. msprobe/mindspore/monitor/optimizer_collect.py +29 -25
  211. msprobe/mindspore/monitor/utils.py +13 -11
  212. msprobe/msaccucmp/advisor/__init__.py +16 -0
  213. msprobe/msaccucmp/advisor/advisor_const.py +65 -0
  214. msprobe/msaccucmp/advisor/advisor_result.py +73 -0
  215. msprobe/msaccucmp/advisor/compare_advisor.py +99 -0
  216. msprobe/msaccucmp/advisor/input_advisor.py +66 -0
  217. msprobe/msaccucmp/advisor/node_advisor.py +68 -0
  218. msprobe/msaccucmp/advisor/overflow_advisor.py +58 -0
  219. msprobe/msaccucmp/algorithm_manager/__init__.py +16 -0
  220. msprobe/msaccucmp/algorithm_manager/algorithm_manager.py +464 -0
  221. msprobe/msaccucmp/algorithm_manager/algorithm_parameter.py +42 -0
  222. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_AccumulatedRelativeError.py +46 -0
  223. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_CosineSimilarity.py +58 -0
  224. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_KullbackLeiblerDivergence.py +84 -0
  225. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MaxAbsoluteError.py +41 -0
  226. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MaxRelativeError.py +46 -0
  227. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MeanAbsoluteError.py +41 -0
  228. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MeanRelativeError.py +46 -0
  229. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_RelativeEuclideanDistance.py +46 -0
  230. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_RootMeanSquareError.py +40 -0
  231. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_StandardDeviation.py +47 -0
  232. msprobe/msaccucmp/cmp_utils/__init__.py +16 -0
  233. msprobe/msaccucmp/cmp_utils/common.py +113 -0
  234. msprobe/msaccucmp/cmp_utils/constant/__init__.py +16 -0
  235. msprobe/msaccucmp/cmp_utils/constant/compare_error.py +81 -0
  236. msprobe/msaccucmp/cmp_utils/constant/const_manager.py +530 -0
  237. msprobe/msaccucmp/cmp_utils/file_utils.py +497 -0
  238. msprobe/msaccucmp/cmp_utils/log.py +257 -0
  239. msprobe/msaccucmp/cmp_utils/multi_process/__init__.py +16 -0
  240. msprobe/msaccucmp/cmp_utils/multi_process/multi_convert_process.py +140 -0
  241. msprobe/msaccucmp/cmp_utils/multi_process/progress.py +78 -0
  242. msprobe/msaccucmp/cmp_utils/path_check.py +274 -0
  243. msprobe/msaccucmp/cmp_utils/reg_manager.py +98 -0
  244. msprobe/msaccucmp/cmp_utils/tlv_parse.py +279 -0
  245. msprobe/msaccucmp/cmp_utils/utils.py +356 -0
  246. msprobe/msaccucmp/cmp_utils/utils_type.py +63 -0
  247. msprobe/msaccucmp/compare_vector.py +48 -0
  248. msprobe/msaccucmp/conversion/__init__.py +16 -0
  249. msprobe/msaccucmp/conversion/data_conversion.py +277 -0
  250. msprobe/msaccucmp/conversion/dtype_conversion.py +99 -0
  251. msprobe/msaccucmp/conversion/shape_format_conversion.py +477 -0
  252. msprobe/msaccucmp/conversion/tensor_conversion.py +369 -0
  253. msprobe/msaccucmp/dump_data_conversion.py +46 -0
  254. msprobe/msaccucmp/dump_parse/__init__.py +16 -0
  255. msprobe/msaccucmp/dump_parse/big_dump_data.py +317 -0
  256. msprobe/msaccucmp/dump_parse/dump.py +423 -0
  257. msprobe/msaccucmp/dump_parse/dump_data_object.py +322 -0
  258. msprobe/msaccucmp/dump_parse/dump_data_parser.py +436 -0
  259. msprobe/msaccucmp/dump_parse/dump_utils.py +246 -0
  260. msprobe/msaccucmp/dump_parse/ffts_parser.py +137 -0
  261. msprobe/msaccucmp/dump_parse/mapping.py +62 -0
  262. msprobe/msaccucmp/dump_parse/nano_dump_data.py +392 -0
  263. msprobe/msaccucmp/dump_parse/proto_dump_data.py +308 -0
  264. msprobe/msaccucmp/dump_parser.py +90 -0
  265. msprobe/msaccucmp/format_manager/__init__.py +16 -0
  266. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_NCHW.py +53 -0
  267. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_ND.py +52 -0
  268. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_NHWC.py +53 -0
  269. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_Z_to_HWCN.py +47 -0
  270. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_Z_to_NCHW.py +47 -0
  271. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_FRACTAL_Z.py +89 -0
  272. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_NCHW.py +37 -0
  273. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_NHWC.py +37 -0
  274. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_HWCN.py +43 -0
  275. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_NCHW.py +48 -0
  276. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_NHWC.py +43 -0
  277. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NCHW_to_FRACTAL_Z.py +87 -0
  278. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NCHW_to_NHWC.py +37 -0
  279. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NDC1HWC0_to_NCDHW.py +48 -0
  280. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NDC1HWC0_to_ND.py +44 -0
  281. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_FRACTAL_Z.py +87 -0
  282. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_HWCN.py +37 -0
  283. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_NCHW.py +37 -0
  284. msprobe/msaccucmp/format_manager/format_manager.py +307 -0
  285. msprobe/msaccucmp/inplace_layer_process.py +186 -0
  286. msprobe/msaccucmp/msaccucmp.py +532 -0
  287. msprobe/msaccucmp/mscmp_advisor.py +128 -0
  288. msprobe/msaccucmp/overflow/__init__.py +16 -0
  289. msprobe/msaccucmp/overflow/overflow_analyse.py +305 -0
  290. msprobe/msaccucmp/overflow/overflow_detection.py +143 -0
  291. msprobe/msaccucmp/pytorch_cmp/__init__.py +16 -0
  292. msprobe/msaccucmp/pytorch_cmp/compare_pytorch.py +389 -0
  293. msprobe/msaccucmp/pytorch_cmp/hdf5_parser.py +377 -0
  294. msprobe/msaccucmp/pytorch_cmp/pytorch_dump_data.py +461 -0
  295. msprobe/msaccucmp/shape_conversion.py +41 -0
  296. msprobe/msaccucmp/vector_cmp/__init__.py +16 -0
  297. msprobe/msaccucmp/vector_cmp/batch_compare.py +197 -0
  298. msprobe/msaccucmp/vector_cmp/compare_detail/__init__.py +16 -0
  299. msprobe/msaccucmp/vector_cmp/compare_detail/compare_detail.py +245 -0
  300. msprobe/msaccucmp/vector_cmp/compare_detail/detail.py +182 -0
  301. msprobe/msaccucmp/vector_cmp/compare_detail/detail_writer.py +580 -0
  302. msprobe/msaccucmp/vector_cmp/fusion_manager/__init__.py +16 -0
  303. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_fusion_op.py +588 -0
  304. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_npu_vs_npu.py +339 -0
  305. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_result.py +326 -0
  306. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_rule.py +156 -0
  307. msprobe/msaccucmp/vector_cmp/fusion_manager/fusion_op.py +204 -0
  308. msprobe/msaccucmp/vector_cmp/fusion_manager/fusion_rule_parser.py +635 -0
  309. msprobe/msaccucmp/vector_cmp/fusion_manager/quant_filter.py +187 -0
  310. msprobe/msaccucmp/vector_cmp/range_manager/__init__.py +16 -0
  311. msprobe/msaccucmp/vector_cmp/range_manager/range_manager.py +100 -0
  312. msprobe/msaccucmp/vector_cmp/range_manager/range_mode.py +94 -0
  313. msprobe/msaccucmp/vector_cmp/range_manager/select_mode.py +86 -0
  314. msprobe/msaccucmp/vector_cmp/vector_comparison.py +535 -0
  315. msprobe/msprobe.py +101 -130
  316. msprobe/overflow_check/__init__.py +15 -0
  317. msprobe/{nan_analyze → overflow_check}/analyzer.py +38 -27
  318. msprobe/{nan_analyze → overflow_check}/graph.py +28 -27
  319. msprobe/{nan_analyze → overflow_check}/utils.py +15 -14
  320. msprobe/pytorch/__init__.py +20 -14
  321. msprobe/pytorch/aclgraph_dump/__init__.py +45 -0
  322. msprobe/pytorch/aclgraph_dump/_meta.py +26 -0
  323. msprobe/pytorch/api_accuracy_checker/{run_ut/run_ut.py → acc_check/acc_check.py} +50 -45
  324. msprobe/pytorch/api_accuracy_checker/{run_ut/run_ut_utils.py → acc_check/acc_check_utils.py} +201 -30
  325. msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/data_generate.py +56 -16
  326. msprobe/pytorch/api_accuracy_checker/{run_ut/multi_run_ut.py → acc_check/multi_acc_check.py} +32 -47
  327. msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/run_overflow_check.py +19 -18
  328. msprobe/pytorch/api_accuracy_checker/common/config.py +22 -20
  329. msprobe/pytorch/api_accuracy_checker/common/utils.py +72 -13
  330. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -11
  331. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +23 -14
  332. msprobe/pytorch/api_accuracy_checker/compare/compare.py +45 -32
  333. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +12 -11
  334. msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +14 -12
  335. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +14 -12
  336. msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +12 -11
  337. msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +12 -11
  338. msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +21 -19
  339. msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +14 -13
  340. msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +12 -11
  341. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +60 -11
  342. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +27 -16
  343. msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +13 -11
  344. msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +39 -18
  345. msprobe/pytorch/bench_functions/__init__.py +12 -11
  346. msprobe/pytorch/bench_functions/apply_adam.py +12 -11
  347. msprobe/pytorch/bench_functions/apply_adam_w.py +12 -11
  348. msprobe/pytorch/bench_functions/confusion_transpose.py +12 -11
  349. msprobe/pytorch/bench_functions/fast_gelu.py +12 -11
  350. msprobe/pytorch/bench_functions/group_norm_silu.py +12 -11
  351. msprobe/pytorch/bench_functions/layer_norm_eval.py +12 -11
  352. msprobe/pytorch/bench_functions/linear.py +12 -11
  353. msprobe/pytorch/bench_functions/matmul_backward.py +12 -11
  354. msprobe/pytorch/bench_functions/mish.py +12 -11
  355. msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +12 -11
  356. msprobe/pytorch/bench_functions/npu_fusion_attention.py +12 -11
  357. msprobe/pytorch/bench_functions/rms_norm.py +12 -11
  358. msprobe/pytorch/bench_functions/rotary_mul.py +12 -11
  359. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +12 -11
  360. msprobe/pytorch/bench_functions/sort_v2.py +12 -11
  361. msprobe/pytorch/bench_functions/swiglu.py +12 -11
  362. msprobe/pytorch/common/__init__.py +12 -11
  363. msprobe/pytorch/common/log.py +12 -11
  364. msprobe/pytorch/common/parse_json.py +12 -11
  365. msprobe/pytorch/common/utils.py +52 -19
  366. msprobe/pytorch/compare/distributed_compare.py +13 -13
  367. msprobe/pytorch/compare/match.py +12 -11
  368. msprobe/pytorch/compare/pt_compare.py +14 -20
  369. msprobe/pytorch/compare/pt_diff_analyze.py +12 -11
  370. msprobe/pytorch/compare/utils.py +12 -11
  371. msprobe/pytorch/{hook_module → dump/api_dump}/api_register.py +18 -16
  372. msprobe/pytorch/{hook_module → dump/api_dump}/hook_module.py +14 -13
  373. msprobe/pytorch/{hook_module → dump/api_dump}/pt_hook_manager.py +68 -23
  374. msprobe/pytorch/{hook_module → dump/api_dump}/register_optimizer_hook.py +13 -11
  375. msprobe/pytorch/{hook_module → dump/api_dump}/script_wrapper.py +17 -14
  376. msprobe/pytorch/{hook_module → dump/api_dump}/utils.py +12 -11
  377. msprobe/pytorch/{debugger → dump/debugger}/debugger_config.py +23 -38
  378. msprobe/pytorch/dump/debugger/precision_debugger.py +130 -0
  379. msprobe/pytorch/{function_factory.py → dump/function_factory.py} +12 -11
  380. msprobe/pytorch/dump/module_dump/hook_wrapper.py +17 -13
  381. msprobe/pytorch/dump/module_dump/module_dump.py +16 -15
  382. msprobe/pytorch/dump/module_dump/{module_processer.py → module_processor.py} +54 -42
  383. msprobe/pytorch/dump/pt_config.py +128 -0
  384. msprobe/pytorch/{pytorch_service.py → dump/pytorch_service.py} +22 -21
  385. msprobe/pytorch/monitor/csv2tb.py +13 -11
  386. msprobe/pytorch/monitor/data_writers.py +13 -11
  387. msprobe/pytorch/monitor/distributed/wrap_distributed.py +13 -11
  388. msprobe/pytorch/monitor/features.py +12 -11
  389. msprobe/pytorch/monitor/module_hook.py +67 -59
  390. msprobe/pytorch/monitor/module_metric.py +13 -11
  391. msprobe/pytorch/monitor/optimizer_collect.py +37 -35
  392. msprobe/pytorch/monitor/utils.py +13 -11
  393. msprobe/pytorch/monitor/visualizer.py +12 -11
  394. msprobe/pytorch/torchair_dump/__init__.py +17 -0
  395. msprobe/pytorch/torchair_dump/torchair_dump.py +114 -0
  396. msprobe/scripts/atb/config_example.json +10 -0
  397. msprobe/scripts/atb/load_atb_probe.sh +101 -0
  398. msprobe/scripts/atb/unload_atb_probe.sh +27 -0
  399. msprobe/scripts/build_msaccucmp.sh +186 -0
  400. msprobe/scripts/conf/help.info +6 -0
  401. msprobe/scripts/conf/version.info +3 -0
  402. msprobe/scripts/run_script/common.sh +538 -0
  403. msprobe/scripts/run_script/main_msaccucmp.sh +232 -0
  404. msprobe/visualization/__init__.py +12 -11
  405. msprobe/visualization/builder/__init__.py +12 -11
  406. msprobe/visualization/builder/graph_builder.py +45 -30
  407. msprobe/visualization/builder/graph_merger.py +53 -32
  408. msprobe/visualization/builder/msprobe_adapter.py +34 -44
  409. msprobe/visualization/compare/__init__.py +12 -11
  410. msprobe/visualization/compare/graph_comparator.py +63 -51
  411. msprobe/visualization/compare/mode_adapter.py +28 -113
  412. msprobe/visualization/db_utils.py +133 -22
  413. msprobe/visualization/graph/__init__.py +12 -11
  414. msprobe/visualization/graph/base_node.py +15 -27
  415. msprobe/visualization/graph/distributed_analyzer.py +97 -40
  416. msprobe/visualization/graph/graph.py +14 -16
  417. msprobe/visualization/graph/node_colors.py +34 -31
  418. msprobe/visualization/graph/node_op.py +12 -11
  419. msprobe/visualization/graph_service.py +580 -205
  420. msprobe/visualization/utils.py +278 -31
  421. tb_graph_ascend/secure_build.py +175 -0
  422. tb_graph_ascend/server/__init__.py +15 -0
  423. tb_graph_ascend/server/app/__init__.py +15 -0
  424. tb_graph_ascend/server/app/model/__init__.py +15 -0
  425. tb_graph_ascend/server/app/model/hierarchy.py +348 -0
  426. tb_graph_ascend/server/app/model/layout_hierarchy_model.py +69 -0
  427. tb_graph_ascend/server/app/model/match_nodes_model.py +573 -0
  428. tb_graph_ascend/server/app/repositories/__init__.py +15 -0
  429. tb_graph_ascend/server/app/repositories/graph_repo_base.py +32 -0
  430. tb_graph_ascend/server/app/repositories/graph_repo_db.py +879 -0
  431. tb_graph_ascend/server/app/repositories/graph_repo_vis.py +83 -0
  432. tb_graph_ascend/server/app/service/__init__.py +18 -0
  433. tb_graph_ascend/server/app/service/graph_service_base.py +158 -0
  434. tb_graph_ascend/server/app/service/graph_service_db.py +438 -0
  435. tb_graph_ascend/server/app/service/graph_service_factory.py +54 -0
  436. tb_graph_ascend/server/app/service/graph_service_vis.py +480 -0
  437. tb_graph_ascend/server/app/utils/__init__.py +15 -0
  438. tb_graph_ascend/server/app/utils/constant.py +80 -0
  439. tb_graph_ascend/server/app/utils/file_check_wrapper.py +46 -0
  440. tb_graph_ascend/server/app/utils/global_state.py +95 -0
  441. tb_graph_ascend/server/app/utils/graph_utils.py +661 -0
  442. tb_graph_ascend/server/app/utils/i18n.py +153 -0
  443. tb_graph_ascend/server/app/utils/request_method.py +46 -0
  444. tb_graph_ascend/server/app/views/__init__.py +15 -0
  445. tb_graph_ascend/server/app/views/graph_views.py +304 -0
  446. tb_graph_ascend/server/plugin.py +108 -0
  447. tb_graph_ascend/server/static/index.html +9250 -0
  448. tb_graph_ascend/server/static/index.js +21 -0
  449. tb_graph_ascend/setup.py +57 -0
  450. mindstudio_probe-8.3.3.dist-info/LICENSE +0 -201
  451. mindstudio_probe-8.3.3.dist-info/RECORD +0 -491
  452. mindstudio_probe-8.3.3.dist-info/entry_points.txt +0 -2
  453. mindstudio_probe-8.3.3.dist-info/top_level.txt +0 -1
  454. msprobe/CMakeLists.txt +0 -5
  455. msprobe/README.md +0 -203
  456. msprobe/core/advisor/advisor.py +0 -129
  457. msprobe/core/advisor/advisor_const.py +0 -58
  458. msprobe/core/advisor/advisor_result.py +0 -58
  459. msprobe/core/compare/find_first/data_processor.py +0 -35
  460. msprobe/core/compare/highlight.py +0 -390
  461. msprobe/core/data_dump/data_collector.py +0 -356
  462. msprobe/core/grad_probe/constant.py +0 -90
  463. msprobe/core/grad_probe/grad_compare.py +0 -187
  464. msprobe/core/grad_probe/utils.py +0 -105
  465. msprobe/core/kernel_dump/kernel_config.py +0 -33
  466. msprobe/docs/01.installation.md +0 -250
  467. msprobe/docs/02.config_introduction.md +0 -221
  468. msprobe/docs/03.config_examples.md +0 -281
  469. msprobe/docs/04.kernel_dump_PyTorch.md +0 -73
  470. msprobe/docs/05.data_dump_PyTorch.md +0 -518
  471. msprobe/docs/06.data_dump_MindSpore.md +0 -618
  472. msprobe/docs/07.accuracy_checker_PyTorch.md +0 -310
  473. msprobe/docs/09.accuracy_checker_MindSpore.md +0 -120
  474. msprobe/docs/10.accuracy_compare_PyTorch.md +0 -637
  475. msprobe/docs/11.accuracy_compare_MindSpore.md +0 -769
  476. msprobe/docs/12.overflow_check_PyTorch.md +0 -82
  477. msprobe/docs/13.overflow_check_MindSpore.md +0 -33
  478. msprobe/docs/14.data_parse_PyTorch.md +0 -282
  479. msprobe/docs/15.free_benchmarking_PyTorch.md +0 -169
  480. msprobe/docs/16.free_benchmarking_MindSpore.md +0 -159
  481. msprobe/docs/17.grad_probe.md +0 -205
  482. msprobe/docs/18.online_dispatch.md +0 -89
  483. msprobe/docs/19.monitor.md +0 -753
  484. msprobe/docs/20.monitor_performance_baseline.md +0 -52
  485. msprobe/docs/21.visualization_PyTorch.md +0 -519
  486. msprobe/docs/22.visualization_MindSpore.md +0 -515
  487. msprobe/docs/23.generate_operator_PyTorch.md +0 -107
  488. msprobe/docs/24.code_mapping_Mindspore.md +0 -29
  489. msprobe/docs/25.tool_function_introduction.md +0 -29
  490. msprobe/docs/26.data_dump_PyTorch_baseline.md +0 -48
  491. msprobe/docs/27.dump_json_instruction.md +0 -795
  492. msprobe/docs/28.debugger_save_instruction.md +0 -288
  493. msprobe/docs/28.kernel_dump_MindSpore.md +0 -69
  494. msprobe/docs/29.data_dump_MSAdapter.md +0 -235
  495. msprobe/docs/30.overflow_check_MSAdapter.md +0 -31
  496. msprobe/docs/31.config_check.md +0 -107
  497. msprobe/docs/32.ckpt_compare.md +0 -69
  498. msprobe/docs/33.generate_operator_MindSpore.md +0 -181
  499. msprobe/docs/34.RL_collect.md +0 -101
  500. msprobe/docs/35.nan_analyze.md +0 -73
  501. msprobe/docs/36.calculation_result_change.md +0 -75
  502. msprobe/docs/FAQ.md +0 -232
  503. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +0 -146
  504. msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +0 -14
  505. msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +0 -33
  506. msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +0 -217
  507. msprobe/docs/img/BLOOM-7B_1.png +0 -0
  508. msprobe/docs/img/BLOOM-7B_2.png +0 -0
  509. msprobe/docs/img/BLOOM-7B_3.png +0 -0
  510. msprobe/docs/img/BLOOM-7B_4.png +0 -0
  511. msprobe/docs/img/GPT-3_1.png +0 -0
  512. msprobe/docs/img/GPT-3_2.png +0 -0
  513. msprobe/docs/img/GPT-3_3.png +0 -0
  514. msprobe/docs/img/GPT-3_4.png +0 -0
  515. msprobe/docs/img/GPT-3_5.png +0 -0
  516. msprobe/docs/img/GPT-3_6.png +0 -0
  517. msprobe/docs/img/GPT-3_7.png +0 -0
  518. msprobe/docs/img/GPT-3_8.png +0 -0
  519. msprobe/docs/img/YOLOV5S_1.png +0 -0
  520. msprobe/docs/img/YOLOV5S_2.png +0 -0
  521. msprobe/docs/img/accuracy_checking_details.png +0 -0
  522. msprobe/docs/img/accuracy_checking_result.png +0 -0
  523. msprobe/docs/img/api_precision_compare_details.png +0 -0
  524. msprobe/docs/img/api_precision_compare_result.png +0 -0
  525. msprobe/docs/img/auto_analyze_log.png +0 -0
  526. msprobe/docs/img/compare_result.png +0 -0
  527. msprobe/docs/img/compare_result_pkl.png +0 -0
  528. msprobe/docs/img/compare_result_pkl_md5.png.png +0 -0
  529. msprobe/docs/img/cpu_info.png +0 -0
  530. msprobe/docs/img/free_benchmark.png +0 -0
  531. msprobe/docs/img/free_benchmark_framework.png +0 -0
  532. msprobe/docs/img/grad_probe_image-1.png +0 -0
  533. msprobe/docs/img/grad_probe_image-2.png +0 -0
  534. msprobe/docs/img/grad_probe_image-3.png +0 -0
  535. msprobe/docs/img/grad_probe_image-4.png +0 -0
  536. msprobe/docs/img/grad_probe_image.png +0 -0
  537. msprobe/docs/img/merge_result.png +0 -0
  538. msprobe/docs/img/module_compare.png +0 -0
  539. msprobe/docs/img/monitor/cpu_info.png +0 -0
  540. msprobe/docs/img/monitor/step_count_per_record.png +0 -0
  541. msprobe/docs/img/ms_dump.png +0 -0
  542. msprobe/docs/img/ms_layer.png +0 -0
  543. msprobe/docs/img/pt_dump.png +0 -0
  544. msprobe/docs/img/save_compare_result_sample.png +0 -0
  545. msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
  546. msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
  547. msprobe/docs/img/visualization/proxy.png +0 -0
  548. msprobe/docs/img/visualization/tensorboard_1.png +0 -0
  549. msprobe/docs/img/visualization/tensorboard_2.png +0 -0
  550. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  551. msprobe/docs/img/visualization/vis_browser_2.png +0 -0
  552. msprobe/docs/img/visualization/vis_match_info.png +0 -0
  553. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  554. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  555. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  556. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  557. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  558. msprobe/docs/visualization/GPTModel.png +0 -0
  559. msprobe/docs/visualization/ParallelMLP.png +0 -0
  560. msprobe/docs/visualization/layer_mapping_example.md +0 -132
  561. msprobe/docs/visualization/mapping.png +0 -0
  562. msprobe/docs/visualization/mapping1.png +0 -0
  563. msprobe/docs/visualization/mindspeed_llamafactoary_img/1.png +0 -0
  564. msprobe/docs/visualization/mindspeed_llamafactoary_img/2.png +0 -0
  565. msprobe/docs/visualization/mindspeed_llamafactoary_img/3.png +0 -0
  566. msprobe/docs/visualization/mindspeed_llamafactoary_img/4.png +0 -0
  567. msprobe/docs/visualization/mindspeed_llamafactoary_img/5.png +0 -0
  568. msprobe/docs/visualization/mindspeed_llamafactoary_img/6.png +0 -0
  569. msprobe/docs/visualization/mindspeed_llamafactoary_img/7.png +0 -0
  570. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt +0 -59
  571. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory1.png +0 -0
  572. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory2.png +0 -0
  573. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt +0 -80
  574. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed1.png +0 -0
  575. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed2.png +0 -0
  576. msprobe/docs/visualization/mindspeed_llamafactory_mapping.md +0 -330
  577. msprobe/docs/visualization/module_name.png +0 -0
  578. msprobe/docs/visualization/module_name1.png +0 -0
  579. msprobe/docs/visualization/no_mapping.png +0 -0
  580. msprobe/docs/visualization/no_mapping1.png +0 -0
  581. msprobe/docs/visualization/no_mapping_analyze.png +0 -0
  582. msprobe/docs/visualization/top_layer.png +0 -0
  583. msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +0 -460
  584. msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +0 -2081
  585. msprobe/mindspore/code_mapping/bind.py +0 -283
  586. msprobe/mindspore/code_mapping/cmd_parser.py +0 -40
  587. msprobe/mindspore/code_mapping/graph.py +0 -49
  588. msprobe/mindspore/code_mapping/graph_parser.py +0 -211
  589. msprobe/mindspore/code_mapping/main.py +0 -24
  590. msprobe/mindspore/code_mapping/processor.py +0 -34
  591. msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +0 -111
  592. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -52
  593. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +0 -257
  594. msprobe/mindspore/free_benchmark/common/config.py +0 -27
  595. msprobe/mindspore/free_benchmark/common/handler_params.py +0 -31
  596. msprobe/mindspore/free_benchmark/common/utils.py +0 -100
  597. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -638
  598. msprobe/mindspore/free_benchmark/handler/base_handler.py +0 -105
  599. msprobe/mindspore/free_benchmark/handler/check_handler.py +0 -55
  600. msprobe/mindspore/free_benchmark/handler/fix_handler.py +0 -51
  601. msprobe/mindspore/free_benchmark/handler/handler_factory.py +0 -36
  602. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +0 -82
  603. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +0 -45
  604. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +0 -78
  605. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +0 -77
  606. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +0 -56
  607. msprobe/mindspore/free_benchmark/perturbation/no_change.py +0 -27
  608. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +0 -46
  609. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +0 -51
  610. msprobe/mindspore/grad_probe/global_context.py +0 -127
  611. msprobe/mindspore/grad_probe/grad_analyzer.py +0 -260
  612. msprobe/mindspore/grad_probe/grad_monitor.py +0 -42
  613. msprobe/mindspore/grad_probe/grad_stat_csv.py +0 -161
  614. msprobe/mindspore/grad_probe/hook.py +0 -115
  615. msprobe/mindspore/grad_probe/utils.py +0 -43
  616. msprobe/mindspore/mindtorch/__init__.py +0 -18
  617. msprobe/mindspore/ms_config.py +0 -153
  618. msprobe/mindspore/task_handler_factory.py +0 -44
  619. msprobe/nan_analyze/__init__.py +0 -14
  620. msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +0 -9
  621. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +0 -480
  622. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +0 -567
  623. msprobe/pytorch/debugger/precision_debugger.py +0 -181
  624. msprobe/pytorch/free_benchmark/__init__.py +0 -23
  625. msprobe/pytorch/free_benchmark/common/constant.py +0 -85
  626. msprobe/pytorch/free_benchmark/common/counter.py +0 -87
  627. msprobe/pytorch/free_benchmark/common/enums.py +0 -80
  628. msprobe/pytorch/free_benchmark/common/params.py +0 -152
  629. msprobe/pytorch/free_benchmark/common/utils.py +0 -143
  630. msprobe/pytorch/free_benchmark/compare/grad_saver.py +0 -215
  631. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +0 -121
  632. msprobe/pytorch/free_benchmark/main.py +0 -123
  633. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +0 -28
  634. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +0 -56
  635. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +0 -107
  636. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +0 -121
  637. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +0 -89
  638. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +0 -87
  639. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +0 -43
  640. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +0 -60
  641. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +0 -34
  642. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +0 -252
  643. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +0 -54
  644. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +0 -40
  645. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +0 -45
  646. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -181
  647. msprobe/pytorch/grad_probe/__init__.py +0 -0
  648. msprobe/pytorch/grad_probe/grad_monitor.py +0 -108
  649. msprobe/pytorch/grad_probe/grad_stat_csv.py +0 -160
  650. msprobe/pytorch/hook_module/__init__.py +0 -16
  651. msprobe/pytorch/hook_module/wrap_aten.py +0 -111
  652. msprobe/pytorch/online_dispatch/__init__.py +0 -19
  653. msprobe/pytorch/online_dispatch/compare.py +0 -224
  654. msprobe/pytorch/online_dispatch/dispatch.py +0 -332
  655. msprobe/pytorch/online_dispatch/dump_compare.py +0 -179
  656. msprobe/pytorch/online_dispatch/single_compare.py +0 -412
  657. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +0 -58
  658. msprobe/pytorch/online_dispatch/utils.py +0 -158
  659. msprobe/pytorch/parse_tool/__init__.py +0 -0
  660. msprobe/pytorch/parse_tool/cli.py +0 -31
  661. msprobe/pytorch/parse_tool/lib/__init__.py +0 -0
  662. msprobe/pytorch/parse_tool/lib/compare.py +0 -253
  663. msprobe/pytorch/parse_tool/lib/config.py +0 -50
  664. msprobe/pytorch/parse_tool/lib/file_desc.py +0 -45
  665. msprobe/pytorch/parse_tool/lib/interactive_cli.py +0 -97
  666. msprobe/pytorch/parse_tool/lib/parse_exception.py +0 -54
  667. msprobe/pytorch/parse_tool/lib/parse_tool.py +0 -161
  668. msprobe/pytorch/parse_tool/lib/utils.py +0 -299
  669. msprobe/pytorch/parse_tool/lib/visualization.py +0 -85
  670. msprobe/pytorch/pt_config.py +0 -299
  671. /msprobe/core/{grad_probe → dump}/__init__.py +0 -0
  672. /msprobe/{mindspore/code_mapping → core/dump/api_dump}/__init__.py +0 -0
  673. /msprobe/{mindspore/debugger → core/dump/data_dump}/__init__.py +0 -0
  674. /msprobe/{mindspore/exception_dump → core/dump/data_dump/data_processor}/__init__.py +0 -0
  675. /msprobe/{mindspore/free_benchmark → core/dump/debugger}/__init__.py +0 -0
  676. /msprobe/{mindspore/free_benchmark/common → core/dump/kernel_dump}/__init__.py +0 -0
  677. /msprobe/mindspore/{free_benchmark/handler → dump/debugger}/__init__.py +0 -0
  678. /msprobe/mindspore/{grad_probe → dump/dump_processor}/__init__.py +0 -0
  679. /msprobe/mindspore/{overflow_check → dump/exception_dump}/__init__.py +0 -0
  680. /msprobe/mindspore/{mindtorch → dump/mindtorch}/mindtorch_adaptor.py +0 -0
  681. /msprobe/{pytorch/api_accuracy_checker/run_ut → mindspore/dump/overflow_check}/__init__.py +0 -0
  682. /msprobe/{pytorch/debugger → mindspore/monitor}/__init__.py +0 -0
  683. /msprobe/{pytorch/free_benchmark/common → msaccucmp}/__init__.py +0 -0
  684. /msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/.keep +0 -0
  685. /msprobe/pytorch/{free_benchmark/perturbed_layers → api_accuracy_checker/acc_check}/__init__.py +0 -0
  686. /msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/torch_ut_setting.json +0 -0
  687. /msprobe/pytorch/{free_benchmark/perturbed_layers/npu → dump/api_dump}/__init__.py +0 -0
  688. /msprobe/pytorch/{hook_module → dump/api_dump}/support_wrap_ops.yaml +0 -0
  689. /msprobe/pytorch/{free_benchmark/result_handlers → dump/debugger}/__init__.py +0 -0
@@ -0,0 +1,363 @@
1
+ # -------------------------------------------------------------------------
2
+ # This file is part of the MindStudio project.
3
+ # Copyright (c) 2025 Huawei Technologies Co.,Ltd.
4
+ #
5
+ # MindStudio is licensed under Mulan PSL v2.
6
+ # You can use this software according to the terms and conditions of the Mulan PSL v2.
7
+ # You may obtain a copy of Mulan PSL v2 at:
8
+ #
9
+ # http://license.coscl.org.cn/MulanPSL2
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12
+ # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13
+ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14
+ # See the Mulan PSL v2 for more details.
15
+ # -------------------------------------------------------------------------
16
+
17
+
18
+ from abc import ABC, abstractmethod
19
+ from msprobe.core.compare.indicator_analysis.api_data import ApiData
20
+ from msprobe.core.compare.indicator_analysis.utils import is_inf_or_nan, str2float, ResultLevel, IgnoreInfo, \
21
+ get_data_list_by_ignore_info
22
+ from msprobe.core.common.const import CompareConst
23
+
24
+
25
class BaseAlgorithm(ABC):
    """Abstract base class for the comparison checking algorithms."""

    @abstractmethod
    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Entry point executed for one API / module.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
36
+
37
+
38
class InfNanErrChecker(BaseAlgorithm):
    """
    Applies to the real-data mode and the statistics mode.

    A row is marked as error when the NPU maximum or minimum value is nan/inf/-inf.
    Rows whose bench maximum or minimum is already nan/inf/-inf are skipped.
    """

    def __init__(self):
        self.result_level = ResultLevel.ERROR
        self.err_msg = f'{self.result_level.value}: There is nan/inf/-inf in the maximum or minimum value of NPU.'

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Flag every selected row whose NPU max/min is inf or nan while the bench side is finite.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        rows = get_data_list_by_ignore_info(api_data, ignore_info)
        for row in rows or ():
            bench_extremes = (
                api_data.get_data_by_header(CompareConst.BENCH_MAX, row),
                api_data.get_data_by_header(CompareConst.BENCH_MIN, row),
            )
            # The bench side already contains inf/nan, so the row is not reported.
            if is_inf_or_nan(bench_extremes[0]) or is_inf_or_nan(bench_extremes[1]):
                continue

            npu_extremes = (
                api_data.get_data_by_header(CompareConst.NPU_MAX, row),
                api_data.get_data_by_header(CompareConst.NPU_MIN, row),
            )
            if not (is_inf_or_nan(npu_extremes[0]) or is_inf_or_nan(npu_extremes[1])):
                continue

            api_data.set_result(row, self.result_level)
            api_data.set_err_msg(row, self.err_msg)
67
+
68
+
69
class RelativeErrChecker(BaseAlgorithm):
    """
    Applies to the statistics mode.

    The indicator is derived from inputs and outputs together: when the relative error of the
    inputs is below 0.1 and the relative error of an output is above 0.5, that output row is
    marked as error. The norm relative error is the observed metric.
    """

    def __init__(self):
        self.in_threshold = 0.1
        self.out_threshold = 0.5
        self.result_level = ResultLevel.ERROR
        self.err_msg = (f'{self.result_level.value}: The {CompareConst.NORM_RELATIVE_ERR} of output '
                        f'is greater than {self.out_threshold}.')

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Mark output rows whose norm relative error exceeds the output threshold, provided the
        largest input norm relative error stays below the input threshold.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        if ignore_info in (IgnoreInfo.ALL_IGNORE, IgnoreInfo.INPUT_IGNORE):
            return

        largest_input_err = abs(api_data.get_min_or_max_value(CompareConst.NORM_RELATIVE_ERR, is_min=False))
        # Written as a negated "<" so that a nan value takes the same path as a large error.
        if not largest_input_err < self.in_threshold:
            return

        for row in api_data.output_data:
            raw_value = api_data.get_data_by_header(CompareConst.NORM_RELATIVE_ERR, row)
            output_err = str2float(raw_value)
            if abs(output_err) > self.out_threshold:
                api_data.set_result(row, self.result_level)
                api_data.set_err_msg(row, self.err_msg)
95
+
96
+
97
class OneThousandthErrChecker(BaseAlgorithm):
    """
    Applies to the real-data mode.

    The indicator is derived from inputs and outputs together: when the One Thousandth Err Ratio
    of the input/parameters is > 0.9 while that of the output is < 0.6, the API or module is
    marked as error.
    """

    def __init__(self):
        self.input_threshold = 0.9
        # The documented rule and the reported message both use 0.6 for the output side;
        # the threshold is kept in sync with them so the check matches what is reported.
        self.output_threshold = 0.6
        self.result_level = ResultLevel.ERROR
        # The message is built from the thresholds so that the two can never drift apart.
        self.err_msg = (f'{self.result_level.value}: The input/parameters of '
                        f'One Thousandth Err Ratio > {self.input_threshold} '
                        f'while the output < {self.output_threshold}.')

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Mark the first output row as error when the smallest input ratio is above the input
        threshold and the smallest output ratio is below the output threshold.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        if ignore_info in [IgnoreInfo.ALL_IGNORE, IgnoreInfo.INPUT_IGNORE]:
            return
        if not api_data.output_data:
            return

        min_input_ratio = api_data.get_min_or_max_value(CompareConst.ONE_THOUSANDTH_ERR_RATIO)
        min_output_ratio = api_data.get_min_or_max_value(CompareConst.ONE_THOUSANDTH_ERR_RATIO, is_input=False)

        if min_input_ratio > self.input_threshold and min_output_ratio < self.output_threshold:
            # The result is attached to the first output row only.
            api_data.set_result(api_data.output_data[0], self.result_level)
            api_data.set_err_msg(api_data.output_data[0], self.err_msg)
123
+
124
+
125
class RequiresGradErrChecker(BaseAlgorithm):
    """
    Applies to the real-data mode and the statistics mode.

    A row whose NPU and bench requires_grad values differ is marked as error.
    """

    def __init__(self):
        self.result_level = ResultLevel.ERROR
        self.err_msg = f'{self.result_level.value}: The Required_Grad of NPU and Bench are inconsistent'

    @staticmethod
    def _is_missing(value):
        """Return True only for an absent value (None or empty string); False is a real value."""
        return value is None or (isinstance(value, str) and not value)

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Flag every selected, matched row whose NPU and bench requires_grad values differ.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        data_lists = get_data_list_by_ignore_info(api_data, ignore_info)
        if not data_lists:
            return

        for data_list in data_lists:
            # Rows that were not matched with a bench counterpart are skipped.
            bench_name = api_data.get_data_by_header(CompareConst.BENCH_NAME, data_list)
            if not bench_name or bench_name == CompareConst.N_A:
                continue

            npu_req_grad = api_data.get_data_by_header(CompareConst.NPU_REQ_GRAD, data_list)
            bench_req_grad = api_data.get_data_by_header(CompareConst.BENCH_REQ_GRAD, data_list)

            # A plain truthiness test would treat a legitimate `False` as "no data" and would
            # therefore never report a True/False mismatch; only truly absent values are skipped.
            if self._is_missing(npu_req_grad) or self._is_missing(bench_req_grad):
                continue

            if npu_req_grad != bench_req_grad:
                api_data.set_result(data_list, self.result_level)
                api_data.set_err_msg(data_list, self.err_msg)
155
+
156
+
157
class ParametersErrChecker(BaseAlgorithm):
    """
    Applies to the real-data mode and the statistics mode.

    A non-tensor scalar parameter whose NPU and bench values differ is marked as error.
    """

    def __init__(self):
        self.result_level = ResultLevel.ERROR
        self.err_msg = f'{self.result_level.value}: The scalar parameters of NPU and Bench are inconsistent.'

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Compare the max value of every matched non-tensor scalar input row between NPU and bench.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        if ignore_info in (IgnoreInfo.ALL_IGNORE, IgnoreInfo.INPUT_IGNORE):
            return

        for row in api_data.input_data:
            # Rows without a matched bench counterpart are skipped.
            matched_name = api_data.get_data_by_header(CompareConst.BENCH_NAME, row)
            if not matched_name or matched_name == CompareConst.N_A:
                continue

            dtype_npu = api_data.get_data_by_header(CompareConst.NPU_DTYPE, row)
            dtype_bench = api_data.get_data_by_header(CompareConst.BENCH_DTYPE, row)
            if not (dtype_npu and dtype_bench):
                continue
            # The dtype of a non-tensor scalar always contains 'class', e.g. <class int> for int.
            if not ('class' in dtype_npu and 'class' in dtype_bench):
                continue

            # A shape of [] is used to decide that the value is a scalar.
            shape_npu = api_data.get_data_by_header(CompareConst.NPU_SHAPE, row)
            if str(shape_npu) != '[]':
                continue

            value_npu = api_data.get_data_by_header(CompareConst.NPU_MAX, row)
            value_bench = api_data.get_data_by_header(CompareConst.BENCH_MAX, row)
            if value_npu != value_bench:
                api_data.set_result(row, self.result_level)
                api_data.set_err_msg(row, self.err_msg)
194
+
195
+
196
class CRC32ErrChecker(BaseAlgorithm):
    """
    Applies to the MD5 mode.

    A row whose NPU and bench CRC-32 values differ is marked as error.
    A row whose NPU and bench parameters were not matched is marked as warning.
    """

    def __init__(self):
        self.err_level = ResultLevel.ERROR
        self.err_msg = f'{self.err_level.value}: The CRC-32 value of NPU differs from that of the bench.'
        self.warn_level = ResultLevel.WARNING
        self.warn_msg = f'{self.warn_level.value}: The parameter of NPU does not match the bench.'

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Compare the NPU and bench checksum of every selected row and record a warning or an error.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        rows = get_data_list_by_ignore_info(api_data, ignore_info)
        if not rows:
            return

        placeholders = (CompareConst.N_A, CompareConst.NAN)
        for row in rows:
            checksum_npu = api_data.get_data_by_header(CompareConst.NPU_MD5, row)
            checksum_bench = api_data.get_data_by_header(CompareConst.BENCH_MD5, row)
            if not checksum_npu != checksum_bench:
                continue

            # A placeholder on either side means the parameter was not matched.
            unmatched = checksum_npu in placeholders or checksum_bench in placeholders
            level = self.warn_level if unmatched else self.err_level
            message = self.warn_msg if unmatched else self.err_msg
            api_data.set_result(row, level)
            api_data.set_err_msg(row, message)
226
+
227
+
228
class DTypeErrChecker(BaseAlgorithm):
    """
    Applies to the real-data mode and the statistics mode.

    A row whose NPU and bench dtype differ is marked as error.
    """

    def __init__(self):
        self.result_level = ResultLevel.ERROR
        self.err_msg = f'{self.result_level.value}: The dtype of NPU and Bench are inconsistent.'

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Flag every selected, matched row whose NPU dtype differs from the bench dtype.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        rows = get_data_list_by_ignore_info(api_data, ignore_info)
        for row in rows or ():
            # Rows without a matched bench counterpart are skipped.
            matched_name = api_data.get_data_by_header(CompareConst.BENCH_NAME, row)
            if not matched_name or matched_name == CompareConst.N_A:
                continue

            dtype_npu = api_data.get_data_by_header(CompareConst.NPU_DTYPE, row)
            dtype_bench = api_data.get_data_by_header(CompareConst.BENCH_DTYPE, row)

            # Both dtypes must be present before they are compared.
            if dtype_npu and dtype_bench and dtype_npu != dtype_bench:
                api_data.set_result(row, self.result_level)
                api_data.set_err_msg(row, self.err_msg)
258
+
259
+
260
class ShapeErrChecker(BaseAlgorithm):
    """
    Applies to the real-data mode and the statistics mode.

    A row whose NPU and bench shape differ is marked as error.
    """

    def __init__(self):
        self.result_level = ResultLevel.ERROR
        self.err_msg = f'{self.result_level.value}: The shape of NPU and Bench are inconsistent.'

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Flag every selected, matched row whose NPU shape differs from the bench shape.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        rows = get_data_list_by_ignore_info(api_data, ignore_info)
        for row in rows or ():
            # Rows without a matched bench counterpart are skipped.
            matched_name = api_data.get_data_by_header(CompareConst.BENCH_NAME, row)
            if not matched_name or matched_name == CompareConst.N_A:
                continue

            shape_npu = api_data.get_data_by_header(CompareConst.NPU_SHAPE, row)
            shape_bench = api_data.get_data_by_header(CompareConst.BENCH_SHAPE, row)

            # Presence is tested with "is None" so that an empty shape is still compared.
            both_present = shape_npu is not None and shape_bench is not None
            if both_present and shape_npu != shape_bench:
                api_data.set_result(row, self.result_level)
                api_data.set_err_msg(row, self.err_msg)
289
+
290
+
291
class RelativeWarnChecker(BaseAlgorithm):
    """
    Applies to the statistics mode.

    The indicator is derived from inputs and outputs together: when the relative error of the
    output is 10 times that of the input, the API or module is marked as warning. The norm
    relative error is the observed metric.
    """

    def __init__(self):
        self.threshold = 10
        self.result_level = ResultLevel.WARNING
        self.err_msg = (f'{self.result_level.value}: The norm relative error of output '
                        f'is {self.threshold} times that of input.')

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Mark the first output row as warning when the largest output norm relative error is
        disproportionate to the largest input norm relative error.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        if ignore_info in (IgnoreInfo.ALL_IGNORE, IgnoreInfo.INPUT_IGNORE):
            return

        if not api_data.output_data:
            return

        input_err = abs(api_data.get_min_or_max_value(CompareConst.NORM_RELATIVE_ERR, is_min=False))
        output_err = abs(
            api_data.get_min_or_max_value(CompareConst.NORM_RELATIVE_ERR, is_input=False, is_min=False))

        if input_err == 0:
            # No ratio can be computed for a zero input error; a fixed bound of 0.1 is used instead.
            amplified = output_err > 0.1
        else:
            amplified = output_err / input_err > self.threshold

        if not amplified:
            return

        first_output = api_data.output_data[0]
        api_data.set_result(first_output, self.result_level)
        api_data.set_err_msg(api_data.output_data[0], self.err_msg)
326
+
327
+
328
class CosineWarnChecker(BaseAlgorithm):
    """
    Applies to the real-data mode.

    The indicator is derived from inputs and outputs together: the API or module is marked as
    warning when the Cosine of the input/parameters is > 0.9 and
    input/parameters - output is > 0.1.
    """

    def __init__(self):
        self.input_threshold = 0.9
        self.output_threshold = 0.1
        self.result_level = ResultLevel.WARNING
        self.err_msg = (f'{self.result_level.value}: The input/parameters of Cosine > {self.input_threshold}, '
                        f'and input/parameters - output > {self.output_threshold}')

    def run(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Mark the first output row as warning when the smallest input cosine is high and exceeds
        the smallest output cosine by more than the output threshold.

        :param api_data: structured API data
        :param ignore_info: indicator information that must be ignored for the current API data
        """
        if ignore_info in (IgnoreInfo.ALL_IGNORE, IgnoreInfo.INPUT_IGNORE):
            return

        if not api_data.output_data:
            return

        cosine_in = api_data.get_min_or_max_value(CompareConst.COSINE)
        cosine_out = api_data.get_min_or_max_value(CompareConst.COSINE, is_input=False)

        input_is_close = cosine_in > self.input_threshold
        if not (input_is_close and cosine_in - cosine_out > self.output_threshold):
            return

        api_data.set_result(api_data.output_data[0], self.result_level)
        api_data.set_err_msg(api_data.output_data[0], self.err_msg)
355
+
356
+
357
# Checker classes grouped for the tensor (real-data) comparison.
TENSOR_CHECKERS = [
    InfNanErrChecker,
    OneThousandthErrChecker,
    RequiresGradErrChecker,
    ParametersErrChecker,
    DTypeErrChecker,
    ShapeErrChecker,
    CosineWarnChecker,
]
# Checker classes grouped for the statistics comparison.
STATISTICS_CHECKERS = [
    InfNanErrChecker,
    RelativeErrChecker,
    RequiresGradErrChecker,
    ParametersErrChecker,
    DTypeErrChecker,
    ShapeErrChecker,
    RelativeWarnChecker,
]
# Checker classes grouped for the MD5 comparison.
MD5_CHECKERS = [CRC32ErrChecker]
# Same as STATISTICS_CHECKERS except that ShapeErrChecker is not included.
STATISTICS_CHECKERS_PARALLEL_MERGE = [
    InfNanErrChecker,
    RelativeErrChecker,
    RequiresGradErrChecker,
    ParametersErrChecker,
    DTypeErrChecker,
    RelativeWarnChecker,
]
@@ -0,0 +1,141 @@
1
+ # -------------------------------------------------------------------------
2
+ # This file is part of the MindStudio project.
3
+ # Copyright (c) 2025 Huawei Technologies Co.,Ltd.
4
+ #
5
+ # MindStudio is licensed under Mulan PSL v2.
6
+ # You can use this software according to the terms and conditions of the Mulan PSL v2.
7
+ # You may obtain a copy of Mulan PSL v2 at:
8
+ #
9
+ # http://license.coscl.org.cn/MulanPSL2
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12
+ # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13
+ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14
+ # See the Mulan PSL v2 for more details.
15
+ # -------------------------------------------------------------------------
16
+
17
+ from typing import List
18
+
19
+ from msprobe.core.compare.indicator_analysis.utils import CompareMode, ResultLevel, str2float
20
+ from msprobe.core.common.const import Const, CompareConst
21
+ from msprobe.core.common.log import logger
22
+ from msprobe.core.common.exceptions import MsprobeException
23
+
24
+
25
class ApiData:
    """
    Structured view over the comparison-result rows of one API or module.

    Rows are split into ``input_data`` and ``output_data`` and their cells are addressed by
    header name through ``header_index_mapping``.
    """

    # Header-name -> column-index mappings, cached per mode and shared by all instances.
    header_index_mapping_cache = {}

    def __init__(self, mode, data_lists: List[List]):
        """
        :param mode: comparison mode value, used to select the result header
        :param data_lists: result rows belonging to the API or module
        """
        self.input_data = []
        self.output_data = []
        self.mode = mode
        self.data_lists = data_lists
        self.header = self._get_header()
        # Mapping between header and index: the index of a header is used to read the
        # corresponding cell from the rows in input_data and output_data.
        self.header_index_mapping = self.get_header_index_mapping()
        self._init_data()

    def get_header_index_mapping(self):
        """Return the header-name to column-index mapping for this mode, building it once."""
        cache = ApiData.header_index_mapping_cache
        if self.mode not in cache:
            cache[self.mode] = {name: position for position, name in enumerate(self.header)}
        return cache[self.mode]

    def get_data_by_header(self, header: str, data_list: List):
        """
        Read the cell of ``data_list`` that belongs to ``header``.

        :raises MsprobeException: when the cell cannot be read
        """
        column = self.header_index_mapping.get(header)
        try:
            return data_list[column]
        except Exception as e:
            logger.error(f'Unable to get data from the data list based on the header: {e}')
            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR) from e

    def set_data_by_header(self, header: str, data_list: List, new_data):
        """
        Overwrite the cell of ``data_list`` that belongs to ``header`` with ``new_data``.

        :raises MsprobeException: when the cell cannot be written
        """
        column = self.header_index_mapping.get(header)
        try:
            data_list[column] = new_data
        except Exception as e:
            logger.error(f'Unable to set data from the data list based on the header: {e}')
            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR) from e

    def set_result(self, data_list: List, status: ResultLevel = ResultLevel.PASS):
        """
        Store ``status`` in the result cell unless the cell already holds a ``ResultLevel``
        that ``status`` does not exceed.

        :raises MsprobeException: when the cell cannot be read, compared or written
        """
        column = self.header_index_mapping.get(CompareConst.RESULT)
        try:
            existing = data_list[column]
            keep_existing = isinstance(existing, ResultLevel) and not status > existing
            if not keep_existing:
                data_list[column] = status
        except Exception as e:
            logger.error(f'Unable to set status from the data list based on the header: {e}')
            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR) from e

    def set_err_msg(self, data_list: List, msg: str = '', init_msg=False):
        """
        Reset the error-message cell to an empty list (``init_msg=True``) or append ``msg`` to it.

        A cell that is not a list yet is converted into one, keeping its value when truthy.

        :raises MsprobeException: when the cell cannot be read or written
        """
        column = self.header_index_mapping.get(CompareConst.ERROR_MESSAGE)
        try:
            if init_msg:
                data_list[column] = []
                return

            messages = data_list[column]
            if not isinstance(messages, list):
                messages = [messages] if messages else []
                data_list[column] = messages
            if msg:
                messages.append(msg)
        except Exception as e:
            logger.error(f'Unable to set err msg from the data list based on the header: {e}')
            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR) from e

    def get_min_or_max_value(self, header, is_input=True, is_min=True):
        """
        Return the smallest or largest indicator among the input or output rows.

        The search starts from 1.0 when looking for the minimum and from 0.0 when looking for
        the maximum; that start value is returned when no row contributes a usable value.
        """
        best = 1.0 if is_min else 0.0
        pick = min if is_min else max
        rows = self.input_data if is_input else self.output_data
        for row in rows:
            cell = self.get_data_by_header(header, row)

            if cell is None or cell in (CompareConst.NAN, CompareConst.N_A):
                continue

            # Percentage strings are converted; any other string is ignored.
            if isinstance(cell, str) and cell.endswith('%'):
                cell = str2float(cell)

            if isinstance(cell, str):
                continue

            best = pick(best, cell)
        return best

    def _get_header(self):
        """
        Select the result header that belongs to the comparison mode.

        :raises MsprobeException: when the mode is none of statistics / tensor / md5
        """
        if self.mode == CompareMode.STATISTICS.value:
            return CompareConst.SUMMARY_COMPARE_RESULT_HEADER
        if self.mode == CompareMode.TENSOR.value:
            return CompareConst.COMPARE_RESULT_HEADER
        if self.mode == CompareMode.MD5.value:
            return CompareConst.MD5_COMPARE_RESULT_HEADER

        logger.error(f'The parameter "mode" error, '
                     f'expected {CompareMode.STATISTICS.value}/{CompareMode.TENSOR.value}/{CompareMode.MD5.value}, '
                     f'actually {self.mode}.')
        raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR)

    def _init_data(self):
        """Initialise every non-empty row and sort it into ``input_data`` or ``output_data``."""
        for row in self.data_lists:
            if not row:
                continue

            missing = len(self.header) - len(row)
            if missing > 0:
                # Padding builds a new list; the entry stored in self.data_lists is left as it is.
                row = row + [''] * missing

            # Initialise the result to pass and the error message to a list.
            self.set_result(row)
            self.set_err_msg(row, init_msg=True)

            output_marker = f'{Const.SEP}{Const.OUTPUT}{Const.SEP}'
            bucket = self.output_data if output_marker in row[0] else self.input_data
            bucket.append(row)
@@ -0,0 +1,181 @@
1
+ # -------------------------------------------------------------------------
2
+ # This file is part of the MindStudio project.
3
+ # Copyright (c) 2025 Huawei Technologies Co.,Ltd.
4
+ #
5
+ # MindStudio is licensed under Mulan PSL v2.
6
+ # You can use this software according to the terms and conditions of the Mulan PSL v2.
7
+ # You may obtain a copy of Mulan PSL v2 at:
8
+ #
9
+ # http://license.coscl.org.cn/MulanPSL2
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12
+ # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13
+ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14
+ # See the Mulan PSL v2 for more details.
15
+ # -------------------------------------------------------------------------
16
+
17
+ import re
18
+ import json
19
+ from typing import List
20
+
21
+ from msprobe.core.common.const import Const, CompareConst
22
+ from msprobe.core.common.log import logger
23
+ from msprobe.core.compare.indicator_analysis.utils import CompareMode, ResultLevel, divide_result_df, IgnoreInfo
24
+ from msprobe.core.compare.indicator_analysis.algorithm import BaseAlgorithm, TENSOR_CHECKERS, STATISTICS_CHECKERS, \
25
+ MD5_CHECKERS, STATISTICS_CHECKERS_PARALLEL_MERGE
26
+ from msprobe.core.compare.indicator_analysis.api_data import ApiData
27
+
28
+
29
class ApiIndicatorCalculator:
    """
    Run the registered checker algorithms over one API/module and derive its overall indicator.

    The checker set is chosen from ``mode`` (statistics / tensor / md5) and, for statistics
    mode, from ``parallel_merge``. Two name sets control which APIs are reported as
    ``IgnoreInfo.ALL_IGNORE`` or ``IgnoreInfo.INPUT_IGNORE`` to the algorithms.
    """

    # Matches a trailing "_rank<digits>"; stripped from the API name only when parallel_merge is set.
    RANK_SUFFIX_PATTERN = re.compile(r'_rank\d+$')

    def __init__(self, mode, parallel_merge=False):
        """
        Args:
            mode: comparison mode value, compared against ``CompareMode.*.value``.
            parallel_merge: selects the parallel-merge checker set in statistics mode and
                enables rank-suffix stripping of API names.
        """
        self.mode = mode
        self.parallel_merge = parallel_merge
        # API names reported as ALL_IGNORE.
        self.all_ignore_set = {'empty', 'empty_like', 'numpy', 'to', '__setitem__', 'empty_with_format',
                               'new_empty_strided', 'new_empty', 'empty_strided'}
        # API names reported as INPUT_IGNORE.
        self.input_ignore_set = {'_reduce_scatter_base', '_all_gather_base', 'all_to_all_single', 'batch_isend_irecv'}
        self.algorithms: List[BaseAlgorithm] = []
        self._add_algorithm()

    @staticmethod
    def get_api_indicator_and_msg(api_data: ApiData):
        """
        Collapse the per-parameter results of one API/module into a single indicator.

        Every input and output row is visited. A result cell holding a ``ResultLevel`` is
        rewritten to its ``.value``, and the highest level seen (starting from
        ``ResultLevel.PASS``) is kept. An error-message cell holding a list is rewritten
        to its ``json.dumps`` string.

        Return:
            The ``.value`` of the worst ``ResultLevel`` found (pass/warning/error).
        """
        all_data_lists = api_data.input_data + api_data.output_data
        final_indicator = ResultLevel.PASS
        for data_list in all_data_lists:
            indicator = api_data.get_data_by_header(CompareConst.RESULT, data_list)
            if isinstance(indicator, ResultLevel):
                api_data.set_data_by_header(CompareConst.RESULT, data_list, indicator.value)
                if indicator > final_indicator:
                    final_indicator = indicator
            err_msg = api_data.get_data_by_header(CompareConst.ERROR_MESSAGE, data_list)
            if isinstance(err_msg, list):
                api_data.set_data_by_header(CompareConst.ERROR_MESSAGE, data_list, json.dumps(err_msg))

        return final_indicator.value

    def get_api_ignore_info(self, api_data: ApiData):
        """
        Decide whether the checking rules should be skipped for this API.

        The API name is the second ``Const.SEP``-separated segment of the NPU name found in
        the first input row (with a trailing ``_rank<digits>`` removed when
        ``parallel_merge`` is set).

        Return:
            ``IgnoreInfo.ALL_IGNORE`` or ``IgnoreInfo.INPUT_IGNORE`` when the name is in the
            matching set; ``IgnoreInfo.NO_IGNORE`` otherwise, including when there is no
            input row or the name has fewer than two segments.
        """
        if not api_data.input_data:
            return IgnoreInfo.NO_IGNORE
        npu_param_name = api_data.get_data_by_header(CompareConst.NPU_NAME, api_data.input_data[0])
        name_split = npu_param_name.split(Const.SEP)
        if len(name_split) < 2:
            return IgnoreInfo.NO_IGNORE
        api_name = self.RANK_SUFFIX_PATTERN.sub('', name_split[1]) if self.parallel_merge else name_split[1]
        if api_name in self.all_ignore_set:
            return IgnoreInfo.ALL_IGNORE
        if api_name in self.input_ignore_set:
            return IgnoreInfo.INPUT_IGNORE

        return IgnoreInfo.NO_IGNORE

    def calculate(self, raw_data_list: List[List]):
        """
        Entry point: wrap the rows in an ``ApiData``, run every algorithm, return the indicator.

        Args:
            raw_data_list: one list per parameter row of the API/module.

        Return:
            The overall indicator value produced by ``get_api_indicator_and_msg``.
        """
        api_data = ApiData(self.mode, raw_data_list)
        ignore_info = self.get_api_ignore_info(api_data)
        self.execute_all(api_data, ignore_info)
        return self.get_api_indicator_and_msg(api_data)

    def add_algorithm(self, algorithm: BaseAlgorithm):
        """
        Register one checker instance.

        Raises:
            TypeError: ``algorithm`` is not a ``BaseAlgorithm`` instance (logged first).
        """
        if not isinstance(algorithm, BaseAlgorithm):
            msg = 'It must be an instance of a subclass of BaseAlgorithm.'
            logger.error(msg)
            raise TypeError(msg)
        self.algorithms.append(algorithm)

    def execute_all(self, api_data: ApiData, ignore_info: IgnoreInfo):
        """
        Run every registered algorithm, in registration order, on ``api_data``.

        Raises:
            RuntimeError: an algorithm raised; the original exception is chained as the cause.
        """
        for algorithm in self.algorithms:
            try:
                algorithm.run(api_data, ignore_info)
            except Exception as e:
                msg = 'Run algorithm failed.'
                # Name the failing checker and the cause in the log; the raised message is unchanged.
                logger.error(f'{msg} Algorithm: {type(algorithm).__name__}, error: {e}')
                raise RuntimeError(msg) from e

    def _add_algorithm(self):
        """
        Instantiate and register the checker classes that belong to ``self.mode``.

        A mode that matches none of the three known values registers nothing.
        """
        if self.mode == CompareMode.STATISTICS.value:
            checkers = STATISTICS_CHECKERS_PARALLEL_MERGE if self.parallel_merge else STATISTICS_CHECKERS
        elif self.mode == CompareMode.TENSOR.value:
            checkers = TENSOR_CHECKERS
        elif self.mode == CompareMode.MD5.value:
            checkers = MD5_CHECKERS
        else:
            checkers = ()

        for checker in checkers:
            self.add_algorithm(checker())
123
+
124
+
125
def calculate_excel_result_df(result_df, mode, chunk_size=1000):
    """
    Excel comparison only: fill the result and error-message columns of ``result_df`` in place.

    Args:
        result_df: DataFrame holding the comparison table before it is written to Excel.
        mode: comparison mode (tensor, statistics or md5).
        chunk_size: number of rows written back per assignment (default 1000). The results
            are assigned block by block so that only a small slice is materialised at a
            time, keeping peak memory down. Must be positive.

    Raises:
        ValueError: ``chunk_size`` is not positive, or the header for ``mode`` lacks the
            result or error-message column.
        RuntimeError: no header is registered for ``mode``.
    """
    # A negative step would silently write nothing and zero makes range() fail with an
    # unrelated-looking message, so reject both before doing any work.
    if chunk_size <= 0:
        raise ValueError(f'The chunk_size must be a positive integer, but got {chunk_size}.')

    result_dict = divide_result_df(result_df)
    calculator = ApiIndicatorCalculator(mode)
    calculated_result_lists = []
    for data_lists in result_dict.values():
        # The return value is not needed here; the updated cells are read back from data_lists.
        calculator.calculate(data_lists)
        calculated_result_lists.extend(data_lists)

    head = CompareConst.HEAD_OF_COMPARE_MODE.get(mode)
    if not head:
        msg = f'Unable to obtain header based on compare mode: {mode}'
        logger.error(msg)
        raise RuntimeError(msg)

    # Column mapping: (target column name in result_df, column index inside each result row).
    try:
        source_cols = [
            (CompareConst.RESULT, head.index(CompareConst.RESULT)),
            (CompareConst.ERROR_MESSAGE, head.index(CompareConst.ERROR_MESSAGE))
        ]
    except ValueError as e:
        logger.error(f'The {CompareConst.RESULT} or {CompareConst.ERROR_MESSAGE} does not exist in the header: {e}')
        raise

    total_rows = len(calculated_result_lists)
    if not total_rows:
        return

    # The positional index of each target column does not change between chunks: resolve it once.
    cols_mapping = [(result_df.columns.get_loc(name), row_idx) for name, row_idx in source_cols]

    # Assign block by block to lower the transient memory peak.
    # NOTE(review): rows are written positionally in the order produced by divide_result_df;
    # this assumes that order matches the row order of result_df - TODO confirm.
    for start in range(0, total_rows, chunk_size):
        end_idx = min(start + chunk_size, total_rows)
        current_result_chunk = calculated_result_lists[start:end_idx]

        for df_col_idx, row_idx in cols_mapping:
            result_df.iloc[start:end_idx, df_col_idx] = [row[row_idx] for row in current_result_chunk]
166
+
167
+
168
def calculate_result(result, mode, parallel_merge=False):
    """
    Compute the indicator of a single API or module.

    Args:
        result: List[List]; each inner list describes one parameter of the API or module.
        mode: comparison mode (tensor, statistics or md5).
        parallel_merge: whether this is the merged comparison of graphs with different
            partitioning strategies; defaults to False.

    Return:
        The accuracy indicator (pass/warning/error).
    """
    return ApiIndicatorCalculator(mode, parallel_merge).calculate(result)