mindstudio-probe 8.3.3__py3-none-any.whl → 26.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (689)
  1. {mindstudio_probe-8.3.3.dist-info → mindstudio_probe-26.0.0a1.dist-info}/METADATA +26 -14
  2. mindstudio_probe-26.0.0a1.dist-info/RECORD +498 -0
  3. {mindstudio_probe-8.3.3.dist-info → mindstudio_probe-26.0.0a1.dist-info}/WHEEL +1 -1
  4. mindstudio_probe-26.0.0a1.dist-info/entry_points.txt +5 -0
  5. mindstudio_probe-26.0.0a1.dist-info/licenses/LICENSE +124 -0
  6. mindstudio_probe-26.0.0a1.dist-info/top_level.txt +2 -0
  7. msprobe/__init__.py +12 -13
  8. msprobe/config.json +9 -31
  9. msprobe/core/__init__.py +12 -11
  10. msprobe/core/acc_check/acc_check_cli.py +145 -0
  11. msprobe/core/common/const.py +97 -38
  12. msprobe/core/common/db_manager.py +133 -12
  13. msprobe/core/common/decorator.py +12 -11
  14. msprobe/core/common/exceptions.py +12 -11
  15. msprobe/core/common/file_utils.py +101 -25
  16. msprobe/core/common/framework_adapter.py +36 -25
  17. msprobe/core/common/global_lock.py +12 -11
  18. msprobe/core/common/inplace_op_checker.py +12 -11
  19. msprobe/core/common/log.py +22 -11
  20. msprobe/core/common/megatron_utils.py +566 -11
  21. msprobe/core/common/parallel_state.py +12 -11
  22. msprobe/core/common/runtime.py +12 -11
  23. msprobe/core/common/utils.py +41 -41
  24. msprobe/core/compare/acc_compare.py +361 -104
  25. msprobe/core/compare/atb_data_compare.py +422 -0
  26. msprobe/core/compare/auto_compare.py +134 -0
  27. msprobe/core/compare/check.py +14 -17
  28. msprobe/core/compare/compare_cli.py +72 -149
  29. msprobe/core/compare/config.py +12 -13
  30. msprobe/core/compare/diff_analyze/first_diff_analyze.py +28 -15
  31. msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
  32. msprobe/core/compare/find_first/analyzer.py +18 -18
  33. msprobe/core/compare/find_first/graph.py +12 -11
  34. msprobe/core/compare/find_first/utils.py +13 -12
  35. msprobe/core/compare/indicator_analysis/__init__.py +15 -0
  36. msprobe/core/compare/indicator_analysis/algorithm.py +363 -0
  37. msprobe/core/compare/indicator_analysis/api_data.py +141 -0
  38. msprobe/core/compare/indicator_analysis/calculator.py +181 -0
  39. msprobe/core/compare/indicator_analysis/utils.py +116 -0
  40. msprobe/core/compare/layer_mapping/__init__.py +12 -11
  41. msprobe/core/compare/layer_mapping/data_scope_parser.py +20 -11
  42. msprobe/core/compare/layer_mapping/layer_mapping.py +14 -13
  43. msprobe/core/compare/layer_mapping/postprocess_pass.py +13 -11
  44. msprobe/core/compare/merge_result/merge_result.py +12 -11
  45. msprobe/core/compare/merge_result/merge_result_cli.py +12 -11
  46. msprobe/core/compare/merge_result/utils.py +12 -11
  47. msprobe/core/compare/multiprocessing_compute.py +13 -14
  48. msprobe/core/compare/npy_compare.py +13 -11
  49. msprobe/core/compare/offline_data_compare.py +160 -0
  50. msprobe/core/compare/stats_diff_calc.py +39 -0
  51. msprobe/core/compare/torchair_acc_cmp.py +764 -0
  52. msprobe/core/compare/torchair_cmp_utils.py +338 -0
  53. msprobe/core/compare/utils.py +140 -49
  54. msprobe/core/config_check/__init__.py +12 -11
  55. msprobe/core/config_check/checkers/__init__.py +12 -11
  56. msprobe/core/config_check/checkers/base_checker.py +15 -14
  57. msprobe/core/config_check/checkers/dataset_checker.py +13 -12
  58. msprobe/core/config_check/checkers/env_args_checker.py +13 -12
  59. msprobe/core/config_check/checkers/hyperparameter_checker.py +16 -15
  60. msprobe/core/config_check/checkers/pip_checker.py +15 -15
  61. msprobe/core/config_check/checkers/random_checker.py +13 -12
  62. msprobe/core/config_check/checkers/weights_checker.py +14 -12
  63. msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +13 -17
  64. msprobe/core/config_check/ckpt_compare/megatron_loader.py +13 -12
  65. msprobe/core/config_check/ckpt_compare/metrics.py +12 -11
  66. msprobe/core/config_check/config_check_cli.py +18 -17
  67. msprobe/core/config_check/config_checker.py +16 -14
  68. msprobe/core/config_check/resource/dependency.yaml +15 -12
  69. msprobe/core/config_check/resource/env.yaml +12 -11
  70. msprobe/core/config_check/utils/hyperparameter_parser.py +12 -11
  71. msprobe/core/config_check/utils/utils.py +12 -11
  72. msprobe/core/{data_dump → dump/api_dump}/api_registry.py +12 -11
  73. msprobe/core/{common_config.py → dump/common_config.py} +13 -24
  74. msprobe/core/dump/data_dump/data_collector.py +257 -0
  75. msprobe/core/{data_dump → dump/data_dump}/data_processor/base.py +45 -36
  76. msprobe/core/{data_dump → dump/data_dump}/data_processor/factory.py +33 -25
  77. msprobe/core/{data_dump → dump/data_dump}/data_processor/mindspore_processor.py +37 -113
  78. msprobe/core/{data_dump → dump/data_dump}/data_processor/pytorch_processor.py +364 -131
  79. msprobe/core/{data_dump → dump/data_dump}/json_writer.py +24 -31
  80. msprobe/core/{data_dump → dump/data_dump}/scope.py +12 -13
  81. msprobe/core/{debugger → dump/debugger}/precision_debugger.py +15 -23
  82. msprobe/core/dump/dump2db/db_utils.py +215 -0
  83. msprobe/core/dump/dump2db/dump2db.py +409 -0
  84. msprobe/core/{hook_manager.py → dump/hook_manager.py} +38 -87
  85. msprobe/core/dump/kernel_dump/kernel_config.py +34 -0
  86. msprobe/core/{service.py → dump/service.py} +43 -27
  87. msprobe/core/install_deps/install_deps.py +51 -0
  88. msprobe/core/monitor/anomaly_processor.py +13 -11
  89. msprobe/core/monitor/csv2db.py +73 -93
  90. msprobe/core/monitor/db_utils.py +140 -205
  91. msprobe/core/monitor/utils.py +18 -17
  92. msprobe/core/monitor_v2/__init__.py +20 -0
  93. msprobe/core/monitor_v2/base.py +83 -0
  94. msprobe/core/monitor_v2/cc.py +287 -0
  95. msprobe/core/monitor_v2/factory.py +81 -0
  96. msprobe/core/monitor_v2/module.py +201 -0
  97. msprobe/core/monitor_v2/optimizer.py +245 -0
  98. msprobe/core/monitor_v2/param.py +154 -0
  99. msprobe/core/monitor_v2/trainer.py +326 -0
  100. msprobe/core/monitor_v2/utils.py +122 -0
  101. msprobe/core/monitor_v2/weight_grad.py +419 -0
  102. msprobe/core/monitor_v2/writer.py +162 -0
  103. msprobe/core/overflow_check/abnormal_scene.py +12 -11
  104. msprobe/core/overflow_check/api_info.py +12 -11
  105. msprobe/core/overflow_check/checker.py +12 -11
  106. msprobe/core/overflow_check/filter.py +13 -11
  107. msprobe/core/overflow_check/level.py +12 -11
  108. msprobe/core/overflow_check/utils.py +12 -11
  109. msprobe/core/single_save/single_comparator.py +12 -11
  110. msprobe/core/single_save/single_saver.py +12 -11
  111. msprobe/infer/__init__.py +16 -0
  112. msprobe/infer/offline/__init__.py +16 -0
  113. msprobe/infer/offline/compare/__init__.py +16 -0
  114. msprobe/infer/offline/compare/msquickcmp/__init__.py +16 -0
  115. msprobe/infer/offline/compare/msquickcmp/adapter_cli/__init__.py +16 -0
  116. msprobe/infer/offline/compare/msquickcmp/adapter_cli/args_adapter.py +46 -0
  117. msprobe/infer/offline/compare/msquickcmp/atc/__init__.py +16 -0
  118. msprobe/infer/offline/compare/msquickcmp/atc/atc_utils.py +98 -0
  119. msprobe/infer/offline/compare/msquickcmp/cmp_process.py +328 -0
  120. msprobe/infer/offline/compare/msquickcmp/common/__init__.py +16 -0
  121. msprobe/infer/offline/compare/msquickcmp/common/args_check.py +112 -0
  122. msprobe/infer/offline/compare/msquickcmp/common/convert.py +74 -0
  123. msprobe/infer/offline/compare/msquickcmp/common/dump_data.py +121 -0
  124. msprobe/infer/offline/compare/msquickcmp/common/dynamic_argument_bean.py +39 -0
  125. msprobe/infer/offline/compare/msquickcmp/common/utils.py +669 -0
  126. msprobe/infer/offline/compare/msquickcmp/config.ini +6 -0
  127. msprobe/infer/offline/compare/msquickcmp/dump/__init__.py +16 -0
  128. msprobe/infer/offline/compare/msquickcmp/dump/args_adapter.py +50 -0
  129. msprobe/infer/offline/compare/msquickcmp/dump/dump_process.py +91 -0
  130. msprobe/infer/offline/compare/msquickcmp/install_aclruntime_aisbench.sh +180 -0
  131. msprobe/infer/offline/compare/msquickcmp/main.py +199 -0
  132. msprobe/infer/offline/compare/msquickcmp/net_compare/__init__.py +16 -0
  133. msprobe/infer/offline/compare/msquickcmp/net_compare/net_compare.py +277 -0
  134. msprobe/infer/offline/compare/msquickcmp/npu/__init__.py +16 -0
  135. msprobe/infer/offline/compare/msquickcmp/npu/npu_dump_data.py +558 -0
  136. msprobe/infer/offline/compare/msquickcmp/npu/om_parser.py +416 -0
  137. msprobe/infer/offline/compare/msquickcmp/onnx_model/__init__.py +16 -0
  138. msprobe/infer/offline/compare/msquickcmp/onnx_model/onnx_dump_data.py +374 -0
  139. msprobe/infer/utils/__init__.py +15 -0
  140. msprobe/infer/utils/acc_cmp.py +94 -0
  141. msprobe/infer/utils/check/__init__.py +37 -0
  142. msprobe/infer/utils/check/args_checker.py +35 -0
  143. msprobe/infer/utils/check/checker.py +227 -0
  144. msprobe/infer/utils/check/dict_checker.py +78 -0
  145. msprobe/infer/utils/check/func_wrapper.py +96 -0
  146. msprobe/infer/utils/check/list_checker.py +56 -0
  147. msprobe/infer/utils/check/number_checker.py +64 -0
  148. msprobe/infer/utils/check/obj_checker.py +41 -0
  149. msprobe/infer/utils/check/path_checker.py +249 -0
  150. msprobe/infer/utils/check/rule.py +126 -0
  151. msprobe/infer/utils/check/string_checker.py +66 -0
  152. msprobe/infer/utils/cmp_algorithm.py +261 -0
  153. msprobe/infer/utils/constants.py +112 -0
  154. msprobe/infer/utils/file_open_check.py +337 -0
  155. msprobe/infer/utils/util.py +177 -0
  156. msprobe/mindspore/__init__.py +14 -13
  157. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +14 -13
  158. msprobe/mindspore/api_accuracy_checker/api_info.py +12 -11
  159. msprobe/mindspore/api_accuracy_checker/api_runner.py +12 -11
  160. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +12 -11
  161. msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +12 -11
  162. msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +12 -11
  163. msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +12 -11
  164. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +15 -14
  165. msprobe/mindspore/api_accuracy_checker/compute_element.py +12 -11
  166. msprobe/mindspore/api_accuracy_checker/data_manager.py +13 -11
  167. msprobe/mindspore/api_accuracy_checker/main.py +12 -11
  168. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +14 -12
  169. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +13 -11
  170. msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +12 -11
  171. msprobe/mindspore/api_accuracy_checker/type_mapping.py +12 -11
  172. msprobe/mindspore/api_accuracy_checker/utils.py +12 -11
  173. msprobe/mindspore/common/const.py +15 -74
  174. msprobe/mindspore/common/log.py +12 -11
  175. msprobe/mindspore/common/utils.py +30 -15
  176. msprobe/mindspore/compare/common_dir_compare.py +21 -23
  177. msprobe/mindspore/compare/distributed_compare.py +18 -16
  178. msprobe/mindspore/compare/ms_compare.py +14 -14
  179. msprobe/mindspore/compare/ms_graph_compare.py +26 -20
  180. msprobe/mindspore/compare/utils.py +14 -12
  181. msprobe/mindspore/{cell_processor.py → dump/cell_processor.py} +15 -14
  182. msprobe/mindspore/{debugger → dump/debugger}/debugger_config.py +12 -30
  183. msprobe/mindspore/{debugger → dump/debugger}/precision_debugger.py +43 -45
  184. msprobe/mindspore/dump/{cell_dump_process.py → dump_processor/cell_dump_process.py} +31 -17
  185. msprobe/mindspore/dump/{cell_dump_with_insert_gradient.py → dump_processor/cell_dump_with_insert_gradient.py} +18 -14
  186. msprobe/mindspore/dump/{dump_tool_factory.py → dump_processor/dump_tool_factory.py} +16 -15
  187. msprobe/mindspore/dump/{graph_mode_cell_dump.py → dump_processor/graph_mode_cell_dump.py} +16 -15
  188. msprobe/mindspore/dump/{graph_tensor_dump.py → dump_processor/graph_tensor_dump.py} +134 -133
  189. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/api_register.py +15 -14
  190. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/hook_cell.py +12 -11
  191. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/ms_hook_manager.py +47 -20
  192. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/primitive_hooks.py +14 -13
  193. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/support_wrap_ops.yaml +13 -11
  194. msprobe/mindspore/dump/{jit_dump.py → dump_processor/jit_dump.py} +14 -13
  195. msprobe/mindspore/dump/{kernel_graph_dump.py → dump_processor/kernel_graph_dump.py} +13 -12
  196. msprobe/mindspore/dump/{kernel_kbyk_dump.py → dump_processor/kernel_kbyk_dump.py} +13 -12
  197. msprobe/mindspore/{exception_dump → dump/exception_dump}/exception_dump_tool_factory.py +14 -13
  198. msprobe/mindspore/{exception_dump → dump/exception_dump}/kernel_graph_exception_dump.py +13 -12
  199. msprobe/mindspore/{mindspore_service.py → dump/mindspore_service.py} +18 -17
  200. msprobe/mindspore/dump/mindtorch/__init__.py +19 -0
  201. msprobe/mindspore/dump/ms_config.py +105 -0
  202. msprobe/mindspore/{overflow_check → dump/overflow_check}/kernel_graph_overflow_check.py +13 -12
  203. msprobe/mindspore/{overflow_check → dump/overflow_check}/overflow_check_tool_factory.py +14 -13
  204. msprobe/mindspore/dump/task_handler_factory.py +43 -0
  205. msprobe/mindspore/monitor/common_func.py +12 -11
  206. msprobe/mindspore/monitor/data_writers.py +12 -11
  207. msprobe/mindspore/monitor/distributed/wrap_distributed.py +93 -39
  208. msprobe/mindspore/monitor/features.py +12 -11
  209. msprobe/mindspore/monitor/module_hook.py +19 -22
  210. msprobe/mindspore/monitor/optimizer_collect.py +29 -25
  211. msprobe/mindspore/monitor/utils.py +13 -11
  212. msprobe/msaccucmp/advisor/__init__.py +16 -0
  213. msprobe/msaccucmp/advisor/advisor_const.py +65 -0
  214. msprobe/msaccucmp/advisor/advisor_result.py +73 -0
  215. msprobe/msaccucmp/advisor/compare_advisor.py +99 -0
  216. msprobe/msaccucmp/advisor/input_advisor.py +66 -0
  217. msprobe/msaccucmp/advisor/node_advisor.py +68 -0
  218. msprobe/msaccucmp/advisor/overflow_advisor.py +58 -0
  219. msprobe/msaccucmp/algorithm_manager/__init__.py +16 -0
  220. msprobe/msaccucmp/algorithm_manager/algorithm_manager.py +464 -0
  221. msprobe/msaccucmp/algorithm_manager/algorithm_parameter.py +42 -0
  222. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_AccumulatedRelativeError.py +46 -0
  223. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_CosineSimilarity.py +58 -0
  224. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_KullbackLeiblerDivergence.py +84 -0
  225. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MaxAbsoluteError.py +41 -0
  226. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MaxRelativeError.py +46 -0
  227. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MeanAbsoluteError.py +41 -0
  228. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MeanRelativeError.py +46 -0
  229. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_RelativeEuclideanDistance.py +46 -0
  230. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_RootMeanSquareError.py +40 -0
  231. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_StandardDeviation.py +47 -0
  232. msprobe/msaccucmp/cmp_utils/__init__.py +16 -0
  233. msprobe/msaccucmp/cmp_utils/common.py +113 -0
  234. msprobe/msaccucmp/cmp_utils/constant/__init__.py +16 -0
  235. msprobe/msaccucmp/cmp_utils/constant/compare_error.py +81 -0
  236. msprobe/msaccucmp/cmp_utils/constant/const_manager.py +530 -0
  237. msprobe/msaccucmp/cmp_utils/file_utils.py +497 -0
  238. msprobe/msaccucmp/cmp_utils/log.py +257 -0
  239. msprobe/msaccucmp/cmp_utils/multi_process/__init__.py +16 -0
  240. msprobe/msaccucmp/cmp_utils/multi_process/multi_convert_process.py +140 -0
  241. msprobe/msaccucmp/cmp_utils/multi_process/progress.py +78 -0
  242. msprobe/msaccucmp/cmp_utils/path_check.py +274 -0
  243. msprobe/msaccucmp/cmp_utils/reg_manager.py +98 -0
  244. msprobe/msaccucmp/cmp_utils/tlv_parse.py +279 -0
  245. msprobe/msaccucmp/cmp_utils/utils.py +356 -0
  246. msprobe/msaccucmp/cmp_utils/utils_type.py +63 -0
  247. msprobe/msaccucmp/compare_vector.py +48 -0
  248. msprobe/msaccucmp/conversion/__init__.py +16 -0
  249. msprobe/msaccucmp/conversion/data_conversion.py +277 -0
  250. msprobe/msaccucmp/conversion/dtype_conversion.py +99 -0
  251. msprobe/msaccucmp/conversion/shape_format_conversion.py +477 -0
  252. msprobe/msaccucmp/conversion/tensor_conversion.py +369 -0
  253. msprobe/msaccucmp/dump_data_conversion.py +46 -0
  254. msprobe/msaccucmp/dump_parse/__init__.py +16 -0
  255. msprobe/msaccucmp/dump_parse/big_dump_data.py +317 -0
  256. msprobe/msaccucmp/dump_parse/dump.py +423 -0
  257. msprobe/msaccucmp/dump_parse/dump_data_object.py +322 -0
  258. msprobe/msaccucmp/dump_parse/dump_data_parser.py +436 -0
  259. msprobe/msaccucmp/dump_parse/dump_utils.py +246 -0
  260. msprobe/msaccucmp/dump_parse/ffts_parser.py +137 -0
  261. msprobe/msaccucmp/dump_parse/mapping.py +62 -0
  262. msprobe/msaccucmp/dump_parse/nano_dump_data.py +392 -0
  263. msprobe/msaccucmp/dump_parse/proto_dump_data.py +308 -0
  264. msprobe/msaccucmp/dump_parser.py +90 -0
  265. msprobe/msaccucmp/format_manager/__init__.py +16 -0
  266. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_NCHW.py +53 -0
  267. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_ND.py +52 -0
  268. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_NHWC.py +53 -0
  269. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_Z_to_HWCN.py +47 -0
  270. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_Z_to_NCHW.py +47 -0
  271. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_FRACTAL_Z.py +89 -0
  272. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_NCHW.py +37 -0
  273. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_NHWC.py +37 -0
  274. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_HWCN.py +43 -0
  275. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_NCHW.py +48 -0
  276. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_NHWC.py +43 -0
  277. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NCHW_to_FRACTAL_Z.py +87 -0
  278. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NCHW_to_NHWC.py +37 -0
  279. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NDC1HWC0_to_NCDHW.py +48 -0
  280. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NDC1HWC0_to_ND.py +44 -0
  281. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_FRACTAL_Z.py +87 -0
  282. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_HWCN.py +37 -0
  283. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_NCHW.py +37 -0
  284. msprobe/msaccucmp/format_manager/format_manager.py +307 -0
  285. msprobe/msaccucmp/inplace_layer_process.py +186 -0
  286. msprobe/msaccucmp/msaccucmp.py +532 -0
  287. msprobe/msaccucmp/mscmp_advisor.py +128 -0
  288. msprobe/msaccucmp/overflow/__init__.py +16 -0
  289. msprobe/msaccucmp/overflow/overflow_analyse.py +305 -0
  290. msprobe/msaccucmp/overflow/overflow_detection.py +143 -0
  291. msprobe/msaccucmp/pytorch_cmp/__init__.py +16 -0
  292. msprobe/msaccucmp/pytorch_cmp/compare_pytorch.py +389 -0
  293. msprobe/msaccucmp/pytorch_cmp/hdf5_parser.py +377 -0
  294. msprobe/msaccucmp/pytorch_cmp/pytorch_dump_data.py +461 -0
  295. msprobe/msaccucmp/shape_conversion.py +41 -0
  296. msprobe/msaccucmp/vector_cmp/__init__.py +16 -0
  297. msprobe/msaccucmp/vector_cmp/batch_compare.py +197 -0
  298. msprobe/msaccucmp/vector_cmp/compare_detail/__init__.py +16 -0
  299. msprobe/msaccucmp/vector_cmp/compare_detail/compare_detail.py +245 -0
  300. msprobe/msaccucmp/vector_cmp/compare_detail/detail.py +182 -0
  301. msprobe/msaccucmp/vector_cmp/compare_detail/detail_writer.py +580 -0
  302. msprobe/msaccucmp/vector_cmp/fusion_manager/__init__.py +16 -0
  303. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_fusion_op.py +588 -0
  304. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_npu_vs_npu.py +339 -0
  305. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_result.py +326 -0
  306. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_rule.py +156 -0
  307. msprobe/msaccucmp/vector_cmp/fusion_manager/fusion_op.py +204 -0
  308. msprobe/msaccucmp/vector_cmp/fusion_manager/fusion_rule_parser.py +635 -0
  309. msprobe/msaccucmp/vector_cmp/fusion_manager/quant_filter.py +187 -0
  310. msprobe/msaccucmp/vector_cmp/range_manager/__init__.py +16 -0
  311. msprobe/msaccucmp/vector_cmp/range_manager/range_manager.py +100 -0
  312. msprobe/msaccucmp/vector_cmp/range_manager/range_mode.py +94 -0
  313. msprobe/msaccucmp/vector_cmp/range_manager/select_mode.py +86 -0
  314. msprobe/msaccucmp/vector_cmp/vector_comparison.py +535 -0
  315. msprobe/msprobe.py +101 -130
  316. msprobe/overflow_check/__init__.py +15 -0
  317. msprobe/{nan_analyze → overflow_check}/analyzer.py +38 -27
  318. msprobe/{nan_analyze → overflow_check}/graph.py +28 -27
  319. msprobe/{nan_analyze → overflow_check}/utils.py +15 -14
  320. msprobe/pytorch/__init__.py +20 -14
  321. msprobe/pytorch/aclgraph_dump/__init__.py +45 -0
  322. msprobe/pytorch/aclgraph_dump/_meta.py +26 -0
  323. msprobe/pytorch/api_accuracy_checker/{run_ut/run_ut.py → acc_check/acc_check.py} +50 -45
  324. msprobe/pytorch/api_accuracy_checker/{run_ut/run_ut_utils.py → acc_check/acc_check_utils.py} +201 -30
  325. msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/data_generate.py +56 -16
  326. msprobe/pytorch/api_accuracy_checker/{run_ut/multi_run_ut.py → acc_check/multi_acc_check.py} +32 -47
  327. msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/run_overflow_check.py +19 -18
  328. msprobe/pytorch/api_accuracy_checker/common/config.py +22 -20
  329. msprobe/pytorch/api_accuracy_checker/common/utils.py +72 -13
  330. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -11
  331. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +23 -14
  332. msprobe/pytorch/api_accuracy_checker/compare/compare.py +45 -32
  333. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +12 -11
  334. msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +14 -12
  335. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +14 -12
  336. msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +12 -11
  337. msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +12 -11
  338. msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +21 -19
  339. msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +14 -13
  340. msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +12 -11
  341. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +60 -11
  342. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +27 -16
  343. msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +13 -11
  344. msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +39 -18
  345. msprobe/pytorch/bench_functions/__init__.py +12 -11
  346. msprobe/pytorch/bench_functions/apply_adam.py +12 -11
  347. msprobe/pytorch/bench_functions/apply_adam_w.py +12 -11
  348. msprobe/pytorch/bench_functions/confusion_transpose.py +12 -11
  349. msprobe/pytorch/bench_functions/fast_gelu.py +12 -11
  350. msprobe/pytorch/bench_functions/group_norm_silu.py +12 -11
  351. msprobe/pytorch/bench_functions/layer_norm_eval.py +12 -11
  352. msprobe/pytorch/bench_functions/linear.py +12 -11
  353. msprobe/pytorch/bench_functions/matmul_backward.py +12 -11
  354. msprobe/pytorch/bench_functions/mish.py +12 -11
  355. msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +12 -11
  356. msprobe/pytorch/bench_functions/npu_fusion_attention.py +12 -11
  357. msprobe/pytorch/bench_functions/rms_norm.py +12 -11
  358. msprobe/pytorch/bench_functions/rotary_mul.py +12 -11
  359. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +12 -11
  360. msprobe/pytorch/bench_functions/sort_v2.py +12 -11
  361. msprobe/pytorch/bench_functions/swiglu.py +12 -11
  362. msprobe/pytorch/common/__init__.py +12 -11
  363. msprobe/pytorch/common/log.py +12 -11
  364. msprobe/pytorch/common/parse_json.py +12 -11
  365. msprobe/pytorch/common/utils.py +52 -19
  366. msprobe/pytorch/compare/distributed_compare.py +13 -13
  367. msprobe/pytorch/compare/match.py +12 -11
  368. msprobe/pytorch/compare/pt_compare.py +14 -20
  369. msprobe/pytorch/compare/pt_diff_analyze.py +12 -11
  370. msprobe/pytorch/compare/utils.py +12 -11
  371. msprobe/pytorch/{hook_module → dump/api_dump}/api_register.py +18 -16
  372. msprobe/pytorch/{hook_module → dump/api_dump}/hook_module.py +14 -13
  373. msprobe/pytorch/{hook_module → dump/api_dump}/pt_hook_manager.py +68 -23
  374. msprobe/pytorch/{hook_module → dump/api_dump}/register_optimizer_hook.py +13 -11
  375. msprobe/pytorch/{hook_module → dump/api_dump}/script_wrapper.py +17 -14
  376. msprobe/pytorch/{hook_module → dump/api_dump}/utils.py +12 -11
  377. msprobe/pytorch/{debugger → dump/debugger}/debugger_config.py +23 -38
  378. msprobe/pytorch/dump/debugger/precision_debugger.py +130 -0
  379. msprobe/pytorch/{function_factory.py → dump/function_factory.py} +12 -11
  380. msprobe/pytorch/dump/module_dump/hook_wrapper.py +17 -13
  381. msprobe/pytorch/dump/module_dump/module_dump.py +16 -15
  382. msprobe/pytorch/dump/module_dump/{module_processer.py → module_processor.py} +54 -42
  383. msprobe/pytorch/dump/pt_config.py +128 -0
  384. msprobe/pytorch/{pytorch_service.py → dump/pytorch_service.py} +22 -21
  385. msprobe/pytorch/monitor/csv2tb.py +13 -11
  386. msprobe/pytorch/monitor/data_writers.py +13 -11
  387. msprobe/pytorch/monitor/distributed/wrap_distributed.py +13 -11
  388. msprobe/pytorch/monitor/features.py +12 -11
  389. msprobe/pytorch/monitor/module_hook.py +67 -59
  390. msprobe/pytorch/monitor/module_metric.py +13 -11
  391. msprobe/pytorch/monitor/optimizer_collect.py +37 -35
  392. msprobe/pytorch/monitor/utils.py +13 -11
  393. msprobe/pytorch/monitor/visualizer.py +12 -11
  394. msprobe/pytorch/torchair_dump/__init__.py +17 -0
  395. msprobe/pytorch/torchair_dump/torchair_dump.py +114 -0
  396. msprobe/scripts/atb/config_example.json +10 -0
  397. msprobe/scripts/atb/load_atb_probe.sh +101 -0
  398. msprobe/scripts/atb/unload_atb_probe.sh +27 -0
  399. msprobe/scripts/build_msaccucmp.sh +186 -0
  400. msprobe/scripts/conf/help.info +6 -0
  401. msprobe/scripts/conf/version.info +3 -0
  402. msprobe/scripts/run_script/common.sh +538 -0
  403. msprobe/scripts/run_script/main_msaccucmp.sh +232 -0
  404. msprobe/visualization/__init__.py +12 -11
  405. msprobe/visualization/builder/__init__.py +12 -11
  406. msprobe/visualization/builder/graph_builder.py +45 -30
  407. msprobe/visualization/builder/graph_merger.py +53 -32
  408. msprobe/visualization/builder/msprobe_adapter.py +34 -44
  409. msprobe/visualization/compare/__init__.py +12 -11
  410. msprobe/visualization/compare/graph_comparator.py +63 -51
  411. msprobe/visualization/compare/mode_adapter.py +28 -113
  412. msprobe/visualization/db_utils.py +133 -22
  413. msprobe/visualization/graph/__init__.py +12 -11
  414. msprobe/visualization/graph/base_node.py +15 -27
  415. msprobe/visualization/graph/distributed_analyzer.py +97 -40
  416. msprobe/visualization/graph/graph.py +14 -16
  417. msprobe/visualization/graph/node_colors.py +34 -31
  418. msprobe/visualization/graph/node_op.py +12 -11
  419. msprobe/visualization/graph_service.py +580 -205
  420. msprobe/visualization/utils.py +278 -31
  421. tb_graph_ascend/secure_build.py +175 -0
  422. tb_graph_ascend/server/__init__.py +15 -0
  423. tb_graph_ascend/server/app/__init__.py +15 -0
  424. tb_graph_ascend/server/app/model/__init__.py +15 -0
  425. tb_graph_ascend/server/app/model/hierarchy.py +348 -0
  426. tb_graph_ascend/server/app/model/layout_hierarchy_model.py +69 -0
  427. tb_graph_ascend/server/app/model/match_nodes_model.py +573 -0
  428. tb_graph_ascend/server/app/repositories/__init__.py +15 -0
  429. tb_graph_ascend/server/app/repositories/graph_repo_base.py +32 -0
  430. tb_graph_ascend/server/app/repositories/graph_repo_db.py +879 -0
  431. tb_graph_ascend/server/app/repositories/graph_repo_vis.py +83 -0
  432. tb_graph_ascend/server/app/service/__init__.py +18 -0
  433. tb_graph_ascend/server/app/service/graph_service_base.py +158 -0
  434. tb_graph_ascend/server/app/service/graph_service_db.py +438 -0
  435. tb_graph_ascend/server/app/service/graph_service_factory.py +54 -0
  436. tb_graph_ascend/server/app/service/graph_service_vis.py +480 -0
  437. tb_graph_ascend/server/app/utils/__init__.py +15 -0
  438. tb_graph_ascend/server/app/utils/constant.py +80 -0
  439. tb_graph_ascend/server/app/utils/file_check_wrapper.py +46 -0
  440. tb_graph_ascend/server/app/utils/global_state.py +95 -0
  441. tb_graph_ascend/server/app/utils/graph_utils.py +661 -0
  442. tb_graph_ascend/server/app/utils/i18n.py +153 -0
  443. tb_graph_ascend/server/app/utils/request_method.py +46 -0
  444. tb_graph_ascend/server/app/views/__init__.py +15 -0
  445. tb_graph_ascend/server/app/views/graph_views.py +304 -0
  446. tb_graph_ascend/server/plugin.py +108 -0
  447. tb_graph_ascend/server/static/index.html +9250 -0
  448. tb_graph_ascend/server/static/index.js +21 -0
  449. tb_graph_ascend/setup.py +57 -0
  450. mindstudio_probe-8.3.3.dist-info/LICENSE +0 -201
  451. mindstudio_probe-8.3.3.dist-info/RECORD +0 -491
  452. mindstudio_probe-8.3.3.dist-info/entry_points.txt +0 -2
  453. mindstudio_probe-8.3.3.dist-info/top_level.txt +0 -1
  454. msprobe/CMakeLists.txt +0 -5
  455. msprobe/README.md +0 -203
  456. msprobe/core/advisor/advisor.py +0 -129
  457. msprobe/core/advisor/advisor_const.py +0 -58
  458. msprobe/core/advisor/advisor_result.py +0 -58
  459. msprobe/core/compare/find_first/data_processor.py +0 -35
  460. msprobe/core/compare/highlight.py +0 -390
  461. msprobe/core/data_dump/data_collector.py +0 -356
  462. msprobe/core/grad_probe/constant.py +0 -90
  463. msprobe/core/grad_probe/grad_compare.py +0 -187
  464. msprobe/core/grad_probe/utils.py +0 -105
  465. msprobe/core/kernel_dump/kernel_config.py +0 -33
  466. msprobe/docs/01.installation.md +0 -250
  467. msprobe/docs/02.config_introduction.md +0 -221
  468. msprobe/docs/03.config_examples.md +0 -281
  469. msprobe/docs/04.kernel_dump_PyTorch.md +0 -73
  470. msprobe/docs/05.data_dump_PyTorch.md +0 -518
  471. msprobe/docs/06.data_dump_MindSpore.md +0 -618
  472. msprobe/docs/07.accuracy_checker_PyTorch.md +0 -310
  473. msprobe/docs/09.accuracy_checker_MindSpore.md +0 -120
  474. msprobe/docs/10.accuracy_compare_PyTorch.md +0 -637
  475. msprobe/docs/11.accuracy_compare_MindSpore.md +0 -769
  476. msprobe/docs/12.overflow_check_PyTorch.md +0 -82
  477. msprobe/docs/13.overflow_check_MindSpore.md +0 -33
  478. msprobe/docs/14.data_parse_PyTorch.md +0 -282
  479. msprobe/docs/15.free_benchmarking_PyTorch.md +0 -169
  480. msprobe/docs/16.free_benchmarking_MindSpore.md +0 -159
  481. msprobe/docs/17.grad_probe.md +0 -205
  482. msprobe/docs/18.online_dispatch.md +0 -89
  483. msprobe/docs/19.monitor.md +0 -753
  484. msprobe/docs/20.monitor_performance_baseline.md +0 -52
  485. msprobe/docs/21.visualization_PyTorch.md +0 -519
  486. msprobe/docs/22.visualization_MindSpore.md +0 -515
  487. msprobe/docs/23.generate_operator_PyTorch.md +0 -107
  488. msprobe/docs/24.code_mapping_Mindspore.md +0 -29
  489. msprobe/docs/25.tool_function_introduction.md +0 -29
  490. msprobe/docs/26.data_dump_PyTorch_baseline.md +0 -48
  491. msprobe/docs/27.dump_json_instruction.md +0 -795
  492. msprobe/docs/28.debugger_save_instruction.md +0 -288
  493. msprobe/docs/28.kernel_dump_MindSpore.md +0 -69
  494. msprobe/docs/29.data_dump_MSAdapter.md +0 -235
  495. msprobe/docs/30.overflow_check_MSAdapter.md +0 -31
  496. msprobe/docs/31.config_check.md +0 -107
  497. msprobe/docs/32.ckpt_compare.md +0 -69
  498. msprobe/docs/33.generate_operator_MindSpore.md +0 -181
  499. msprobe/docs/34.RL_collect.md +0 -101
  500. msprobe/docs/35.nan_analyze.md +0 -73
  501. msprobe/docs/36.calculation_result_change.md +0 -75
  502. msprobe/docs/FAQ.md +0 -232
  503. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +0 -146
  504. msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +0 -14
  505. msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +0 -33
  506. msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +0 -217
  507. msprobe/docs/img/BLOOM-7B_1.png +0 -0
  508. msprobe/docs/img/BLOOM-7B_2.png +0 -0
  509. msprobe/docs/img/BLOOM-7B_3.png +0 -0
  510. msprobe/docs/img/BLOOM-7B_4.png +0 -0
  511. msprobe/docs/img/GPT-3_1.png +0 -0
  512. msprobe/docs/img/GPT-3_2.png +0 -0
  513. msprobe/docs/img/GPT-3_3.png +0 -0
  514. msprobe/docs/img/GPT-3_4.png +0 -0
  515. msprobe/docs/img/GPT-3_5.png +0 -0
  516. msprobe/docs/img/GPT-3_6.png +0 -0
  517. msprobe/docs/img/GPT-3_7.png +0 -0
  518. msprobe/docs/img/GPT-3_8.png +0 -0
  519. msprobe/docs/img/YOLOV5S_1.png +0 -0
  520. msprobe/docs/img/YOLOV5S_2.png +0 -0
  521. msprobe/docs/img/accuracy_checking_details.png +0 -0
  522. msprobe/docs/img/accuracy_checking_result.png +0 -0
  523. msprobe/docs/img/api_precision_compare_details.png +0 -0
  524. msprobe/docs/img/api_precision_compare_result.png +0 -0
  525. msprobe/docs/img/auto_analyze_log.png +0 -0
  526. msprobe/docs/img/compare_result.png +0 -0
  527. msprobe/docs/img/compare_result_pkl.png +0 -0
  528. msprobe/docs/img/compare_result_pkl_md5.png.png +0 -0
  529. msprobe/docs/img/cpu_info.png +0 -0
  530. msprobe/docs/img/free_benchmark.png +0 -0
  531. msprobe/docs/img/free_benchmark_framework.png +0 -0
  532. msprobe/docs/img/grad_probe_image-1.png +0 -0
  533. msprobe/docs/img/grad_probe_image-2.png +0 -0
  534. msprobe/docs/img/grad_probe_image-3.png +0 -0
  535. msprobe/docs/img/grad_probe_image-4.png +0 -0
  536. msprobe/docs/img/grad_probe_image.png +0 -0
  537. msprobe/docs/img/merge_result.png +0 -0
  538. msprobe/docs/img/module_compare.png +0 -0
  539. msprobe/docs/img/monitor/cpu_info.png +0 -0
  540. msprobe/docs/img/monitor/step_count_per_record.png +0 -0
  541. msprobe/docs/img/ms_dump.png +0 -0
  542. msprobe/docs/img/ms_layer.png +0 -0
  543. msprobe/docs/img/pt_dump.png +0 -0
  544. msprobe/docs/img/save_compare_result_sample.png +0 -0
  545. msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
  546. msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
  547. msprobe/docs/img/visualization/proxy.png +0 -0
  548. msprobe/docs/img/visualization/tensorboard_1.png +0 -0
  549. msprobe/docs/img/visualization/tensorboard_2.png +0 -0
  550. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  551. msprobe/docs/img/visualization/vis_browser_2.png +0 -0
  552. msprobe/docs/img/visualization/vis_match_info.png +0 -0
  553. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  554. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  555. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  556. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  557. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  558. msprobe/docs/visualization/GPTModel.png +0 -0
  559. msprobe/docs/visualization/ParallelMLP.png +0 -0
  560. msprobe/docs/visualization/layer_mapping_example.md +0 -132
  561. msprobe/docs/visualization/mapping.png +0 -0
  562. msprobe/docs/visualization/mapping1.png +0 -0
  563. msprobe/docs/visualization/mindspeed_llamafactoary_img/1.png +0 -0
  564. msprobe/docs/visualization/mindspeed_llamafactoary_img/2.png +0 -0
  565. msprobe/docs/visualization/mindspeed_llamafactoary_img/3.png +0 -0
  566. msprobe/docs/visualization/mindspeed_llamafactoary_img/4.png +0 -0
  567. msprobe/docs/visualization/mindspeed_llamafactoary_img/5.png +0 -0
  568. msprobe/docs/visualization/mindspeed_llamafactoary_img/6.png +0 -0
  569. msprobe/docs/visualization/mindspeed_llamafactoary_img/7.png +0 -0
  570. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt +0 -59
  571. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory1.png +0 -0
  572. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory2.png +0 -0
  573. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt +0 -80
  574. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed1.png +0 -0
  575. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed2.png +0 -0
  576. msprobe/docs/visualization/mindspeed_llamafactory_mapping.md +0 -330
  577. msprobe/docs/visualization/module_name.png +0 -0
  578. msprobe/docs/visualization/module_name1.png +0 -0
  579. msprobe/docs/visualization/no_mapping.png +0 -0
  580. msprobe/docs/visualization/no_mapping1.png +0 -0
  581. msprobe/docs/visualization/no_mapping_analyze.png +0 -0
  582. msprobe/docs/visualization/top_layer.png +0 -0
  583. msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +0 -460
  584. msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +0 -2081
  585. msprobe/mindspore/code_mapping/bind.py +0 -283
  586. msprobe/mindspore/code_mapping/cmd_parser.py +0 -40
  587. msprobe/mindspore/code_mapping/graph.py +0 -49
  588. msprobe/mindspore/code_mapping/graph_parser.py +0 -211
  589. msprobe/mindspore/code_mapping/main.py +0 -24
  590. msprobe/mindspore/code_mapping/processor.py +0 -34
  591. msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +0 -111
  592. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -52
  593. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +0 -257
  594. msprobe/mindspore/free_benchmark/common/config.py +0 -27
  595. msprobe/mindspore/free_benchmark/common/handler_params.py +0 -31
  596. msprobe/mindspore/free_benchmark/common/utils.py +0 -100
  597. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -638
  598. msprobe/mindspore/free_benchmark/handler/base_handler.py +0 -105
  599. msprobe/mindspore/free_benchmark/handler/check_handler.py +0 -55
  600. msprobe/mindspore/free_benchmark/handler/fix_handler.py +0 -51
  601. msprobe/mindspore/free_benchmark/handler/handler_factory.py +0 -36
  602. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +0 -82
  603. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +0 -45
  604. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +0 -78
  605. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +0 -77
  606. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +0 -56
  607. msprobe/mindspore/free_benchmark/perturbation/no_change.py +0 -27
  608. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +0 -46
  609. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +0 -51
  610. msprobe/mindspore/grad_probe/global_context.py +0 -127
  611. msprobe/mindspore/grad_probe/grad_analyzer.py +0 -260
  612. msprobe/mindspore/grad_probe/grad_monitor.py +0 -42
  613. msprobe/mindspore/grad_probe/grad_stat_csv.py +0 -161
  614. msprobe/mindspore/grad_probe/hook.py +0 -115
  615. msprobe/mindspore/grad_probe/utils.py +0 -43
  616. msprobe/mindspore/mindtorch/__init__.py +0 -18
  617. msprobe/mindspore/ms_config.py +0 -153
  618. msprobe/mindspore/task_handler_factory.py +0 -44
  619. msprobe/nan_analyze/__init__.py +0 -14
  620. msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +0 -9
  621. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +0 -480
  622. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +0 -567
  623. msprobe/pytorch/debugger/precision_debugger.py +0 -181
  624. msprobe/pytorch/free_benchmark/__init__.py +0 -23
  625. msprobe/pytorch/free_benchmark/common/constant.py +0 -85
  626. msprobe/pytorch/free_benchmark/common/counter.py +0 -87
  627. msprobe/pytorch/free_benchmark/common/enums.py +0 -80
  628. msprobe/pytorch/free_benchmark/common/params.py +0 -152
  629. msprobe/pytorch/free_benchmark/common/utils.py +0 -143
  630. msprobe/pytorch/free_benchmark/compare/grad_saver.py +0 -215
  631. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +0 -121
  632. msprobe/pytorch/free_benchmark/main.py +0 -123
  633. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +0 -28
  634. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +0 -56
  635. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +0 -107
  636. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +0 -121
  637. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +0 -89
  638. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +0 -87
  639. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +0 -43
  640. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +0 -60
  641. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +0 -34
  642. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +0 -252
  643. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +0 -54
  644. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +0 -40
  645. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +0 -45
  646. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -181
  647. msprobe/pytorch/grad_probe/__init__.py +0 -0
  648. msprobe/pytorch/grad_probe/grad_monitor.py +0 -108
  649. msprobe/pytorch/grad_probe/grad_stat_csv.py +0 -160
  650. msprobe/pytorch/hook_module/__init__.py +0 -16
  651. msprobe/pytorch/hook_module/wrap_aten.py +0 -111
  652. msprobe/pytorch/online_dispatch/__init__.py +0 -19
  653. msprobe/pytorch/online_dispatch/compare.py +0 -224
  654. msprobe/pytorch/online_dispatch/dispatch.py +0 -332
  655. msprobe/pytorch/online_dispatch/dump_compare.py +0 -179
  656. msprobe/pytorch/online_dispatch/single_compare.py +0 -412
  657. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +0 -58
  658. msprobe/pytorch/online_dispatch/utils.py +0 -158
  659. msprobe/pytorch/parse_tool/__init__.py +0 -0
  660. msprobe/pytorch/parse_tool/cli.py +0 -31
  661. msprobe/pytorch/parse_tool/lib/__init__.py +0 -0
  662. msprobe/pytorch/parse_tool/lib/compare.py +0 -253
  663. msprobe/pytorch/parse_tool/lib/config.py +0 -50
  664. msprobe/pytorch/parse_tool/lib/file_desc.py +0 -45
  665. msprobe/pytorch/parse_tool/lib/interactive_cli.py +0 -97
  666. msprobe/pytorch/parse_tool/lib/parse_exception.py +0 -54
  667. msprobe/pytorch/parse_tool/lib/parse_tool.py +0 -161
  668. msprobe/pytorch/parse_tool/lib/utils.py +0 -299
  669. msprobe/pytorch/parse_tool/lib/visualization.py +0 -85
  670. msprobe/pytorch/pt_config.py +0 -299
  671. /msprobe/core/{grad_probe → dump}/__init__.py +0 -0
  672. /msprobe/{mindspore/code_mapping → core/dump/api_dump}/__init__.py +0 -0
  673. /msprobe/{mindspore/debugger → core/dump/data_dump}/__init__.py +0 -0
  674. /msprobe/{mindspore/exception_dump → core/dump/data_dump/data_processor}/__init__.py +0 -0
  675. /msprobe/{mindspore/free_benchmark → core/dump/debugger}/__init__.py +0 -0
  676. /msprobe/{mindspore/free_benchmark/common → core/dump/kernel_dump}/__init__.py +0 -0
  677. /msprobe/mindspore/{free_benchmark/handler → dump/debugger}/__init__.py +0 -0
  678. /msprobe/mindspore/{grad_probe → dump/dump_processor}/__init__.py +0 -0
  679. /msprobe/mindspore/{overflow_check → dump/exception_dump}/__init__.py +0 -0
  680. /msprobe/mindspore/{mindtorch → dump/mindtorch}/mindtorch_adaptor.py +0 -0
  681. /msprobe/{pytorch/api_accuracy_checker/run_ut → mindspore/dump/overflow_check}/__init__.py +0 -0
  682. /msprobe/{pytorch/debugger → mindspore/monitor}/__init__.py +0 -0
  683. /msprobe/{pytorch/free_benchmark/common → msaccucmp}/__init__.py +0 -0
  684. /msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/.keep +0 -0
  685. /msprobe/pytorch/{free_benchmark/perturbed_layers → api_accuracy_checker/acc_check}/__init__.py +0 -0
  686. /msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/torch_ut_setting.json +0 -0
  687. /msprobe/pytorch/{free_benchmark/perturbed_layers/npu → dump/api_dump}/__init__.py +0 -0
  688. /msprobe/pytorch/{hook_module → dump/api_dump}/support_wrap_ops.yaml +0 -0
  689. /msprobe/pytorch/{free_benchmark/result_handlers → dump/debugger}/__init__.py +0 -0
@@ -1,40 +1,53 @@
1
- # Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
2
- # All rights reserved.
1
+ # -------------------------------------------------------------------------
2
+ # This file is part of the MindStudio project.
3
+ # Copyright (c) 2025 Huawei Technologies Co.,Ltd.
3
4
  #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
5
+ # MindStudio is licensed under Mulan PSL v2.
6
+ # You can use this software according to the terms and conditions of the Mulan PSL v2.
7
+ # You may obtain a copy of Mulan PSL v2 at:
7
8
  #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
+ # http://license.coscl.org.cn/MulanPSL2
9
10
  #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
11
+ # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12
+ # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13
+ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14
+ # See the Mulan PSL v2 for more details.
15
+ # -------------------------------------------------------------------------
15
16
 
16
17
  import ctypes
18
+ import inspect
17
19
  import os
18
20
  import zlib
21
+ import json
22
+ import re
19
23
  from collections.abc import Iterable
20
24
  from concurrent.futures import ThreadPoolExecutor
21
- from dataclasses import asdict
22
- from typing import List
23
25
 
24
26
  import numpy as np
25
27
  import torch
26
28
  from torch import distributed as dist
27
29
  from torch.distributed.distributed_c10d import _get_default_group
28
30
 
31
+ from msprobe.core.common.file_utils import FileOpen, load_json
29
32
  from msprobe.core.common.const import Const
30
33
  from msprobe.core.common.decorator import recursion_depth_decorator
31
34
  from msprobe.core.common.exceptions import MsprobeException
32
35
  from msprobe.core.common.log import logger
33
36
  from msprobe.core.common.utils import convert_tuple, is_int
34
- from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
35
- ModuleForwardInputsOutputs, TensorStatInfo
36
- from msprobe.pytorch.common.utils import save_pt
37
- from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
37
+ from msprobe.core.dump.data_dump.data_processor.base import (
38
+ BaseDataProcessor,
39
+ ModuleBackwardInputsOutputs,
40
+ ModuleForwardInputsOutputs,
41
+ TensorStatInfo
42
+ )
43
+ from msprobe.pytorch.common.utils import (
44
+ Const as PtConst,
45
+ save_pt,
46
+ is_recomputation,
47
+ is_hifloat8_tensor,
48
+ is_float8_tensor
49
+ )
50
+
38
51
 
39
52
  is_gpu = False
40
53
  try:
@@ -49,6 +62,9 @@ class TensorHandler:
49
62
  self.has_fake_tensor = hasattr(torch, "_subclasses") and hasattr(torch._subclasses, "fake_tensor")
50
63
  self.has_async_collective_tensor = hasattr(dist, "_functional_collectives") and \
51
64
  hasattr(dist._functional_collectives, "AsyncCollectiveTensor")
65
+ self.has_nested_tensor = hasattr(torch, "nested") and hasattr(torch.nested, "_internal") and \
66
+ hasattr(torch.nested._internal, "nested_tensor") and \
67
+ hasattr(torch.nested._internal.nested_tensor, "NestedTensor")
52
68
 
53
69
  @staticmethod
54
70
  def free_tensor(tensor, tensor_name):
@@ -57,6 +73,12 @@ class TensorHandler:
57
73
  except Exception as e:
58
74
  logger.warning(f"Failed to free tensor: {tensor_name}, the detail info: {e}.")
59
75
 
76
+ @staticmethod
77
+ def get_tensor_dtype(tensor):
78
+ if is_hifloat8_tensor(tensor):
79
+ return PtConst.HIFLOAT8_TYPE
80
+ return str(tensor.dtype)
81
+
60
82
  def is_dtensor(self, tensor):
61
83
  return self.has_dtensor and isinstance(tensor, dist.tensor.DTensor)
62
84
 
@@ -66,6 +88,10 @@ class TensorHandler:
66
88
  def is_async_collective_tensor(self, tensor):
67
89
  return self.has_async_collective_tensor and \
68
90
  isinstance(tensor, dist._functional_collectives.AsyncCollectiveTensor)
91
+
92
+ def is_nested_tensor(self, tensor):
93
+ return self.has_nested_tensor and \
94
+ isinstance(tensor, torch.nested._internal.nested_tensor.NestedTensor)
69
95
 
70
96
  def is_empty_data(self, tensor):
71
97
  return tensor.is_meta or self.is_fake_tensor(tensor) or self.is_async_collective_tensor(tensor)
@@ -76,6 +102,15 @@ class TensorHandler:
76
102
  if self.is_fake_tensor(tensor):
77
103
  logger.debug("FakeTensor cannot be converted to torch.Tensor type.")
78
104
  return tensor
105
+ if self.is_nested_tensor(tensor):
106
+ logger.debug(f"For NestedTensor, collecting information from the tensor returned by .values().")
107
+ return tensor.values()
108
+ if is_float8_tensor(tensor):
109
+ logger.debug(
110
+ f"The fp8/hifp8 tensor analyzing/saving is unsupported in dump function."
111
+ f"Casting to float for processing."
112
+ )
113
+ tensor = tensor.detach().float()
79
114
  return tensor
80
115
 
81
116
  def get_tensor_type(self, tensor):
@@ -85,6 +120,8 @@ class TensorHandler:
85
120
  return Const.FAKE_TENSOR_TYPE
86
121
  if self.is_async_collective_tensor(tensor):
87
122
  return Const.AC_TENSOR_TYPE
123
+ if self.is_nested_tensor(tensor):
124
+ return Const.NESTED_TENSOR_TYPE
88
125
  return Const.TENSOR_TYPE
89
126
 
90
127
  def get_dtensor_info(self, tensor):
@@ -246,6 +283,39 @@ class PytorchDataProcessor(BaseDataProcessor):
246
283
  return (hasattr(element, "register_hook") and callable(element.register_hook)) and \
247
284
  (hasattr(element, "requires_grad") and element.requires_grad)
248
285
 
286
+ @staticmethod
287
+ def is_recompute(call_stack=None):
288
+ return is_recomputation(call_stack)
289
+
290
+ @staticmethod
291
+ def analyze_api_call_stack(name):
292
+ try:
293
+ call_stack = inspect.stack()
294
+ if name.startswith("Primitive"):
295
+ api_stack = call_stack[4:]
296
+ else:
297
+ api_stack = call_stack[5:]
298
+ except Exception as e:
299
+ logger.warning(f"The call stack of <{name}> failed to retrieve, {e}.")
300
+ api_stack = None
301
+ call_stack = None
302
+
303
+ stack_str = []
304
+ if api_stack:
305
+ for (_, path, line, func, code, _) in api_stack:
306
+ if not code:
307
+ continue
308
+ if any(filter_path in path for filter_path in Const.STACK_FILTER_KEYWORDS) and \
309
+ Const.CALL_STACK_FLAG not in path:
310
+ continue
311
+ stack_line = f"File {path}, line {str(line)}, in {func}, \n {code[0].strip()}"
312
+ stack_str.append(stack_line)
313
+ else:
314
+ stack_str.append(Const.WITHOUT_CALL_STACK)
315
+ is_recompute = PytorchDataProcessor.is_recompute(call_stack)
316
+ del call_stack
317
+ return tuple(stack_str), is_recompute
318
+
249
319
  @staticmethod
250
320
  def _analyze_torch_size(arg):
251
321
  return {"type": "torch.Size", "value": [int(x) for x in list(arg)]}
@@ -358,7 +428,7 @@ class PytorchDataProcessor(BaseDataProcessor):
358
428
  tensor_stat = self.get_stat_info(common_tensor, self.config.async_dump, self.config.precision)
359
429
  tensor_json = {}
360
430
  tensor_json.update({'type': self.tensor_handler.get_tensor_type(tensor)})
361
- tensor_json.update({'dtype': str(common_tensor.dtype)})
431
+ tensor_json.update({'dtype': self.tensor_handler.get_tensor_dtype(tensor)})
362
432
  tensor_json.update({"shape": common_tensor.shape})
363
433
 
364
434
  stat_values = [
@@ -388,18 +458,22 @@ class PytorchDataProcessor(BaseDataProcessor):
388
458
  elif t_cpu.device.type == "npu":
389
459
  t_cpu = t_cpu.to("cpu", non_blocking=True)
390
460
  torch.npu.synchronize()
391
-
392
461
  t_cpu = t_cpu.detach()
393
- if not t_cpu.is_contiguous():
394
- t_cpu = t_cpu.contiguous()
395
462
 
396
- future = self._crc_executor.submit(
397
- PytorchDataProcessor.compute_crc32_from_tensor,
398
- t_cpu
399
- )
463
+ if self.config.task == Const.TENSOR and self.data_writer.bench_dump_file_path is not None:
464
+ tensor_md5 = PytorchDataProcessor.compute_crc32_from_tensor(t_cpu)
465
+ tensor_json.update({Const.MD5: tensor_md5})
466
+ else:
467
+ if not t_cpu.is_contiguous():
468
+ t_cpu = t_cpu.contiguous()
469
+
470
+ future = self._crc_executor.submit(
471
+ PytorchDataProcessor.compute_crc32_from_tensor,
472
+ t_cpu
473
+ )
400
474
 
401
- crc_placeholder = self.data_writer.append_crc32_to_buffer(future)
402
- tensor_json[Const.MD5_INDEX] = crc_placeholder
475
+ crc_placeholder = self.data_writer.append_crc32_to_buffer(future)
476
+ tensor_json[Const.MD5_INDEX] = crc_placeholder
403
477
  else:
404
478
  logger.debug(
405
479
  "Calculating the md5 value of fake tensor or meta tensor is not supported, "
@@ -456,164 +530,316 @@ class TensorDataProcessor(PytorchDataProcessor):
456
530
  return self._analyze_and_save_ndarray(ndarray, suffix)
457
531
 
458
532
 
459
- class OverflowCheckDataProcessor(PytorchDataProcessor):
460
- __slots__ = ["cached_tensors_and_file_paths"]
533
+ class DiffCheckDataProcessor(PytorchDataProcessor):
534
+ __slots__ = [
535
+ "cached_tensors_and_file_paths",
536
+ "_bench_ref_path",
537
+ "_bench_ref_mtime",
538
+ "_bench_map",
539
+ "_bench_state", # 新增:按 API 的对比状态
540
+ ]
461
541
 
462
542
  def __init__(self, config, data_writer):
463
543
  super().__init__(config, data_writer)
464
- self.has_overflow = False
465
- self.support_inf_nan = None
544
+ self.has_diff = False
545
+
466
546
  self.cached_api_info = {}
467
547
  self.cached_tensors_and_file_paths = {}
468
- self.bits_for_overflow = 8
469
- self.real_overflow_nums = 0
470
- self.overflow_nums = config.overflow_nums
548
+ self.bits_for_diff = 8
549
+ self.real_diff_nums = 0
550
+ self.diff_nums = config.diff_nums
551
+
552
+ # 新增:bench 基准缓存初始化
553
+ self._bench_ref_path = None
554
+ self._bench_ref_mtime = None
555
+ self._bench_map = {}
556
+ self._bench_state = {} # key: api_name -> 状态字典
471
557
 
472
558
  @property
473
559
  def is_terminated(self):
474
- if self.overflow_nums == -1:
560
+ if self.diff_nums == -1:
475
561
  return False
476
- if self.real_overflow_nums >= self.overflow_nums:
562
+ if self.real_diff_nums >= self.diff_nums:
477
563
  return True
478
564
  return False
479
565
 
566
+ @staticmethod
567
+ def _parse_data_name(data_name: str):
568
+ """
569
+ 解析 data_name,例如:
570
+ - "Functional.relu.2.forward.input.0.pt"
571
+ - 兼容可选前缀 "name:" -> "name:Functional.relu.2.forward.input.0.pt"
572
+ 返回 (api, io, idx) 或 None
573
+ """
574
+ if not data_name:
575
+ return None
576
+ if data_name.startswith("name:"):
577
+ data_name = data_name.split(":", 1)[1]
578
+
579
+ # api 名本身可能包含若干个 '.',所以用正则从右侧提取 io/idx/扩展名
580
+ m = re.match(
581
+ r"^(?=.{1,1024}$)(?P<api>.+)\.(?P<io>input|output)\.(?P<idx>\d+)\.\w+$",
582
+ data_name
583
+ )
584
+ if not m:
585
+ return None
586
+ api = m.group("api")
587
+ io = m.group("io")
588
+ idx = int(m.group("idx"))
589
+ return api, io, idx
590
+
480
591
  def analyze_forward_input(self, name, module, module_input_output: ModuleForwardInputsOutputs):
481
- self.has_overflow = False
482
- self._is_support_inf_nan()
592
+ self.has_diff = False
593
+
483
594
  self.cached_api_info = super().analyze_forward_input(name, module, module_input_output)
484
595
  return None
485
596
 
486
597
  def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
487
- self._is_support_inf_nan()
598
+
488
599
  api_info_struct = super().analyze_forward_output(name, module, module_input_output)
489
600
  if name in self.cached_api_info and name in api_info_struct:
490
601
  self.cached_api_info[name].update(api_info_struct[name])
491
602
  elif name in api_info_struct:
492
603
  self.cached_api_info = api_info_struct
493
- self.handle_overflow()
494
- return self.cached_api_info if self.has_overflow else None
604
+ self.handle_diff()
605
+ return self.cached_api_info
495
606
 
496
607
  def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
497
- self.has_overflow = False
498
- self._is_support_inf_nan()
608
+ self.has_diff = False
609
+
499
610
  api_info_struct = super().analyze_forward(name, module, module_input_output)
500
- self.handle_overflow()
501
- return api_info_struct if self.has_overflow else None
611
+ self.handle_diff()
612
+ return api_info_struct
502
613
 
503
614
  def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs):
504
- self.has_overflow = False
505
- self._is_support_inf_nan()
615
+ self.has_diff = False
616
+
506
617
  api_info_struct = super().analyze_backward(name, module, module_input_output)
507
- self.handle_overflow()
508
- return api_info_struct if self.has_overflow else None
618
+ self.handle_diff()
619
+ return api_info_struct
509
620
 
510
621
  def analyze_params(self, name, param_name, grad):
511
- self.has_overflow = False
512
- self._is_support_inf_nan()
622
+ self.has_diff = False
623
+
513
624
  api_info_struct = super().analyze_params(name, param_name, grad)
514
- self.handle_overflow()
515
- return api_info_struct if self.has_overflow else None
625
+ self.handle_diff()
626
+ return api_info_struct
516
627
 
517
- def handle_overflow(self):
518
- if not self.support_inf_nan:
519
- self._analyze_maybe_overflow_flag()
520
- if self.has_overflow:
628
+ def handle_diff(self):
629
+ if self.has_diff:
521
630
  for file_path, tensor in self.cached_tensors_and_file_paths.items():
522
631
  self.tensor_handler.save_tensor(tensor, file_path)
523
- self.real_overflow_nums += 1
524
- if self.overflow_nums != -1 and self.real_overflow_nums >= self.overflow_nums:
525
- logger.info(f"[{Const.TOOL_NAME}] Reached the preset overflow times, "
526
- f"current overflow times: {self.real_overflow_nums}.")
632
+ self.real_diff_nums += 1
633
+ if self.diff_nums != -1 and self.real_diff_nums >= self.diff_nums:
634
+ logger.info(f"[{Const.TOOL_NAME}] Reached the preset diff times, "
635
+ f"current diff times: {self.real_diff_nums}.")
636
+ api = getattr(self, "current_api_or_module_name", None)
637
+ if api and api in self._bench_state:
638
+ self._bench_state.pop(api, None)
639
+
527
640
  self.cached_tensors_and_file_paths = {}
528
641
 
529
- def _is_support_inf_nan(self):
530
- if self.support_inf_nan is not None:
531
- return
532
- try:
533
- self.support_inf_nan = is_gpu or torch_npu.npu.utils.is_support_inf_nan()
534
- except Exception:
535
- logger.warning(f"Unable to determine if the current device supports inf/nan mode, default not supported.")
536
- self.support_inf_nan = False
537
642
 
538
- def _analyze_maybe_overflow_flag(self):
643
+ def _analyze_maybe_diff_flag(self):
539
644
  try:
540
- self.has_overflow = torch_npu.npu.utils.get_npu_overflow_flag()
541
- if self.has_overflow:
542
- torch_npu.npu.utils.clear_npu_overflow_flag()
645
+ self.has_diff = torch_npu.npu.utils.get_npu_diff_flag()
646
+ if self.has_diff:
647
+ torch_npu.npu.utils.clear_npu_diff_flag()
543
648
  except Exception as e:
544
- logger.error(f"Overflow check failed, the current environment may be abnormal.")
545
- raise RuntimeError(f"overflow check failed") from e
649
+ logger.error(f"Diff check failed, the current environment may be abnormal.")
650
+ raise RuntimeError(f"diff check failed") from e
651
+
652
+ def _bench_expected_counts_for_api(self, api: str):
653
+ """统计某 API 在 bench_map 里有多少个 Tensor 输入/输出"""
654
+ n_in = n_out = 0
655
+ for (a, io, _) in self._bench_map.keys():
656
+ if a == api:
657
+ if io == "input":
658
+ n_in += 1
659
+ elif io == "output":
660
+ n_out += 1
661
+ return n_in, n_out
662
+
663
+ def _resolve_bench_json_path(self) -> str:
664
+ p = getattr(self.data_writer, "bench_dump_file_path", None)
665
+ if not p:
666
+ return None
667
+ p = os.path.join(p, "dump.json") if os.path.isdir(p) else p
668
+ return p if os.path.isfile(p) else None
669
+
670
+ def _ensure_bench_map_loaded(self) -> bool:
671
+ """
672
+ 当路径变化或文件 mtime 变化时重载 dump.json,并构建 (api, 'input'/'output', idx) -> {md5, shape} 的索引。
673
+ """
674
+ path = self._resolve_bench_json_path()
675
+ if not path:
676
+ return False
677
+ try:
678
+ mtime = os.path.getmtime(path)
679
+ except Exception as e:
680
+ return False
546
681
 
547
- def _analyze_maybe_overflow_tensor(self, tensor_json):
548
- tensor_stat_index = tensor_json.get(Const.TENSOR_STAT_INDEX)
549
- if tensor_stat_index is None:
550
- logger.warning("tensor_stat_index does not exist in tensor_json.")
551
- return
552
- max_tensor = self.data_writer.get_buffer_values_max(tensor_stat_index)
553
- min_tensor = self.data_writer.get_buffer_values_min(tensor_stat_index)
682
+ need_reload = (path != self._bench_ref_path) or (mtime != self._bench_ref_mtime)
683
+
684
+ if need_reload:
685
+ try:
686
+ obj = load_json(path)
687
+ except Exception as e:
688
+ logger.warning(f"Failed to load bench dump.json: {e}")
689
+ return False
554
690
 
555
- if max_tensor is None or min_tensor is None:
691
+ data = obj.get("data", {})
692
+ self._bench_map = self._build_bench_map_from_json(data)
693
+ self._bench_ref_path = path
694
+ self._bench_ref_mtime = mtime
695
+
696
+ return True
697
+
698
+ def _build_bench_map_from_json(self, data: dict) -> dict:
699
+ """
700
+ data 结构:{ api_name: {input_args: [...], output: [...] } }
701
+ 只收集 Tensor 项:(api, io, idx) -> {"md5": str, "shape": list}
702
+ """
703
+ mp = {}
704
+ total_inputs = 0
705
+ total_outputs = 0
706
+ for api_name, rec in data.items():
707
+ ia = rec.get("input_args", [])
708
+ oa = rec.get("output", [])
709
+ # input_args
710
+ input_count_this_api = 0
711
+ for i, arg in enumerate(ia):
712
+ if isinstance(arg, dict) and arg.get("type") == "torch.Tensor":
713
+ mp[(api_name, "input", i)] = {
714
+ "md5": arg.get("md5"),
715
+ "shape": arg.get("shape"),
716
+ }
717
+ input_count_this_api += 1
718
+ total_inputs += input_count_this_api
719
+
720
+ # output
721
+ output_count_this_api = 0
722
+ for i, out in enumerate(oa):
723
+ if isinstance(out, dict) and out.get("type") == "torch.Tensor":
724
+ mp[(api_name, "output", i)] = {
725
+ "md5": out.get("md5"),
726
+ "shape": out.get("shape"),
727
+ }
728
+ output_count_this_api += 1
729
+ total_outputs += output_count_this_api
730
+
731
+ return mp
732
+
733
def _analyze_maybe_diff_tensor(self, tensor_json):
    """Compare one dumped tensor with the benchmark dump and flag divergence.

    Per-API bookkeeping lives in ``self._bench_state``. Input tensors only
    update that bookkeeping. ``self.has_diff`` is set solely by an output
    tensor whose shape equals the benchmark shape but whose md5 differs, and
    only if every benchmark input of the same API has already been checked
    and found identical. Any ``io`` value other than ``"input"`` is handled
    as an output.

    Args:
        tensor_json: summary dict of the tensor. The entries read here are
            ``"data_name"``, ``"shape"`` and the md5 (under ``Const.MD5`` if
            present, otherwise under ``"md5"``).

    Returns:
        None. Results are reported through ``self._bench_state`` and
        ``self.has_diff``.
    """
    # 1) Make sure the benchmark map is available.
    if not self._ensure_bench_map_loaded():
        return

    # 2) Resolve data_name -> (api, io, idx).
    data_name = tensor_json.get("data_name")
    parsed = self._parse_data_name(data_name)
    if not parsed:
        logger.debug(f"data_name parse failed: {data_name}")
        return
    api, io, idx = parsed

    # 3) Fetch or create the state of this API.
    st = self._bench_state.get(api)
    if st is None:
        n_in, _ = self._bench_expected_counts_for_api(api)
        st = {
            "expected_in": n_in,             # tensor inputs the benchmark expects for this API
            "checked_in": 0,                 # inputs verified so far that exist in the benchmark
            "inputs_equal": True,            # whether all inputs seen so far are identical
            "seen_input_not_in_ref": False,  # an input exists at runtime but not in the benchmark
            "any_output_neq": False,         # some output differed (same shape, different md5)
        }
        self._bench_state[api] = st

    # 4) Look up the benchmark entry.
    ref = self._bench_map.get((api, io, idx))

    # 5) Current shape.
    cur_shape = tensor_json.get("shape")
    if cur_shape is None:
        return
    try:
        cur_shape = list(cur_shape)
    except Exception as e:  # best effort: an unusable shape only skips this tensor
        # A single pre-formatted message: passing several positional
        # arguments would be treated as %-format args (or rejected).
        logger.warning(f"[BENCH] shape to list failed: {e!r} -> skip")
        return

    # 6) Inputs and outputs are handled separately.
    if io == "input":
        # Input stage: only maintain the "are the inputs identical" state.
        if ref is None:
            # Runtime input without a benchmark counterpart: equality of the
            # inputs can no longer be asserted.
            st["inputs_equal"] = False
            st["seen_input_not_in_ref"] = True
            return

        ref_shape = ref.get("shape")
        ref_md5 = ref.get("md5")

        # The benchmark has this input, so it counts as checked.
        st["checked_in"] += 1

        # Shapes must match; a benchmark entry without a usable shape cannot
        # be asserted equal.
        if not isinstance(ref_shape, (list, tuple)) or list(ref_shape) != cur_shape:
            st["inputs_equal"] = False
            return

        cur_md5 = tensor_json.get(Const.MD5) if Const.MD5 in tensor_json else tensor_json.get("md5")
        if cur_md5 is None or ref_md5 is None:
            # Missing md5 information: equality cannot be asserted.
            st["inputs_equal"] = False
            return

        if str(cur_md5) != str(ref_md5):
            st["inputs_equal"] = False
        return  # the input stage never sets has_diff

    # Output stage: an output mismatch is only reported when all inputs are
    # identical and every expected input has been checked.
    if ref is None:
        # No benchmark output to compare with: a mismatch cannot be asserted.
        return

    ref_shape = ref.get("shape")
    ref_md5 = ref.get("md5")

    # md5 values are only compared when the shapes agree.
    if not isinstance(ref_shape, (list, tuple)) or list(ref_shape) != cur_shape:
        return

    cur_md5 = tensor_json.get(Const.MD5) if Const.MD5 in tensor_json else tensor_json.get("md5")
    if cur_md5 is None or ref_md5 is None:
        return

    inputs_ok = (
        st["inputs_equal"]
        and st["checked_in"] == st["expected_in"]
        and not st["seen_input_not_in_ref"]
    )

    if inputs_ok and str(cur_md5) != str(ref_md5):
        st["any_output_neq"] = True
        self.has_diff = True
834
+
835
def _analyze_tensor(self, tensor, suffix):
    """Summarise ``tensor``, remember it for saving, and run the diff check.

    The tensor is cached under the file path obtained from
    ``get_save_file_path(suffix)``, the parent class summary is extended
    with ``"data_name"``, and the summary is handed to
    ``_analyze_maybe_diff_tensor`` unless ``self.has_diff`` is already set.

    Returns:
        The summary produced by the parent class, with ``"data_name"`` added.
    """
    data_name, save_path = self.get_save_file_path(suffix)
    self.cached_tensors_and_file_paths.update({save_path: tensor})

    summary = super()._analyze_tensor(tensor, suffix)
    summary.update({"data_name": data_name})

    # Once a difference has been flagged there is nothing left to look for.
    if self.has_diff:
        return summary

    self._analyze_maybe_diff_tensor(summary)
    return summary
617
843
 
618
844
 
619
845
  class KernelDumpDataProcessor(PytorchDataProcessor):
@@ -707,6 +933,11 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
707
933
  )
708
934
  def clone_and_detach_tensor(self, input_params):
709
935
  if isinstance(input_params, torch.Tensor):
936
+ if is_float8_tensor(input_params):
937
+ raise MsprobeException(
938
+ MsprobeException.UNSUPPORTED_TYPE_ERROR,
939
+ f"L2 backward dump does not support float8 type."
940
+ )
710
941
  if input_params.requires_grad:
711
942
  return input_params.clone().detach().requires_grad_()
712
943
  return input_params.clone()
@@ -720,6 +951,8 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
720
951
  return input_params
721
952
 
722
953
  def analyze_single_element(self, element, suffix_stack):
954
+ if is_float8_tensor(element):
955
+ return {}
723
956
  if isinstance(element, torch.Tensor):
724
957
  if not self.is_found_output_tensor:
725
958
  if element.requires_grad: