mindstudio-probe 8.3.3__py3-none-any.whl → 26.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (689):
  1. {mindstudio_probe-8.3.3.dist-info → mindstudio_probe-26.0.0a1.dist-info}/METADATA +26 -14
  2. mindstudio_probe-26.0.0a1.dist-info/RECORD +498 -0
  3. {mindstudio_probe-8.3.3.dist-info → mindstudio_probe-26.0.0a1.dist-info}/WHEEL +1 -1
  4. mindstudio_probe-26.0.0a1.dist-info/entry_points.txt +5 -0
  5. mindstudio_probe-26.0.0a1.dist-info/licenses/LICENSE +124 -0
  6. mindstudio_probe-26.0.0a1.dist-info/top_level.txt +2 -0
  7. msprobe/__init__.py +12 -13
  8. msprobe/config.json +9 -31
  9. msprobe/core/__init__.py +12 -11
  10. msprobe/core/acc_check/acc_check_cli.py +145 -0
  11. msprobe/core/common/const.py +97 -38
  12. msprobe/core/common/db_manager.py +133 -12
  13. msprobe/core/common/decorator.py +12 -11
  14. msprobe/core/common/exceptions.py +12 -11
  15. msprobe/core/common/file_utils.py +101 -25
  16. msprobe/core/common/framework_adapter.py +36 -25
  17. msprobe/core/common/global_lock.py +12 -11
  18. msprobe/core/common/inplace_op_checker.py +12 -11
  19. msprobe/core/common/log.py +22 -11
  20. msprobe/core/common/megatron_utils.py +566 -11
  21. msprobe/core/common/parallel_state.py +12 -11
  22. msprobe/core/common/runtime.py +12 -11
  23. msprobe/core/common/utils.py +41 -41
  24. msprobe/core/compare/acc_compare.py +361 -104
  25. msprobe/core/compare/atb_data_compare.py +422 -0
  26. msprobe/core/compare/auto_compare.py +134 -0
  27. msprobe/core/compare/check.py +14 -17
  28. msprobe/core/compare/compare_cli.py +72 -149
  29. msprobe/core/compare/config.py +12 -13
  30. msprobe/core/compare/diff_analyze/first_diff_analyze.py +28 -15
  31. msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
  32. msprobe/core/compare/find_first/analyzer.py +18 -18
  33. msprobe/core/compare/find_first/graph.py +12 -11
  34. msprobe/core/compare/find_first/utils.py +13 -12
  35. msprobe/core/compare/indicator_analysis/__init__.py +15 -0
  36. msprobe/core/compare/indicator_analysis/algorithm.py +363 -0
  37. msprobe/core/compare/indicator_analysis/api_data.py +141 -0
  38. msprobe/core/compare/indicator_analysis/calculator.py +181 -0
  39. msprobe/core/compare/indicator_analysis/utils.py +116 -0
  40. msprobe/core/compare/layer_mapping/__init__.py +12 -11
  41. msprobe/core/compare/layer_mapping/data_scope_parser.py +20 -11
  42. msprobe/core/compare/layer_mapping/layer_mapping.py +14 -13
  43. msprobe/core/compare/layer_mapping/postprocess_pass.py +13 -11
  44. msprobe/core/compare/merge_result/merge_result.py +12 -11
  45. msprobe/core/compare/merge_result/merge_result_cli.py +12 -11
  46. msprobe/core/compare/merge_result/utils.py +12 -11
  47. msprobe/core/compare/multiprocessing_compute.py +13 -14
  48. msprobe/core/compare/npy_compare.py +13 -11
  49. msprobe/core/compare/offline_data_compare.py +160 -0
  50. msprobe/core/compare/stats_diff_calc.py +39 -0
  51. msprobe/core/compare/torchair_acc_cmp.py +764 -0
  52. msprobe/core/compare/torchair_cmp_utils.py +338 -0
  53. msprobe/core/compare/utils.py +140 -49
  54. msprobe/core/config_check/__init__.py +12 -11
  55. msprobe/core/config_check/checkers/__init__.py +12 -11
  56. msprobe/core/config_check/checkers/base_checker.py +15 -14
  57. msprobe/core/config_check/checkers/dataset_checker.py +13 -12
  58. msprobe/core/config_check/checkers/env_args_checker.py +13 -12
  59. msprobe/core/config_check/checkers/hyperparameter_checker.py +16 -15
  60. msprobe/core/config_check/checkers/pip_checker.py +15 -15
  61. msprobe/core/config_check/checkers/random_checker.py +13 -12
  62. msprobe/core/config_check/checkers/weights_checker.py +14 -12
  63. msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +13 -17
  64. msprobe/core/config_check/ckpt_compare/megatron_loader.py +13 -12
  65. msprobe/core/config_check/ckpt_compare/metrics.py +12 -11
  66. msprobe/core/config_check/config_check_cli.py +18 -17
  67. msprobe/core/config_check/config_checker.py +16 -14
  68. msprobe/core/config_check/resource/dependency.yaml +15 -12
  69. msprobe/core/config_check/resource/env.yaml +12 -11
  70. msprobe/core/config_check/utils/hyperparameter_parser.py +12 -11
  71. msprobe/core/config_check/utils/utils.py +12 -11
  72. msprobe/core/{data_dump → dump/api_dump}/api_registry.py +12 -11
  73. msprobe/core/{common_config.py → dump/common_config.py} +13 -24
  74. msprobe/core/dump/data_dump/data_collector.py +257 -0
  75. msprobe/core/{data_dump → dump/data_dump}/data_processor/base.py +45 -36
  76. msprobe/core/{data_dump → dump/data_dump}/data_processor/factory.py +33 -25
  77. msprobe/core/{data_dump → dump/data_dump}/data_processor/mindspore_processor.py +37 -113
  78. msprobe/core/{data_dump → dump/data_dump}/data_processor/pytorch_processor.py +364 -131
  79. msprobe/core/{data_dump → dump/data_dump}/json_writer.py +24 -31
  80. msprobe/core/{data_dump → dump/data_dump}/scope.py +12 -13
  81. msprobe/core/{debugger → dump/debugger}/precision_debugger.py +15 -23
  82. msprobe/core/dump/dump2db/db_utils.py +215 -0
  83. msprobe/core/dump/dump2db/dump2db.py +409 -0
  84. msprobe/core/{hook_manager.py → dump/hook_manager.py} +38 -87
  85. msprobe/core/dump/kernel_dump/kernel_config.py +34 -0
  86. msprobe/core/{service.py → dump/service.py} +43 -27
  87. msprobe/core/install_deps/install_deps.py +51 -0
  88. msprobe/core/monitor/anomaly_processor.py +13 -11
  89. msprobe/core/monitor/csv2db.py +73 -93
  90. msprobe/core/monitor/db_utils.py +140 -205
  91. msprobe/core/monitor/utils.py +18 -17
  92. msprobe/core/monitor_v2/__init__.py +20 -0
  93. msprobe/core/monitor_v2/base.py +83 -0
  94. msprobe/core/monitor_v2/cc.py +287 -0
  95. msprobe/core/monitor_v2/factory.py +81 -0
  96. msprobe/core/monitor_v2/module.py +201 -0
  97. msprobe/core/monitor_v2/optimizer.py +245 -0
  98. msprobe/core/monitor_v2/param.py +154 -0
  99. msprobe/core/monitor_v2/trainer.py +326 -0
  100. msprobe/core/monitor_v2/utils.py +122 -0
  101. msprobe/core/monitor_v2/weight_grad.py +419 -0
  102. msprobe/core/monitor_v2/writer.py +162 -0
  103. msprobe/core/overflow_check/abnormal_scene.py +12 -11
  104. msprobe/core/overflow_check/api_info.py +12 -11
  105. msprobe/core/overflow_check/checker.py +12 -11
  106. msprobe/core/overflow_check/filter.py +13 -11
  107. msprobe/core/overflow_check/level.py +12 -11
  108. msprobe/core/overflow_check/utils.py +12 -11
  109. msprobe/core/single_save/single_comparator.py +12 -11
  110. msprobe/core/single_save/single_saver.py +12 -11
  111. msprobe/infer/__init__.py +16 -0
  112. msprobe/infer/offline/__init__.py +16 -0
  113. msprobe/infer/offline/compare/__init__.py +16 -0
  114. msprobe/infer/offline/compare/msquickcmp/__init__.py +16 -0
  115. msprobe/infer/offline/compare/msquickcmp/adapter_cli/__init__.py +16 -0
  116. msprobe/infer/offline/compare/msquickcmp/adapter_cli/args_adapter.py +46 -0
  117. msprobe/infer/offline/compare/msquickcmp/atc/__init__.py +16 -0
  118. msprobe/infer/offline/compare/msquickcmp/atc/atc_utils.py +98 -0
  119. msprobe/infer/offline/compare/msquickcmp/cmp_process.py +328 -0
  120. msprobe/infer/offline/compare/msquickcmp/common/__init__.py +16 -0
  121. msprobe/infer/offline/compare/msquickcmp/common/args_check.py +112 -0
  122. msprobe/infer/offline/compare/msquickcmp/common/convert.py +74 -0
  123. msprobe/infer/offline/compare/msquickcmp/common/dump_data.py +121 -0
  124. msprobe/infer/offline/compare/msquickcmp/common/dynamic_argument_bean.py +39 -0
  125. msprobe/infer/offline/compare/msquickcmp/common/utils.py +669 -0
  126. msprobe/infer/offline/compare/msquickcmp/config.ini +6 -0
  127. msprobe/infer/offline/compare/msquickcmp/dump/__init__.py +16 -0
  128. msprobe/infer/offline/compare/msquickcmp/dump/args_adapter.py +50 -0
  129. msprobe/infer/offline/compare/msquickcmp/dump/dump_process.py +91 -0
  130. msprobe/infer/offline/compare/msquickcmp/install_aclruntime_aisbench.sh +180 -0
  131. msprobe/infer/offline/compare/msquickcmp/main.py +199 -0
  132. msprobe/infer/offline/compare/msquickcmp/net_compare/__init__.py +16 -0
  133. msprobe/infer/offline/compare/msquickcmp/net_compare/net_compare.py +277 -0
  134. msprobe/infer/offline/compare/msquickcmp/npu/__init__.py +16 -0
  135. msprobe/infer/offline/compare/msquickcmp/npu/npu_dump_data.py +558 -0
  136. msprobe/infer/offline/compare/msquickcmp/npu/om_parser.py +416 -0
  137. msprobe/infer/offline/compare/msquickcmp/onnx_model/__init__.py +16 -0
  138. msprobe/infer/offline/compare/msquickcmp/onnx_model/onnx_dump_data.py +374 -0
  139. msprobe/infer/utils/__init__.py +15 -0
  140. msprobe/infer/utils/acc_cmp.py +94 -0
  141. msprobe/infer/utils/check/__init__.py +37 -0
  142. msprobe/infer/utils/check/args_checker.py +35 -0
  143. msprobe/infer/utils/check/checker.py +227 -0
  144. msprobe/infer/utils/check/dict_checker.py +78 -0
  145. msprobe/infer/utils/check/func_wrapper.py +96 -0
  146. msprobe/infer/utils/check/list_checker.py +56 -0
  147. msprobe/infer/utils/check/number_checker.py +64 -0
  148. msprobe/infer/utils/check/obj_checker.py +41 -0
  149. msprobe/infer/utils/check/path_checker.py +249 -0
  150. msprobe/infer/utils/check/rule.py +126 -0
  151. msprobe/infer/utils/check/string_checker.py +66 -0
  152. msprobe/infer/utils/cmp_algorithm.py +261 -0
  153. msprobe/infer/utils/constants.py +112 -0
  154. msprobe/infer/utils/file_open_check.py +337 -0
  155. msprobe/infer/utils/util.py +177 -0
  156. msprobe/mindspore/__init__.py +14 -13
  157. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +14 -13
  158. msprobe/mindspore/api_accuracy_checker/api_info.py +12 -11
  159. msprobe/mindspore/api_accuracy_checker/api_runner.py +12 -11
  160. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +12 -11
  161. msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +12 -11
  162. msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +12 -11
  163. msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +12 -11
  164. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +15 -14
  165. msprobe/mindspore/api_accuracy_checker/compute_element.py +12 -11
  166. msprobe/mindspore/api_accuracy_checker/data_manager.py +13 -11
  167. msprobe/mindspore/api_accuracy_checker/main.py +12 -11
  168. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +14 -12
  169. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +13 -11
  170. msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +12 -11
  171. msprobe/mindspore/api_accuracy_checker/type_mapping.py +12 -11
  172. msprobe/mindspore/api_accuracy_checker/utils.py +12 -11
  173. msprobe/mindspore/common/const.py +15 -74
  174. msprobe/mindspore/common/log.py +12 -11
  175. msprobe/mindspore/common/utils.py +30 -15
  176. msprobe/mindspore/compare/common_dir_compare.py +21 -23
  177. msprobe/mindspore/compare/distributed_compare.py +18 -16
  178. msprobe/mindspore/compare/ms_compare.py +14 -14
  179. msprobe/mindspore/compare/ms_graph_compare.py +26 -20
  180. msprobe/mindspore/compare/utils.py +14 -12
  181. msprobe/mindspore/{cell_processor.py → dump/cell_processor.py} +15 -14
  182. msprobe/mindspore/{debugger → dump/debugger}/debugger_config.py +12 -30
  183. msprobe/mindspore/{debugger → dump/debugger}/precision_debugger.py +43 -45
  184. msprobe/mindspore/dump/{cell_dump_process.py → dump_processor/cell_dump_process.py} +31 -17
  185. msprobe/mindspore/dump/{cell_dump_with_insert_gradient.py → dump_processor/cell_dump_with_insert_gradient.py} +18 -14
  186. msprobe/mindspore/dump/{dump_tool_factory.py → dump_processor/dump_tool_factory.py} +16 -15
  187. msprobe/mindspore/dump/{graph_mode_cell_dump.py → dump_processor/graph_mode_cell_dump.py} +16 -15
  188. msprobe/mindspore/dump/{graph_tensor_dump.py → dump_processor/graph_tensor_dump.py} +134 -133
  189. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/api_register.py +15 -14
  190. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/hook_cell.py +12 -11
  191. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/ms_hook_manager.py +47 -20
  192. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/primitive_hooks.py +14 -13
  193. msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/support_wrap_ops.yaml +13 -11
  194. msprobe/mindspore/dump/{jit_dump.py → dump_processor/jit_dump.py} +14 -13
  195. msprobe/mindspore/dump/{kernel_graph_dump.py → dump_processor/kernel_graph_dump.py} +13 -12
  196. msprobe/mindspore/dump/{kernel_kbyk_dump.py → dump_processor/kernel_kbyk_dump.py} +13 -12
  197. msprobe/mindspore/{exception_dump → dump/exception_dump}/exception_dump_tool_factory.py +14 -13
  198. msprobe/mindspore/{exception_dump → dump/exception_dump}/kernel_graph_exception_dump.py +13 -12
  199. msprobe/mindspore/{mindspore_service.py → dump/mindspore_service.py} +18 -17
  200. msprobe/mindspore/dump/mindtorch/__init__.py +19 -0
  201. msprobe/mindspore/dump/ms_config.py +105 -0
  202. msprobe/mindspore/{overflow_check → dump/overflow_check}/kernel_graph_overflow_check.py +13 -12
  203. msprobe/mindspore/{overflow_check → dump/overflow_check}/overflow_check_tool_factory.py +14 -13
  204. msprobe/mindspore/dump/task_handler_factory.py +43 -0
  205. msprobe/mindspore/monitor/common_func.py +12 -11
  206. msprobe/mindspore/monitor/data_writers.py +12 -11
  207. msprobe/mindspore/monitor/distributed/wrap_distributed.py +93 -39
  208. msprobe/mindspore/monitor/features.py +12 -11
  209. msprobe/mindspore/monitor/module_hook.py +19 -22
  210. msprobe/mindspore/monitor/optimizer_collect.py +29 -25
  211. msprobe/mindspore/monitor/utils.py +13 -11
  212. msprobe/msaccucmp/advisor/__init__.py +16 -0
  213. msprobe/msaccucmp/advisor/advisor_const.py +65 -0
  214. msprobe/msaccucmp/advisor/advisor_result.py +73 -0
  215. msprobe/msaccucmp/advisor/compare_advisor.py +99 -0
  216. msprobe/msaccucmp/advisor/input_advisor.py +66 -0
  217. msprobe/msaccucmp/advisor/node_advisor.py +68 -0
  218. msprobe/msaccucmp/advisor/overflow_advisor.py +58 -0
  219. msprobe/msaccucmp/algorithm_manager/__init__.py +16 -0
  220. msprobe/msaccucmp/algorithm_manager/algorithm_manager.py +464 -0
  221. msprobe/msaccucmp/algorithm_manager/algorithm_parameter.py +42 -0
  222. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_AccumulatedRelativeError.py +46 -0
  223. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_CosineSimilarity.py +58 -0
  224. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_KullbackLeiblerDivergence.py +84 -0
  225. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MaxAbsoluteError.py +41 -0
  226. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MaxRelativeError.py +46 -0
  227. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MeanAbsoluteError.py +41 -0
  228. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MeanRelativeError.py +46 -0
  229. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_RelativeEuclideanDistance.py +46 -0
  230. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_RootMeanSquareError.py +40 -0
  231. msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_StandardDeviation.py +47 -0
  232. msprobe/msaccucmp/cmp_utils/__init__.py +16 -0
  233. msprobe/msaccucmp/cmp_utils/common.py +113 -0
  234. msprobe/msaccucmp/cmp_utils/constant/__init__.py +16 -0
  235. msprobe/msaccucmp/cmp_utils/constant/compare_error.py +81 -0
  236. msprobe/msaccucmp/cmp_utils/constant/const_manager.py +530 -0
  237. msprobe/msaccucmp/cmp_utils/file_utils.py +497 -0
  238. msprobe/msaccucmp/cmp_utils/log.py +257 -0
  239. msprobe/msaccucmp/cmp_utils/multi_process/__init__.py +16 -0
  240. msprobe/msaccucmp/cmp_utils/multi_process/multi_convert_process.py +140 -0
  241. msprobe/msaccucmp/cmp_utils/multi_process/progress.py +78 -0
  242. msprobe/msaccucmp/cmp_utils/path_check.py +274 -0
  243. msprobe/msaccucmp/cmp_utils/reg_manager.py +98 -0
  244. msprobe/msaccucmp/cmp_utils/tlv_parse.py +279 -0
  245. msprobe/msaccucmp/cmp_utils/utils.py +356 -0
  246. msprobe/msaccucmp/cmp_utils/utils_type.py +63 -0
  247. msprobe/msaccucmp/compare_vector.py +48 -0
  248. msprobe/msaccucmp/conversion/__init__.py +16 -0
  249. msprobe/msaccucmp/conversion/data_conversion.py +277 -0
  250. msprobe/msaccucmp/conversion/dtype_conversion.py +99 -0
  251. msprobe/msaccucmp/conversion/shape_format_conversion.py +477 -0
  252. msprobe/msaccucmp/conversion/tensor_conversion.py +369 -0
  253. msprobe/msaccucmp/dump_data_conversion.py +46 -0
  254. msprobe/msaccucmp/dump_parse/__init__.py +16 -0
  255. msprobe/msaccucmp/dump_parse/big_dump_data.py +317 -0
  256. msprobe/msaccucmp/dump_parse/dump.py +423 -0
  257. msprobe/msaccucmp/dump_parse/dump_data_object.py +322 -0
  258. msprobe/msaccucmp/dump_parse/dump_data_parser.py +436 -0
  259. msprobe/msaccucmp/dump_parse/dump_utils.py +246 -0
  260. msprobe/msaccucmp/dump_parse/ffts_parser.py +137 -0
  261. msprobe/msaccucmp/dump_parse/mapping.py +62 -0
  262. msprobe/msaccucmp/dump_parse/nano_dump_data.py +392 -0
  263. msprobe/msaccucmp/dump_parse/proto_dump_data.py +308 -0
  264. msprobe/msaccucmp/dump_parser.py +90 -0
  265. msprobe/msaccucmp/format_manager/__init__.py +16 -0
  266. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_NCHW.py +53 -0
  267. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_ND.py +52 -0
  268. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_NHWC.py +53 -0
  269. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_Z_to_HWCN.py +47 -0
  270. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_Z_to_NCHW.py +47 -0
  271. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_FRACTAL_Z.py +89 -0
  272. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_NCHW.py +37 -0
  273. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_NHWC.py +37 -0
  274. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_HWCN.py +43 -0
  275. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_NCHW.py +48 -0
  276. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_NHWC.py +43 -0
  277. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NCHW_to_FRACTAL_Z.py +87 -0
  278. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NCHW_to_NHWC.py +37 -0
  279. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NDC1HWC0_to_NCDHW.py +48 -0
  280. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NDC1HWC0_to_ND.py +44 -0
  281. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_FRACTAL_Z.py +87 -0
  282. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_HWCN.py +37 -0
  283. msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_NCHW.py +37 -0
  284. msprobe/msaccucmp/format_manager/format_manager.py +307 -0
  285. msprobe/msaccucmp/inplace_layer_process.py +186 -0
  286. msprobe/msaccucmp/msaccucmp.py +532 -0
  287. msprobe/msaccucmp/mscmp_advisor.py +128 -0
  288. msprobe/msaccucmp/overflow/__init__.py +16 -0
  289. msprobe/msaccucmp/overflow/overflow_analyse.py +305 -0
  290. msprobe/msaccucmp/overflow/overflow_detection.py +143 -0
  291. msprobe/msaccucmp/pytorch_cmp/__init__.py +16 -0
  292. msprobe/msaccucmp/pytorch_cmp/compare_pytorch.py +389 -0
  293. msprobe/msaccucmp/pytorch_cmp/hdf5_parser.py +377 -0
  294. msprobe/msaccucmp/pytorch_cmp/pytorch_dump_data.py +461 -0
  295. msprobe/msaccucmp/shape_conversion.py +41 -0
  296. msprobe/msaccucmp/vector_cmp/__init__.py +16 -0
  297. msprobe/msaccucmp/vector_cmp/batch_compare.py +197 -0
  298. msprobe/msaccucmp/vector_cmp/compare_detail/__init__.py +16 -0
  299. msprobe/msaccucmp/vector_cmp/compare_detail/compare_detail.py +245 -0
  300. msprobe/msaccucmp/vector_cmp/compare_detail/detail.py +182 -0
  301. msprobe/msaccucmp/vector_cmp/compare_detail/detail_writer.py +580 -0
  302. msprobe/msaccucmp/vector_cmp/fusion_manager/__init__.py +16 -0
  303. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_fusion_op.py +588 -0
  304. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_npu_vs_npu.py +339 -0
  305. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_result.py +326 -0
  306. msprobe/msaccucmp/vector_cmp/fusion_manager/compare_rule.py +156 -0
  307. msprobe/msaccucmp/vector_cmp/fusion_manager/fusion_op.py +204 -0
  308. msprobe/msaccucmp/vector_cmp/fusion_manager/fusion_rule_parser.py +635 -0
  309. msprobe/msaccucmp/vector_cmp/fusion_manager/quant_filter.py +187 -0
  310. msprobe/msaccucmp/vector_cmp/range_manager/__init__.py +16 -0
  311. msprobe/msaccucmp/vector_cmp/range_manager/range_manager.py +100 -0
  312. msprobe/msaccucmp/vector_cmp/range_manager/range_mode.py +94 -0
  313. msprobe/msaccucmp/vector_cmp/range_manager/select_mode.py +86 -0
  314. msprobe/msaccucmp/vector_cmp/vector_comparison.py +535 -0
  315. msprobe/msprobe.py +101 -130
  316. msprobe/overflow_check/__init__.py +15 -0
  317. msprobe/{nan_analyze → overflow_check}/analyzer.py +38 -27
  318. msprobe/{nan_analyze → overflow_check}/graph.py +28 -27
  319. msprobe/{nan_analyze → overflow_check}/utils.py +15 -14
  320. msprobe/pytorch/__init__.py +20 -14
  321. msprobe/pytorch/aclgraph_dump/__init__.py +45 -0
  322. msprobe/pytorch/aclgraph_dump/_meta.py +26 -0
  323. msprobe/pytorch/api_accuracy_checker/{run_ut/run_ut.py → acc_check/acc_check.py} +50 -45
  324. msprobe/pytorch/api_accuracy_checker/{run_ut/run_ut_utils.py → acc_check/acc_check_utils.py} +201 -30
  325. msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/data_generate.py +56 -16
  326. msprobe/pytorch/api_accuracy_checker/{run_ut/multi_run_ut.py → acc_check/multi_acc_check.py} +32 -47
  327. msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/run_overflow_check.py +19 -18
  328. msprobe/pytorch/api_accuracy_checker/common/config.py +22 -20
  329. msprobe/pytorch/api_accuracy_checker/common/utils.py +72 -13
  330. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -11
  331. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +23 -14
  332. msprobe/pytorch/api_accuracy_checker/compare/compare.py +45 -32
  333. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +12 -11
  334. msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +14 -12
  335. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +14 -12
  336. msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +12 -11
  337. msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +12 -11
  338. msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +21 -19
  339. msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +14 -13
  340. msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +12 -11
  341. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +60 -11
  342. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +27 -16
  343. msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +13 -11
  344. msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +39 -18
  345. msprobe/pytorch/bench_functions/__init__.py +12 -11
  346. msprobe/pytorch/bench_functions/apply_adam.py +12 -11
  347. msprobe/pytorch/bench_functions/apply_adam_w.py +12 -11
  348. msprobe/pytorch/bench_functions/confusion_transpose.py +12 -11
  349. msprobe/pytorch/bench_functions/fast_gelu.py +12 -11
  350. msprobe/pytorch/bench_functions/group_norm_silu.py +12 -11
  351. msprobe/pytorch/bench_functions/layer_norm_eval.py +12 -11
  352. msprobe/pytorch/bench_functions/linear.py +12 -11
  353. msprobe/pytorch/bench_functions/matmul_backward.py +12 -11
  354. msprobe/pytorch/bench_functions/mish.py +12 -11
  355. msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +12 -11
  356. msprobe/pytorch/bench_functions/npu_fusion_attention.py +12 -11
  357. msprobe/pytorch/bench_functions/rms_norm.py +12 -11
  358. msprobe/pytorch/bench_functions/rotary_mul.py +12 -11
  359. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +12 -11
  360. msprobe/pytorch/bench_functions/sort_v2.py +12 -11
  361. msprobe/pytorch/bench_functions/swiglu.py +12 -11
  362. msprobe/pytorch/common/__init__.py +12 -11
  363. msprobe/pytorch/common/log.py +12 -11
  364. msprobe/pytorch/common/parse_json.py +12 -11
  365. msprobe/pytorch/common/utils.py +52 -19
  366. msprobe/pytorch/compare/distributed_compare.py +13 -13
  367. msprobe/pytorch/compare/match.py +12 -11
  368. msprobe/pytorch/compare/pt_compare.py +14 -20
  369. msprobe/pytorch/compare/pt_diff_analyze.py +12 -11
  370. msprobe/pytorch/compare/utils.py +12 -11
  371. msprobe/pytorch/{hook_module → dump/api_dump}/api_register.py +18 -16
  372. msprobe/pytorch/{hook_module → dump/api_dump}/hook_module.py +14 -13
  373. msprobe/pytorch/{hook_module → dump/api_dump}/pt_hook_manager.py +68 -23
  374. msprobe/pytorch/{hook_module → dump/api_dump}/register_optimizer_hook.py +13 -11
  375. msprobe/pytorch/{hook_module → dump/api_dump}/script_wrapper.py +17 -14
  376. msprobe/pytorch/{hook_module → dump/api_dump}/utils.py +12 -11
  377. msprobe/pytorch/{debugger → dump/debugger}/debugger_config.py +23 -38
  378. msprobe/pytorch/dump/debugger/precision_debugger.py +130 -0
  379. msprobe/pytorch/{function_factory.py → dump/function_factory.py} +12 -11
  380. msprobe/pytorch/dump/module_dump/hook_wrapper.py +17 -13
  381. msprobe/pytorch/dump/module_dump/module_dump.py +16 -15
  382. msprobe/pytorch/dump/module_dump/{module_processer.py → module_processor.py} +54 -42
  383. msprobe/pytorch/dump/pt_config.py +128 -0
  384. msprobe/pytorch/{pytorch_service.py → dump/pytorch_service.py} +22 -21
  385. msprobe/pytorch/monitor/csv2tb.py +13 -11
  386. msprobe/pytorch/monitor/data_writers.py +13 -11
  387. msprobe/pytorch/monitor/distributed/wrap_distributed.py +13 -11
  388. msprobe/pytorch/monitor/features.py +12 -11
  389. msprobe/pytorch/monitor/module_hook.py +67 -59
  390. msprobe/pytorch/monitor/module_metric.py +13 -11
  391. msprobe/pytorch/monitor/optimizer_collect.py +37 -35
  392. msprobe/pytorch/monitor/utils.py +13 -11
  393. msprobe/pytorch/monitor/visualizer.py +12 -11
  394. msprobe/pytorch/torchair_dump/__init__.py +17 -0
  395. msprobe/pytorch/torchair_dump/torchair_dump.py +114 -0
  396. msprobe/scripts/atb/config_example.json +10 -0
  397. msprobe/scripts/atb/load_atb_probe.sh +101 -0
  398. msprobe/scripts/atb/unload_atb_probe.sh +27 -0
  399. msprobe/scripts/build_msaccucmp.sh +186 -0
  400. msprobe/scripts/conf/help.info +6 -0
  401. msprobe/scripts/conf/version.info +3 -0
  402. msprobe/scripts/run_script/common.sh +538 -0
  403. msprobe/scripts/run_script/main_msaccucmp.sh +232 -0
  404. msprobe/visualization/__init__.py +12 -11
  405. msprobe/visualization/builder/__init__.py +12 -11
  406. msprobe/visualization/builder/graph_builder.py +45 -30
  407. msprobe/visualization/builder/graph_merger.py +53 -32
  408. msprobe/visualization/builder/msprobe_adapter.py +34 -44
  409. msprobe/visualization/compare/__init__.py +12 -11
  410. msprobe/visualization/compare/graph_comparator.py +63 -51
  411. msprobe/visualization/compare/mode_adapter.py +28 -113
  412. msprobe/visualization/db_utils.py +133 -22
  413. msprobe/visualization/graph/__init__.py +12 -11
  414. msprobe/visualization/graph/base_node.py +15 -27
  415. msprobe/visualization/graph/distributed_analyzer.py +97 -40
  416. msprobe/visualization/graph/graph.py +14 -16
  417. msprobe/visualization/graph/node_colors.py +34 -31
  418. msprobe/visualization/graph/node_op.py +12 -11
  419. msprobe/visualization/graph_service.py +580 -205
  420. msprobe/visualization/utils.py +278 -31
  421. tb_graph_ascend/secure_build.py +175 -0
  422. tb_graph_ascend/server/__init__.py +15 -0
  423. tb_graph_ascend/server/app/__init__.py +15 -0
  424. tb_graph_ascend/server/app/model/__init__.py +15 -0
  425. tb_graph_ascend/server/app/model/hierarchy.py +348 -0
  426. tb_graph_ascend/server/app/model/layout_hierarchy_model.py +69 -0
  427. tb_graph_ascend/server/app/model/match_nodes_model.py +573 -0
  428. tb_graph_ascend/server/app/repositories/__init__.py +15 -0
  429. tb_graph_ascend/server/app/repositories/graph_repo_base.py +32 -0
  430. tb_graph_ascend/server/app/repositories/graph_repo_db.py +879 -0
  431. tb_graph_ascend/server/app/repositories/graph_repo_vis.py +83 -0
  432. tb_graph_ascend/server/app/service/__init__.py +18 -0
  433. tb_graph_ascend/server/app/service/graph_service_base.py +158 -0
  434. tb_graph_ascend/server/app/service/graph_service_db.py +438 -0
  435. tb_graph_ascend/server/app/service/graph_service_factory.py +54 -0
  436. tb_graph_ascend/server/app/service/graph_service_vis.py +480 -0
  437. tb_graph_ascend/server/app/utils/__init__.py +15 -0
  438. tb_graph_ascend/server/app/utils/constant.py +80 -0
  439. tb_graph_ascend/server/app/utils/file_check_wrapper.py +46 -0
  440. tb_graph_ascend/server/app/utils/global_state.py +95 -0
  441. tb_graph_ascend/server/app/utils/graph_utils.py +661 -0
  442. tb_graph_ascend/server/app/utils/i18n.py +153 -0
  443. tb_graph_ascend/server/app/utils/request_method.py +46 -0
  444. tb_graph_ascend/server/app/views/__init__.py +15 -0
  445. tb_graph_ascend/server/app/views/graph_views.py +304 -0
  446. tb_graph_ascend/server/plugin.py +108 -0
  447. tb_graph_ascend/server/static/index.html +9250 -0
  448. tb_graph_ascend/server/static/index.js +21 -0
  449. tb_graph_ascend/setup.py +57 -0
  450. mindstudio_probe-8.3.3.dist-info/LICENSE +0 -201
  451. mindstudio_probe-8.3.3.dist-info/RECORD +0 -491
  452. mindstudio_probe-8.3.3.dist-info/entry_points.txt +0 -2
  453. mindstudio_probe-8.3.3.dist-info/top_level.txt +0 -1
  454. msprobe/CMakeLists.txt +0 -5
  455. msprobe/README.md +0 -203
  456. msprobe/core/advisor/advisor.py +0 -129
  457. msprobe/core/advisor/advisor_const.py +0 -58
  458. msprobe/core/advisor/advisor_result.py +0 -58
  459. msprobe/core/compare/find_first/data_processor.py +0 -35
  460. msprobe/core/compare/highlight.py +0 -390
  461. msprobe/core/data_dump/data_collector.py +0 -356
  462. msprobe/core/grad_probe/constant.py +0 -90
  463. msprobe/core/grad_probe/grad_compare.py +0 -187
  464. msprobe/core/grad_probe/utils.py +0 -105
  465. msprobe/core/kernel_dump/kernel_config.py +0 -33
  466. msprobe/docs/01.installation.md +0 -250
  467. msprobe/docs/02.config_introduction.md +0 -221
  468. msprobe/docs/03.config_examples.md +0 -281
  469. msprobe/docs/04.kernel_dump_PyTorch.md +0 -73
  470. msprobe/docs/05.data_dump_PyTorch.md +0 -518
  471. msprobe/docs/06.data_dump_MindSpore.md +0 -618
  472. msprobe/docs/07.accuracy_checker_PyTorch.md +0 -310
  473. msprobe/docs/09.accuracy_checker_MindSpore.md +0 -120
  474. msprobe/docs/10.accuracy_compare_PyTorch.md +0 -637
  475. msprobe/docs/11.accuracy_compare_MindSpore.md +0 -769
  476. msprobe/docs/12.overflow_check_PyTorch.md +0 -82
  477. msprobe/docs/13.overflow_check_MindSpore.md +0 -33
  478. msprobe/docs/14.data_parse_PyTorch.md +0 -282
  479. msprobe/docs/15.free_benchmarking_PyTorch.md +0 -169
  480. msprobe/docs/16.free_benchmarking_MindSpore.md +0 -159
  481. msprobe/docs/17.grad_probe.md +0 -205
  482. msprobe/docs/18.online_dispatch.md +0 -89
  483. msprobe/docs/19.monitor.md +0 -753
  484. msprobe/docs/20.monitor_performance_baseline.md +0 -52
  485. msprobe/docs/21.visualization_PyTorch.md +0 -519
  486. msprobe/docs/22.visualization_MindSpore.md +0 -515
  487. msprobe/docs/23.generate_operator_PyTorch.md +0 -107
  488. msprobe/docs/24.code_mapping_Mindspore.md +0 -29
  489. msprobe/docs/25.tool_function_introduction.md +0 -29
  490. msprobe/docs/26.data_dump_PyTorch_baseline.md +0 -48
  491. msprobe/docs/27.dump_json_instruction.md +0 -795
  492. msprobe/docs/28.debugger_save_instruction.md +0 -288
  493. msprobe/docs/28.kernel_dump_MindSpore.md +0 -69
  494. msprobe/docs/29.data_dump_MSAdapter.md +0 -235
  495. msprobe/docs/30.overflow_check_MSAdapter.md +0 -31
  496. msprobe/docs/31.config_check.md +0 -107
  497. msprobe/docs/32.ckpt_compare.md +0 -69
  498. msprobe/docs/33.generate_operator_MindSpore.md +0 -181
  499. msprobe/docs/34.RL_collect.md +0 -101
  500. msprobe/docs/35.nan_analyze.md +0 -73
  501. msprobe/docs/36.calculation_result_change.md +0 -75
  502. msprobe/docs/FAQ.md +0 -232
  503. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +0 -146
  504. msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +0 -14
  505. msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +0 -33
  506. msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +0 -217
  507. msprobe/docs/img/BLOOM-7B_1.png +0 -0
  508. msprobe/docs/img/BLOOM-7B_2.png +0 -0
  509. msprobe/docs/img/BLOOM-7B_3.png +0 -0
  510. msprobe/docs/img/BLOOM-7B_4.png +0 -0
  511. msprobe/docs/img/GPT-3_1.png +0 -0
  512. msprobe/docs/img/GPT-3_2.png +0 -0
  513. msprobe/docs/img/GPT-3_3.png +0 -0
  514. msprobe/docs/img/GPT-3_4.png +0 -0
  515. msprobe/docs/img/GPT-3_5.png +0 -0
  516. msprobe/docs/img/GPT-3_6.png +0 -0
  517. msprobe/docs/img/GPT-3_7.png +0 -0
  518. msprobe/docs/img/GPT-3_8.png +0 -0
  519. msprobe/docs/img/YOLOV5S_1.png +0 -0
  520. msprobe/docs/img/YOLOV5S_2.png +0 -0
  521. msprobe/docs/img/accuracy_checking_details.png +0 -0
  522. msprobe/docs/img/accuracy_checking_result.png +0 -0
  523. msprobe/docs/img/api_precision_compare_details.png +0 -0
  524. msprobe/docs/img/api_precision_compare_result.png +0 -0
  525. msprobe/docs/img/auto_analyze_log.png +0 -0
  526. msprobe/docs/img/compare_result.png +0 -0
  527. msprobe/docs/img/compare_result_pkl.png +0 -0
  528. msprobe/docs/img/compare_result_pkl_md5.png.png +0 -0
  529. msprobe/docs/img/cpu_info.png +0 -0
  530. msprobe/docs/img/free_benchmark.png +0 -0
  531. msprobe/docs/img/free_benchmark_framework.png +0 -0
  532. msprobe/docs/img/grad_probe_image-1.png +0 -0
  533. msprobe/docs/img/grad_probe_image-2.png +0 -0
  534. msprobe/docs/img/grad_probe_image-3.png +0 -0
  535. msprobe/docs/img/grad_probe_image-4.png +0 -0
  536. msprobe/docs/img/grad_probe_image.png +0 -0
  537. msprobe/docs/img/merge_result.png +0 -0
  538. msprobe/docs/img/module_compare.png +0 -0
  539. msprobe/docs/img/monitor/cpu_info.png +0 -0
  540. msprobe/docs/img/monitor/step_count_per_record.png +0 -0
  541. msprobe/docs/img/ms_dump.png +0 -0
  542. msprobe/docs/img/ms_layer.png +0 -0
  543. msprobe/docs/img/pt_dump.png +0 -0
  544. msprobe/docs/img/save_compare_result_sample.png +0 -0
  545. msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
  546. msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
  547. msprobe/docs/img/visualization/proxy.png +0 -0
  548. msprobe/docs/img/visualization/tensorboard_1.png +0 -0
  549. msprobe/docs/img/visualization/tensorboard_2.png +0 -0
  550. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  551. msprobe/docs/img/visualization/vis_browser_2.png +0 -0
  552. msprobe/docs/img/visualization/vis_match_info.png +0 -0
  553. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  554. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  555. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  556. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  557. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  558. msprobe/docs/visualization/GPTModel.png +0 -0
  559. msprobe/docs/visualization/ParallelMLP.png +0 -0
  560. msprobe/docs/visualization/layer_mapping_example.md +0 -132
  561. msprobe/docs/visualization/mapping.png +0 -0
  562. msprobe/docs/visualization/mapping1.png +0 -0
  563. msprobe/docs/visualization/mindspeed_llamafactoary_img/1.png +0 -0
  564. msprobe/docs/visualization/mindspeed_llamafactoary_img/2.png +0 -0
  565. msprobe/docs/visualization/mindspeed_llamafactoary_img/3.png +0 -0
  566. msprobe/docs/visualization/mindspeed_llamafactoary_img/4.png +0 -0
  567. msprobe/docs/visualization/mindspeed_llamafactoary_img/5.png +0 -0
  568. msprobe/docs/visualization/mindspeed_llamafactoary_img/6.png +0 -0
  569. msprobe/docs/visualization/mindspeed_llamafactoary_img/7.png +0 -0
  570. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt +0 -59
  571. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory1.png +0 -0
  572. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory2.png +0 -0
  573. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt +0 -80
  574. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed1.png +0 -0
  575. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed2.png +0 -0
  576. msprobe/docs/visualization/mindspeed_llamafactory_mapping.md +0 -330
  577. msprobe/docs/visualization/module_name.png +0 -0
  578. msprobe/docs/visualization/module_name1.png +0 -0
  579. msprobe/docs/visualization/no_mapping.png +0 -0
  580. msprobe/docs/visualization/no_mapping1.png +0 -0
  581. msprobe/docs/visualization/no_mapping_analyze.png +0 -0
  582. msprobe/docs/visualization/top_layer.png +0 -0
  583. msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +0 -460
  584. msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +0 -2081
  585. msprobe/mindspore/code_mapping/bind.py +0 -283
  586. msprobe/mindspore/code_mapping/cmd_parser.py +0 -40
  587. msprobe/mindspore/code_mapping/graph.py +0 -49
  588. msprobe/mindspore/code_mapping/graph_parser.py +0 -211
  589. msprobe/mindspore/code_mapping/main.py +0 -24
  590. msprobe/mindspore/code_mapping/processor.py +0 -34
  591. msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +0 -111
  592. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -52
  593. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +0 -257
  594. msprobe/mindspore/free_benchmark/common/config.py +0 -27
  595. msprobe/mindspore/free_benchmark/common/handler_params.py +0 -31
  596. msprobe/mindspore/free_benchmark/common/utils.py +0 -100
  597. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -638
  598. msprobe/mindspore/free_benchmark/handler/base_handler.py +0 -105
  599. msprobe/mindspore/free_benchmark/handler/check_handler.py +0 -55
  600. msprobe/mindspore/free_benchmark/handler/fix_handler.py +0 -51
  601. msprobe/mindspore/free_benchmark/handler/handler_factory.py +0 -36
  602. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +0 -82
  603. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +0 -45
  604. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +0 -78
  605. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +0 -77
  606. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +0 -56
  607. msprobe/mindspore/free_benchmark/perturbation/no_change.py +0 -27
  608. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +0 -46
  609. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +0 -51
  610. msprobe/mindspore/grad_probe/global_context.py +0 -127
  611. msprobe/mindspore/grad_probe/grad_analyzer.py +0 -260
  612. msprobe/mindspore/grad_probe/grad_monitor.py +0 -42
  613. msprobe/mindspore/grad_probe/grad_stat_csv.py +0 -161
  614. msprobe/mindspore/grad_probe/hook.py +0 -115
  615. msprobe/mindspore/grad_probe/utils.py +0 -43
  616. msprobe/mindspore/mindtorch/__init__.py +0 -18
  617. msprobe/mindspore/ms_config.py +0 -153
  618. msprobe/mindspore/task_handler_factory.py +0 -44
  619. msprobe/nan_analyze/__init__.py +0 -14
  620. msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +0 -9
  621. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +0 -480
  622. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +0 -567
  623. msprobe/pytorch/debugger/precision_debugger.py +0 -181
  624. msprobe/pytorch/free_benchmark/__init__.py +0 -23
  625. msprobe/pytorch/free_benchmark/common/constant.py +0 -85
  626. msprobe/pytorch/free_benchmark/common/counter.py +0 -87
  627. msprobe/pytorch/free_benchmark/common/enums.py +0 -80
  628. msprobe/pytorch/free_benchmark/common/params.py +0 -152
  629. msprobe/pytorch/free_benchmark/common/utils.py +0 -143
  630. msprobe/pytorch/free_benchmark/compare/grad_saver.py +0 -215
  631. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +0 -121
  632. msprobe/pytorch/free_benchmark/main.py +0 -123
  633. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +0 -28
  634. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +0 -56
  635. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +0 -107
  636. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +0 -121
  637. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +0 -89
  638. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +0 -87
  639. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +0 -43
  640. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +0 -60
  641. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +0 -34
  642. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +0 -252
  643. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +0 -54
  644. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +0 -40
  645. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +0 -45
  646. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -181
  647. msprobe/pytorch/grad_probe/__init__.py +0 -0
  648. msprobe/pytorch/grad_probe/grad_monitor.py +0 -108
  649. msprobe/pytorch/grad_probe/grad_stat_csv.py +0 -160
  650. msprobe/pytorch/hook_module/__init__.py +0 -16
  651. msprobe/pytorch/hook_module/wrap_aten.py +0 -111
  652. msprobe/pytorch/online_dispatch/__init__.py +0 -19
  653. msprobe/pytorch/online_dispatch/compare.py +0 -224
  654. msprobe/pytorch/online_dispatch/dispatch.py +0 -332
  655. msprobe/pytorch/online_dispatch/dump_compare.py +0 -179
  656. msprobe/pytorch/online_dispatch/single_compare.py +0 -412
  657. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +0 -58
  658. msprobe/pytorch/online_dispatch/utils.py +0 -158
  659. msprobe/pytorch/parse_tool/__init__.py +0 -0
  660. msprobe/pytorch/parse_tool/cli.py +0 -31
  661. msprobe/pytorch/parse_tool/lib/__init__.py +0 -0
  662. msprobe/pytorch/parse_tool/lib/compare.py +0 -253
  663. msprobe/pytorch/parse_tool/lib/config.py +0 -50
  664. msprobe/pytorch/parse_tool/lib/file_desc.py +0 -45
  665. msprobe/pytorch/parse_tool/lib/interactive_cli.py +0 -97
  666. msprobe/pytorch/parse_tool/lib/parse_exception.py +0 -54
  667. msprobe/pytorch/parse_tool/lib/parse_tool.py +0 -161
  668. msprobe/pytorch/parse_tool/lib/utils.py +0 -299
  669. msprobe/pytorch/parse_tool/lib/visualization.py +0 -85
  670. msprobe/pytorch/pt_config.py +0 -299
  671. /msprobe/core/{grad_probe → dump}/__init__.py +0 -0
  672. /msprobe/{mindspore/code_mapping → core/dump/api_dump}/__init__.py +0 -0
  673. /msprobe/{mindspore/debugger → core/dump/data_dump}/__init__.py +0 -0
  674. /msprobe/{mindspore/exception_dump → core/dump/data_dump/data_processor}/__init__.py +0 -0
  675. /msprobe/{mindspore/free_benchmark → core/dump/debugger}/__init__.py +0 -0
  676. /msprobe/{mindspore/free_benchmark/common → core/dump/kernel_dump}/__init__.py +0 -0
  677. /msprobe/mindspore/{free_benchmark/handler → dump/debugger}/__init__.py +0 -0
  678. /msprobe/mindspore/{grad_probe → dump/dump_processor}/__init__.py +0 -0
  679. /msprobe/mindspore/{overflow_check → dump/exception_dump}/__init__.py +0 -0
  680. /msprobe/mindspore/{mindtorch → dump/mindtorch}/mindtorch_adaptor.py +0 -0
  681. /msprobe/{pytorch/api_accuracy_checker/run_ut → mindspore/dump/overflow_check}/__init__.py +0 -0
  682. /msprobe/{pytorch/debugger → mindspore/monitor}/__init__.py +0 -0
  683. /msprobe/{pytorch/free_benchmark/common → msaccucmp}/__init__.py +0 -0
  684. /msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/.keep +0 -0
  685. /msprobe/pytorch/{free_benchmark/perturbed_layers → api_accuracy_checker/acc_check}/__init__.py +0 -0
  686. /msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/torch_ut_setting.json +0 -0
  687. /msprobe/pytorch/{free_benchmark/perturbed_layers/npu → dump/api_dump}/__init__.py +0 -0
  688. /msprobe/pytorch/{hook_module → dump/api_dump}/support_wrap_ops.yaml +0 -0
  689. /msprobe/pytorch/{free_benchmark/result_handlers → dump/debugger}/__init__.py +0 -0
@@ -0,0 +1,879 @@
1
+ # -------------------------------------------------------------------------
2
+ # This file is part of the MindStudio project.
3
+ # Copyright (c) 2025 Huawei Technologies Co.,Ltd.
4
+ #
5
+ # MindStudio is licensed under Mulan PSL v2.
6
+ # You can use this software according to the terms and conditions of the Mulan PSL v2.
7
+ # You may obtain a copy of Mulan PSL v2 at:
8
+ #
9
+ # http://license.coscl.org.cn/MulanPSL2
10
+ #
11
+ # THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12
+ # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13
+ # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14
+ # See the Mulan PSL v2 for more details.
15
+ # -------------------------------------------------------------------------
16
+
17
+ import os
18
+ import json
19
+ import time
20
+ import sqlite3
21
+ from tensorboard.util import tb_logging
22
+
23
+ from .graph_repo_base import GraphRepo
24
+ from ..utils.graph_utils import GraphUtils
25
+ from ..utils.global_state import GraphState
26
+ from ..utils.constant import SINGLE, NPU, BENCH, DataType
27
+
28
+ logger = tb_logging.get_logger()
29
+ DB_TYPE = DataType.DB.value
30
+
31
+
32
+ class GraphRepoDB(GraphRepo):
33
+
34
def __init__(self, db_path):
    """Bind the repository to a database file and open its first connection.

    Args:
        db_path: Location of the database file backing this repository.
    """
    self.repo_type = DB_TYPE
    self.db_path = db_path
    # NOTE(review): presumably stores the connection on the instance so that
    # get_db_connection() can return it — confirm in _initialize_db_connection.
    self._initialize_db_connection()
38
+
39
def get_db_connection(self):
    """Return the connection object currently held in ``self.conn``."""
    connection = self.conn
    return connection
41
+
42
+ # DB: 查询配置表信息
43
def query_config_info(self):
    """Read the first row of ``tb_config`` and map it to the config payload.

    Returns:
        dict: A payload with the keys ``microSteps``, ``tooltips``,
        ``overflowCheck``, ``isSingleGraph``, ``colors``,
        ``matchedConfigFiles``, ``task``, ``ranks`` and ``steps``.
        An empty dict is returned when no connection is available, when
        ``tb_config`` has no rows, or when the query fails.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return {}
    query = "SELECT * FROM tb_config"
    try:
        with conn as c:
            # Only the first configuration row is ever used.
            row = c.execute(query).fetchone()
        if row is None:
            # Report the empty table explicitly instead of letting an
            # IndexError surface as "list index out of range".
            logger.error("Failed to query config info: tb_config is empty")
            return {}

        record = dict(row)
        # Build the final config object returned to the caller.
        config_info = {
            "microSteps": record.get('micro_steps', 1) or 1,
            "tooltips": GraphUtils.safe_json_loads(record.get('tool_tip')),
            "overflowCheck": bool(record.get('overflow_check', 1)),
            "isSingleGraph": record.get('graph_type') != 'compare',
            "colors": GraphUtils.safe_json_loads(record.get('node_colors')),
            "matchedConfigFiles": [],
            "task": record.get('task', ''),
            "ranks": GraphUtils.safe_json_loads(record.get('rank_list')),
            "steps": GraphUtils.safe_json_loads(record.get('step_list')),
        }
        return config_info
    except Exception as e:
        logger.error(f"Failed to query config info: {e}")
        return {}
    finally:
        conn.close()
72
+
73
+ # DB:查询根节点信息
74
def query_root_nodes(self, graph_type, rank, step):
    """Fetch the root node (the row whose ``up_node`` is empty) of a graph.

    Args:
        graph_type: Data source to read; ``SINGLE`` is looked up as ``NPU``.
        rank: Rank the node belongs to.
        step: Step the node belongs to.

    Returns:
        dict: The first matching row passed through
        ``_convert_db_to_object``; an empty dict when there is no
        connection, no matching row, or the query fails.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return {}
    # Single-graph requests are served from the NPU data source.
    data_source = NPU if graph_type == SINGLE else graph_type
    sql = """
        SELECT
            node_name,
            up_node,
            sub_nodes,
            node_type,
            matched_node_link,
            precision_index,
            overflow_level,
            matched_distributed
        FROM
            tb_nodes
        WHERE
            step = ?
            AND rank = ?
            AND data_source = ?
            AND up_node = ''
    """
    try:
        with conn as db:
            first_row = db.execute(sql, (step, rank, data_source)).fetchone()
            if first_row is None:
                return {}
            return self._convert_db_to_object(dict(first_row))
    except Exception as e:
        logger.error(f"Failed to query root nodes: {e}")
        return {}
    finally:
        conn.close()
110
+
111
+ # DB:查询当前节点的所有父节点信息
112
def query_up_nodes(self, node_name, graph_type, rank, step):
    """Collect a node together with every ancestor up to the root.

    Args:
        node_name: Name of the node to start from.
        graph_type: Data source to read; ``SINGLE`` is looked up as ``NPU``.
        rank: Rank the node belongs to.
        step: Step the node belongs to.

    Returns:
        dict: ``{node_name: converted_row}`` ordered from the starting node
        (level 0) upwards; an empty dict when there is no connection or
        the query fails.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return {}
    data_source = NPU if graph_type == SINGLE else graph_type
    # The recursive CTE starts at the requested node, then repeatedly follows
    # its up_node field to the parent row until a row without a parent
    # (the root) is reached.
    sql = """
        WITH RECURSIVE parent_chain AS (
            SELECT child.id, child.node_name, child.up_node, child.data_source, child.rank, child.step, 0 AS level
            FROM
                tb_nodes child
            WHERE
                child.step = ?
                AND child.rank = ?
                AND child.data_source = ?
                AND child.node_name = ?

            UNION ALL

            SELECT
                parent.id,
                parent.node_name,
                parent.up_node,
                parent.data_source,
                parent.rank,
                parent.step,
                pc.level + 1
            FROM
                tb_nodes parent
            INNER JOIN parent_chain pc
                ON parent.data_source = pc.data_source
                AND parent.node_name = pc.up_node
                AND parent.rank = pc.rank
                AND parent.step = pc.step
            WHERE
                pc.up_node IS NOT NULL
                AND pc.up_node != ''
        )
        SELECT
            tb_nodes.id,
            tb_nodes.data_source,
            tb_nodes.node_name,
            tb_nodes.up_node,
            tb_nodes.sub_nodes,
            tb_nodes.node_type,
            tb_nodes.matched_node_link,
            tb_nodes.precision_index,
            tb_nodes.overflow_level,
            tb_nodes.matched_distributed
        FROM
            tb_nodes
        WHERE
            id IN (SELECT id FROM parent_chain)
        ORDER BY (
            SELECT
                level
            FROM
                parent_chain pc
            WHERE
                pc.node_name = tb_nodes.node_name)
        ASC
    """
    try:
        with conn as db:
            chain_rows = db.execute(sql, (step, rank, data_source, node_name)).fetchall()
            return {
                record['node_name']: self._convert_db_to_object(dict(record))
                for record in chain_rows
            }
    except Exception as e:
        logger.error(f"Failed to query up nodes: {e}")
        return {}
    finally:
        conn.close()
190
+
191
+ # DB: 查询待匹配节点的信息,构造graph data
192
def query_matched_nodes_info(self, npu_node_name, bench_node_name, rank, step):
    """Load the nodes involved in a match request and build graph data.

    Up to four rows are looked up: the NPU node, the BENCH node, and the
    "opposite" node of each as computed by
    ``GraphUtils.get_opposite_node_name``.

    Args:
        npu_node_name: Node name on the NPU side.
        bench_node_name: Node name on the BENCH side.
        rank: Rank the nodes belong to.
        step: Step the nodes belong to.

    Returns:
        The result of ``_convert_to_graph_json`` for the collected NPU and
        BENCH nodes (called with two empty dicts when the query fails), or
        an empty dict when no connection is available.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return {}
    sql = """
        SELECT
            id,
            node_name,
            node_type,
            up_node,
            sub_nodes,
            data_source,
            input_data,
            output_data,
            matched_node_link
        FROM tb_nodes
        WHERE step = ? AND rank = ? AND data_source = ? AND node_name = ?
    """
    npu_opposite_name = GraphUtils.get_opposite_node_name(npu_node_name)
    bench_opposite_name = GraphUtils.get_opposite_node_name(bench_node_name)
    # Each lookup is (data source, node name, bucket the result is stored in).
    lookups = (
        (NPU, npu_node_name, 'npu'),
        (NPU, npu_opposite_name, 'npu_opposite'),
        (BENCH, bench_node_name, 'bench'),
        (BENCH, bench_opposite_name, 'bench_opposite'),
    )
    found = {}
    try:
        with conn as db:
            for source, name, bucket in lookups:
                # Lookups without a node name are skipped entirely.
                if not name:
                    continue
                first_row = db.execute(sql, (step, rank, source, name)).fetchone()
                if first_row is None:
                    found[bucket] = {}
                    continue
                node = self._convert_db_to_object(dict(first_row))
                found[bucket] = {node.get('node_name'): node}

        npu_side = found.get('npu', {}) | found.get('npu_opposite', {})
        bench_side = found.get('bench', {}) | found.get('bench_opposite', {})
        return self._convert_to_graph_json(npu_side, bench_side)
    except Exception as e:
        logger.error(f"Failed to query matched nodes info: {e}")
        return self._convert_to_graph_json({}, {})
    finally:
        conn.close()
245
+
246
+ # DB: 查询待匹配节点及其子节点的信息,递归查询当前节点信息和其所有的子节点信息,一直叶子节点
247
def query_node_and_sub_nodes(self, npu_node_name, bench_node_name, rank, step):
    """Load the requested nodes plus all of their descendants.

    For the NPU node, the BENCH node and the "opposite" node of each
    (``GraphUtils.get_opposite_node_name``), a recursive query expands the
    ``sub_nodes`` JSON array level by level down to the leaves.

    Args:
        npu_node_name: Node name on the NPU side.
        bench_node_name: Node name on the BENCH side.
        rank: Rank the nodes belong to.
        step: Step the nodes belong to.

    Returns:
        The result of ``_convert_to_graph_json`` for the collected NPU and
        BENCH nodes; ``{'NPU': {}, 'Bench': {}}`` when the query fails; an
        empty dict when no connection is available.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return {}
    sql = """
        WITH RECURSIVE descendants AS (
            -- 初始节点选择
            SELECT
                id,
                node_name,
                node_type,
                up_node,
                sub_nodes,
                data_source,
                input_data,
                output_data,
                matched_node_link,
                node_order,
                step,
                rank
            FROM tb_nodes
            WHERE step = ? AND rank = ? AND data_source = ? AND node_name = ?

            UNION ALL

            -- 递归部分
            SELECT
                child.id,
                child.node_name,
                child.node_type,
                child.up_node,
                child.sub_nodes,
                child.data_source,
                child.input_data,
                child.output_data,
                child.matched_node_link,
                child.node_order,
                child.step,
                child.rank
            FROM descendants d
            JOIN json_each(d.sub_nodes) AS je -- 将 sub_nodes JSON 数组展开为多行
            JOIN tb_nodes child
                ON child.node_name = je.value -- 子节点名称匹配
                AND child.step = d.step
                AND child.rank = d.rank
                AND child.data_source = d.data_source
            WHERE
                d.sub_nodes IS NOT NULL -- 父节点的 sub_nodes 不为 NULL
                AND d.sub_nodes != '' -- 不是空
                AND d.sub_nodes != '[]'
                AND json_type(d.sub_nodes) = 'array' -- 确保是合法 JSON 数组
        )
        SELECT * FROM descendants
    """
    npu_opposite_name = GraphUtils.get_opposite_node_name(npu_node_name)
    bench_opposite_name = GraphUtils.get_opposite_node_name(bench_node_name)
    # Each lookup is (data source, node name, bucket the result is stored in).
    lookups = (
        (NPU, npu_node_name, 'npu'),
        (NPU, npu_opposite_name, 'npu_opposite'),
        (BENCH, bench_node_name, 'bench'),
        (BENCH, bench_opposite_name, 'bench_opposite'),
    )
    found = {}
    try:
        with conn as db:
            for source, name, bucket in lookups:
                # Lookups without a node name are skipped entirely.
                if not name:
                    continue
                cursor = db.execute(sql, (step, rank, source, name))
                found[bucket] = self._fetch_and_convert_rows(cursor)
        npu_side = found.get('npu', {}) | found.get('npu_opposite', {})
        bench_side = found.get('bench', {}) | found.get('bench_opposite', {})
        return self._convert_to_graph_json(npu_side, bench_side)
    except Exception as e:
        logger.error(f"Failed to query node and sub nodes: {e}")
        return {'NPU': {}, 'Bench': {}}
    finally:
        conn.close()
330
+
331
+ # DB:查询配置文件中的待匹配节点信息
332
def query_matched_nodes_info_by_config(self, match_node_links, rank, step):
    """Load the nodes referenced by a match configuration.

    Args:
        match_node_links: Mapping of NPU node name -> BENCH node name.
        rank: Rank the nodes belong to.
        step: Step the nodes belong to.

    Returns:
        The result of ``_convert_to_graph_json`` for the NPU nodes (the
        mapping's keys) and BENCH nodes (the mapping's values); an empty
        dict when no connection is available or anything fails, including
        a malformed ``match_node_links``.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return {}
    try:
        # Everything that can raise runs inside the try block, so the
        # connection opened above is always closed by the finally clause,
        # even when match_node_links is malformed.
        npu_node_names = list(match_node_links.keys())
        bench_node_names = list(match_node_links.values())
        # One "?" per node name; keys and values have the same count.
        placeholders = ','.join(['?'] * len(match_node_links))
        query = f"""
            SELECT
                id,
                node_name,
                node_type,
                up_node,
                sub_nodes,
                data_source,
                input_data,
                output_data,
                matched_node_link
            FROM
                tb_nodes
            WHERE
                step = ?
                AND rank = ?
                AND data_source = ?
                AND node_name IN ({placeholders})
        """
        with conn as c:
            npu_cursor = c.execute(query, (step, rank, NPU, *npu_node_names))
            bench_cursor = c.execute(query, (step, rank, BENCH, *bench_node_names))
            npu_nodes = self._fetch_and_convert_rows(npu_cursor)
            bench_nodes = self._fetch_and_convert_rows(bench_cursor)
            return self._convert_to_graph_json(npu_nodes, bench_nodes)
    except Exception as e:
        logger.error(f"Failed to query nodes info: {e}")
        return {}
    finally:
        conn.close()
370
+
371
+ # DB: 查询所有以当前为父节点的子节点
372
def query_sub_nodes(self, node_name, graph_type, rank, step):
    """Fetch every node whose ``up_node`` is the given node.

    Args:
        node_name: Name of the parent node.
        graph_type: Data source to read; ``SINGLE`` is looked up as ``NPU``.
        rank: Rank the nodes belong to.
        step: Step the nodes belong to.

    Returns:
        dict: ``{node_name: converted_row}`` in ascending ``node_order``;
        an empty dict when there is no connection or the query fails.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return {}
    data_source = NPU if graph_type == SINGLE else graph_type
    sql = """
        SELECT
            node_name,
            up_node,
            sub_nodes,
            node_type,
            micro_step_id,
            matched_node_link,
            precision_index,
            overflow_level,
            matched_distributed
        FROM
            tb_nodes
        WHERE
            step = ?
            AND rank = ?
            AND data_source = ?
            AND up_node = ?
        ORDER BY
            node_order ASC
    """
    try:
        with conn as db:
            child_rows = db.execute(sql, (step, rank, data_source, node_name)).fetchall()
            return {
                record['node_name']: self._convert_db_to_object(dict(record))
                for record in child_rows
            }
    except Exception as e:
        logger.error(f"Failed to query sub nodes: {e}")
        return {}
    finally:
        conn.close()
412
+
413
+ # DB: 查询当前节点信息
414
def query_node_info(self, node_name, graph_type, rank, step):
    """Fetch all columns of one node together with its ``stack_info``.

    Args:
        node_name: Name of the node to read.
        graph_type: Data source to read; ``SINGLE`` is looked up as ``NPU``.
        rank: Rank the node belongs to.
        step: Step the node belongs to.

    Returns:
        dict: The first matching row passed through
        ``_convert_db_to_object``; an empty dict when there is no
        connection, no matching row, or the query fails.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return {}
    data_source = NPU if graph_type == SINGLE else graph_type
    # The inner join means a node without a matching tb_stack row yields no result.
    sql = """
        SELECT
            n.*,
            d.stack_info
        FROM
            tb_nodes n
        JOIN tb_stack d ON n.stack_id = d.id
        WHERE
            n.step = ?
            AND n.rank = ?
            AND n.data_source = ?
            AND n.node_name = ?
    """
    try:
        with conn as db:
            first_row = db.execute(sql, (step, rank, data_source, node_name)).fetchone()
            if first_row is None:
                return {}
            return self._convert_db_to_object(dict(first_row))
    except Exception as e:
        logger.error(f"Failed to query node info: {e}")
        return {}
    finally:
        conn.close()
445
+
446
+ # DB: 查询单图节点名称列表
447
def query_node_name_list(self, rank, step, micro_step):
    """List the NPU node names of one rank/step in ascending ``node_order``.

    Args:
        rank: Rank to read.
        step: Step to read.
        micro_step: Micro step to filter on; ``-1`` disables the filter.

    Returns:
        list: Node names; an empty list when there is no connection or the
        query fails.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return []
    sql = """
        SELECT
            node_name
        FROM
            tb_nodes
        WHERE
            step = ?
            AND rank = ?
            AND (? = -1 OR micro_step_id = ?)
            AND data_source = 'NPU'
        ORDER BY
            node_order ASC
    """
    # micro_step is bound twice: once for the "-1 means all" test and once
    # for the equality comparison.
    bindings = (step, rank, micro_step, micro_step)
    try:
        with conn as db:
            names = []
            for record in db.execute(sql, bindings).fetchall():
                names.append(record['node_name'])
            return names
    except Exception as e:
        logger.error(f"Failed to query node name list: {e}")
        return []
    finally:
        conn.close()
474
+
475
+ # DB: 查询已匹配节点列表,未匹配节点列表,所有的节点列表
476
def query_all_node_info_in_one(self, rank, step, micro_step):
    """Return node lists, matched maps and unmatched lists for both sides.

    Results are cached under ``'{rank}_{step}_{micro_step}'`` in the
    ``all_node_info_cache`` global value. The cache is consulted before a
    database connection is opened, so a cache hit performs no database work.

    Args:
        rank: Rank to read.
        step: Step to read.
        micro_step: Micro step to filter on; ``-1`` disables the filter.

    Returns:
        dict: Keys ``npu_node_list``, ``bench_node_list``,
        ``npu_match_node``, ``bench_match_node``, ``npu_unmatch_node`` and
        ``bench_unmatch_node``. The ``*_match_node`` maps hold the last
        entry of each node's ``matched_node_link`` list. All values are
        empty when a lookup fails; an empty dict is returned when no
        connection is available.
    """
    empty_result = {
        'npu_node_list': [],
        'bench_node_list': [],
        'npu_match_node': {},
        'bench_match_node': {},
        'npu_unmatch_node': [],
        'bench_unmatch_node': []
    }
    cache = f'{rank}_{step}_{micro_step}'
    # Check the cache first so that a hit does not open a connection.
    try:
        all_node_info_cache = GraphState.get_global_value('all_node_info_cache', {})
        cached = all_node_info_cache.get(cache)
        if cached is not None:
            return cached
    except Exception as e:
        logger.error(f"Failed to query all node info: {e}")
        return empty_result

    conn = self._initialize_db_connection()
    if not conn:
        return {}
    try:
        # A single query returns node_name, data_source and matched_node_link.
        query = """
            SELECT
                node_name,
                data_source,
                matched_node_link
            FROM
                tb_nodes
            WHERE
                step = ?
                AND rank = ?
                AND (? = -1 OR micro_step_id = ?)
            ORDER BY
                node_order ASC
        """
        with conn as c:
            rows = c.execute(query, (step, rank, micro_step, micro_step)).fetchall()

        # Per-side accumulators, keyed by data source.
        node_lists = {NPU: [], BENCH: []}
        match_nodes = {NPU: {}, BENCH: {}}  # {node_name: last_matched_link}
        unmatch_nodes = {NPU: [], BENCH: []}

        # Classify every row in a single pass.
        for row in rows:
            source = row['data_source']
            if source not in node_lists:
                logger.error(f"Invalid data source: {row['data_source']}")
                continue
            node_name = row['node_name']
            node_lists[source].append(node_name)
            matched_link = GraphUtils.safe_json_loads(row['matched_node_link'])
            # Only a non-empty list counts as a match; the last entry is kept.
            if isinstance(matched_link, list) and len(matched_link) > 0:
                match_nodes[source][node_name] = matched_link[-1]
            else:
                unmatch_nodes[source].append(node_name)

        all_node_info = {
            'npu_node_list': node_lists[NPU],
            'bench_node_list': node_lists[BENCH],
            'npu_match_node': match_nodes[NPU],
            'bench_match_node': match_nodes[BENCH],
            'npu_unmatch_node': unmatch_nodes[NPU],
            'bench_unmatch_node': unmatch_nodes[BENCH]
        }
        # NOTE(review): if get_global_value hands back the default dict
        # without registering it, this write never persists — confirm
        # against GraphState.
        all_node_info_cache = GraphState.get_global_value('all_node_info_cache', {})
        all_node_info_cache[cache] = all_node_info
        return all_node_info
    except Exception as e:
        logger.error(f"Failed to query all node info: {e}")
        return empty_result
    finally:
        conn.close()
562
+
563
+ # DB: query the manually modified, successfully matched node links for the given step and rank
564
def query_modify_matched_nodes_list(self, rank, step):
    """Return the match links of nodes flagged ``modified = 1``.

    Args:
        rank: Rank to read.
        step: Step to read.

    Returns:
        dict: ``{node_name: last entry of matched_node_link}`` for every
        modified node whose link decodes to a non-empty list; an empty dict
        when there is no connection or the query fails.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return {}
    sql = """
        SELECT
            node_name,
            matched_node_link
        FROM
            tb_nodes
        WHERE
            step = ?
            AND rank = ?
            AND modified = 1
            AND matched_node_link IS NOT NULL
            AND matched_node_link != '[]'
            AND matched_node_link != ''
    """
    try:
        with conn as db:
            modified_links = {}
            for record in db.execute(sql, (step, rank)).fetchall():
                links = GraphUtils.safe_json_loads(record['matched_node_link'])
                name = record['node_name']
                # Skip anything that did not decode to a non-empty list.
                if not isinstance(links, list) or len(links) == 0:
                    continue
                modified_links[name] = links[-1]  # keep the last match entry
            return modified_links
    except Exception as e:
        logger.error(f"Failed to query modify matched nodes list: {e}")
        return {}
    finally:
        conn.close()
598
+
599
+ # DB: 根据精度误差查询节点信息
600
def query_node_list_by_precision(self, step, rank, micro_step, values, is_filter_unmatch_nodes):
    """List NPU leaf nodes whose ``precision_index`` falls in given ranges.

    A leaf node is a row whose ``sub_nodes`` is NULL, empty or ``'[]'``.

    Args:
        step: Step to read.
        rank: Rank to read.
        micro_step: Micro step to filter on; ``-1`` disables the filter.
        values: Iterable of two-element ``(low, high)`` ranges; a node is
            selected when its ``precision_index`` lies in any of them.
        is_filter_unmatch_nodes: When truthy, nodes with no
            ``matched_node_link`` are selected as well.

    Returns:
        list: Node names in ascending ``node_order``; an empty list when
        there is no connection or the query fails.
    """
    conn = self._initialize_db_connection()
    if not conn:
        return []
    # Conditions that always apply; their "?" markers are bound to
    # (step, rank, micro_step, micro_step), in that order.
    conditions = [
        "step = ?",
        "rank = ?",
        "data_source = 'NPU'",
        "(? = -1 OR micro_step_id = ?)",
        "(sub_nodes = '' OR sub_nodes IS NULL OR sub_nodes = '[]')",
    ]
    # Optional alternatives; a node qualifies when any one of them holds.
    alternatives = []
    params = []
    for value in values:
        alternatives.append("(precision_index BETWEEN ? AND ?)")
        params.extend(value)

    if is_filter_unmatch_nodes:
        alternatives.append("(matched_node_link = '' OR matched_node_link IS NULL OR matched_node_link = '[]')")

    if alternatives:
        # Join with a spaced " OR " so the SQL does not rely on the
        # parentheses to separate the keyword from its operands.
        conditions.append("(" + " OR ".join(alternatives) + ")")
    where_clause = " AND ".join(conditions)
    query = f"""
        SELECT
            node_name
        FROM
            tb_nodes
        WHERE
            {where_clause}
        ORDER BY
            node_order ASC
    """
    try:
        with conn as c:
            cursor = c.execute(query, (step, rank, micro_step, micro_step, *params))
            return [row['node_name'] for row in cursor.fetchall()]
    except Exception as e:
        logger.error(f"Failed to query node list by precision: {e}")
        return []
    finally:
        conn.close()
644
+
645
+ # DB: 根据溢出查询节点信息
646
def query_node_list_by_overflow(self, step, rank, micro_step, values):
    """
    Query names of NPU nodes without sub-nodes whose overflow level is in ``values``.

    :param step: value bound to the ``step`` column filter
    :param rank: value bound to the ``rank`` column filter
    :param micro_step: value compared with ``micro_step_id``; ``-1`` disables that filter
    :param values: sequence of ``overflow_level`` values to match with ``IN (...)``
    :return: list of ``node_name`` ordered by ``node_order``; ``[]`` if the
        connection cannot be opened or the query fails
    """
    conn = self._initialize_db_connection()
    if not conn:
        return []
    # One "?" per requested overflow level; the values themselves are bound
    # as parameters, never interpolated into the SQL text.
    placeholders = ", ".join(["?"] * len(values))
    query = f"""
        SELECT
            node_name
        FROM
            tb_nodes
        WHERE
            step = ?
            AND rank = ?
            AND data_source = 'NPU'
            AND (? = -1 OR micro_step_id = ?)
            AND (sub_nodes = '' OR sub_nodes IS NULL OR sub_nodes = '[]')
            AND overflow_level IN ({placeholders})
        ORDER BY
            node_order ASC
    """
    try:
        with conn as c:
            cursor = c.execute(query, (step, rank, micro_step, micro_step, *values))
            rows = cursor.fetchall()
        return [row['node_name'] for row in rows]
    except Exception as e:
        logger.error(f"Failed to query node list by overflow: {e}")
        return []
    finally:
        conn.close()
683
+
684
+ # DB:查询节点信息
685
def query_node_info_by_data_source(self, step, rank, data_source):
    """
    Load selected columns of every ``tb_nodes`` row for one step/rank/data source.

    :param step: value bound to the ``step`` column filter
    :param rank: value bound to the ``rank`` column filter
    :param data_source: value bound to the ``data_source`` column filter
    :return: whatever ``self._fetch_and_convert_rows`` builds from the cursor;
        ``{}`` if the connection cannot be opened or the query fails
    """
    connection = self._initialize_db_connection()
    if not connection:
        return {}
    sql = """
        SELECT
            node_name,
            matched_node_link,
            output_data,
            precision_index,
            sub_nodes
        FROM
            tb_nodes
        WHERE
            step = ?
            AND rank = ?
            AND data_source = ?
    """
    bound_args = (step, rank, data_source)
    try:
        # The connection's context manager wraps the statement in a transaction.
        with connection:
            return self._fetch_and_convert_rows(connection.execute(sql, bound_args))
    except Exception as err:
        logger.error(f"Failed to query node info: {err}")
        return {}
    finally:
        connection.close()
713
+
714
+ # DB:更新config的colors
715
def update_config_colors(self, colors):
    """
    Store ``colors`` as JSON text in ``tb_config.node_colors`` for the row with id 1.

    :param colors: JSON-serialisable object describing the node colours
    :return: ``True`` when the update statement ran and was committed;
        ``False`` if the connection cannot be opened or anything fails
    """
    connection = self._initialize_db_connection()
    if not connection:
        return False
    sql = """
        UPDATE
            tb_config
        SET
            node_colors = ?
        WHERE
            id=1
    """
    try:
        # Serialisation stays inside the try so a non-serialisable value
        # is reported as a failed update rather than raised to the caller.
        serialized = json.dumps(colors)
        with connection:
            connection.execute(sql, (serialized,))
        return True
    except Exception as err:
        logger.error(f"Failed to update config colors: {err}")
        return False
    finally:
        connection.close()
736
+
737
+ # DB:批量更新节点信息
738
def update_nodes_info(self, nodes_info, rank, step):
    """
    Batch-update match information of nodes and flag them as modified.

    Used both when nodes are matched and when a match is cancelled.

    :param nodes_info: iterable of dicts providing ``matched_node_link``,
        ``input_data``, ``output_data``, ``precision_index``, ``graph_type``
        (bound to the ``data_source`` filter) and ``node_name``
    :param rank: value bound to the ``rank`` column filter
    :param step: value bound to the ``step`` column filter
    :return: ``True`` when all updates were executed and committed;
        ``False`` if the connection cannot be opened or anything fails
    """
    conn = self._initialize_db_connection()
    if not conn:
        return False
    try:
        # Tuple order must follow the "?" placeholders of the statement below.
        data = [
            (
                json.dumps(node['matched_node_link']),
                json.dumps(node['input_data']),
                json.dumps(node['output_data']),
                node['precision_index'],
                step,
                rank,
                node['graph_type'],
                node['node_name'],  # WHERE condition
            )
            for node in nodes_info
        ]
        query = """
            UPDATE tb_nodes
            SET
                matched_node_link = ?,
                input_data = ?,
                output_data = ?,
                precision_index = ?,
                modified= 1
            WHERE
                step = ?
                AND rank = ?
                AND data_source = ?
                AND node_name = ?
        """
        with conn as c:
            c.executemany(query, data)
        return True
    except Exception as e:
        logger.error(f"Failed to update nodes info: {e}")
        return False
    finally:
        # Release the connection on every path, like the other DB methods do.
        conn.close()
777
+
778
def update_nodes_precision_error(self, update_data):
    """
    Batch-update ``precision_index`` of NPU nodes.

    :param update_data: sequence of parameter tuples ordered as
        ``(precision_index, step, rank, node_name)``
    :return: ``True`` when the batch was executed and committed;
        ``False`` if the connection cannot be opened or anything fails
    """
    connection = self._initialize_db_connection()
    if not connection:
        return False
    sql = """
        UPDATE
            tb_nodes
        SET
            precision_index = ?
        WHERE
            step = ?
            AND rank = ?
            AND data_source = 'NPU'
            AND node_name = ?
    """
    try:
        connection.executemany(sql, update_data)
        connection.commit()
    except Exception as err:
        logger.error(f"Failed to update precision error: {err}")
        return False
    else:
        return True
    finally:
        connection.close()
802
+
803
def _initialize_db_connection(self):
    """
    Open a SQLite connection to ``self.db_path`` after path safety checks.

    On success the connection is stored in ``self.conn``,
    ``self.is_db_connected`` is set to ``True`` and the connection is returned.
    On any failure the error is logged, a partially opened connection is
    closed, ``self.conn`` is reset to ``None`` and ``None`` is returned.

    :return: ``sqlite3.Connection`` using ``sqlite3.Row`` rows, or ``None``
    """
    # Bound before the try block so the error path can test it directly.
    conn = None
    try:
        # Directory safety check
        dir_path = str(os.path.dirname(self.db_path))
        success, error = GraphUtils.safe_check_load_file_path(dir_path, True)
        if not success:
            raise PermissionError(error)
        # File safety check
        success, error = GraphUtils.safe_check_load_file_path(self.db_path)
        if not success:
            raise PermissionError(error)
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        conn.row_factory = sqlite3.Row
        # PRAGMA settings chosen for performance
        conn.execute("PRAGMA journal_mode = WAL;")
        conn.execute("PRAGMA synchronous = NORMAL;")  # or OFF (unsafe)
        conn.execute("PRAGMA cache_size = 40000;")
        conn.execute("PRAGMA wal_autocheckpoint = 0;")
        self.is_db_connected = True
        self.conn = conn
        return conn

    except Exception as e:
        logger.error(f"Failed to connect to database: {e}")
        if conn is not None:
            try:
                conn.close()
            except Exception as close_error:
                logger.error(f"Failed to close database connection: {close_error}")
        # NOTE(review): is_db_connected is intentionally left untouched here,
        # as in the original; confirm whether callers expect it to be reset.
        self.conn = None
        return None
834
+
835
def _fetch_and_convert_rows(self, cursor):
    """
    Drain a cursor and index the converted rows by node name.

    :param cursor: SQLite cursor whose rows expose a ``node_name`` column
    :return: dict mapping each ``node_name`` to the result of
        ``self._convert_db_to_object`` for that row; a later row with the
        same name replaces an earlier one
    """
    return {
        record['node_name']: self._convert_db_to_object(dict(record))
        for record in cursor.fetchall()
    }
846
+
847
def _convert_to_graph_json(self, npu_nodes, bench_nodes):
    """
    Wrap the two node collections in the graph layout keyed by data source.

    :param npu_nodes: nodes stored under ``"NPU"`` -> ``"node"``
    :param bench_nodes: nodes stored under ``"Bench"`` -> ``"node"``
    :return: dict with the keys ``"NPU"`` and ``"Bench"``
    """
    npu_section = {"node": npu_nodes}
    bench_section = {"node": bench_nodes}
    return {"NPU": npu_section, "Bench": bench_section}
857
+
858
def _convert_db_to_object(self, data):
    """
    Convert one ``tb_nodes`` row (as a dict) into the node object layout.

    JSON text columns are decoded with ``GraphUtils.safe_json_loads``; an
    empty or missing column is decoded from ``"{}"`` or ``"[]"`` instead.
    Integer columns fall back to a default when the stored value is ``None``.

    :param data: mapping of column name to raw database value
    :return: dict describing the node
    """
    def as_int(column, fallback):
        # Only None triggers the fallback; any other value goes through int().
        raw = data.get(column)
        return fallback if raw is None else int(raw)

    def as_json(column, empty):
        # A falsy column value is replaced by the given empty JSON literal.
        return GraphUtils.safe_json_loads(data.get(column) or empty)

    return {
        "id": data.get('node_name'),
        "node_name": data.get('node_name'),
        "node_type": as_int('node_type', 0),
        "output_data": as_json('output_data', "{}"),
        "input_data": as_json('input_data', "{}"),
        "upnode": data.get('up_node'),
        "subnodes": as_json('sub_nodes', "[]"),
        "matched_node_link": as_json('matched_node_link', "[]"),
        "stack_info": as_json('stack_info', "[]"),
        "micro_step_id": as_int('micro_step_id', -1),
        "data": {
            "precision_index": data.get('precision_index'),
            'overflow_level': data.get('overflow_level'),
        },
        "parallel_merge_info": as_json('parallel_merge_info', "[]"),
        "matched_distributed": as_json('matched_distributed', "[]"),
        "modified": as_int('modified', 0),
    }
879
+