PyPI - mindstudio-probe - Versions diffs - 8.3.3__py3-none-any.whl → 26.0.0a1__py3-none-any.whl - Mend

mindstudio-probe 8.3.3 (py3-none-any.whl) → 26.0.0a1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (689)

{mindstudio_probe-8.3.3.dist-info → mindstudio_probe-26.0.0a1.dist-info}/METADATA +26 -14
mindstudio_probe-26.0.0a1.dist-info/RECORD +498 -0
{mindstudio_probe-8.3.3.dist-info → mindstudio_probe-26.0.0a1.dist-info}/WHEEL +1 -1
mindstudio_probe-26.0.0a1.dist-info/entry_points.txt +5 -0
mindstudio_probe-26.0.0a1.dist-info/licenses/LICENSE +124 -0
mindstudio_probe-26.0.0a1.dist-info/top_level.txt +2 -0
msprobe/__init__.py +12 -13
msprobe/config.json +9 -31
msprobe/core/__init__.py +12 -11
msprobe/core/acc_check/acc_check_cli.py +145 -0
msprobe/core/common/const.py +97 -38
msprobe/core/common/db_manager.py +133 -12
msprobe/core/common/decorator.py +12 -11
msprobe/core/common/exceptions.py +12 -11
msprobe/core/common/file_utils.py +101 -25
msprobe/core/common/framework_adapter.py +36 -25
msprobe/core/common/global_lock.py +12 -11
msprobe/core/common/inplace_op_checker.py +12 -11
msprobe/core/common/log.py +22 -11
msprobe/core/common/megatron_utils.py +566 -11
msprobe/core/common/parallel_state.py +12 -11
msprobe/core/common/runtime.py +12 -11
msprobe/core/common/utils.py +41 -41
msprobe/core/compare/acc_compare.py +361 -104
msprobe/core/compare/atb_data_compare.py +422 -0
msprobe/core/compare/auto_compare.py +134 -0
msprobe/core/compare/check.py +14 -17
msprobe/core/compare/compare_cli.py +72 -149
msprobe/core/compare/config.py +12 -13
msprobe/core/compare/diff_analyze/first_diff_analyze.py +28 -15
msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
msprobe/core/compare/find_first/analyzer.py +18 -18
msprobe/core/compare/find_first/graph.py +12 -11
msprobe/core/compare/find_first/utils.py +13 -12
msprobe/core/compare/indicator_analysis/__init__.py +15 -0
msprobe/core/compare/indicator_analysis/algorithm.py +363 -0
msprobe/core/compare/indicator_analysis/api_data.py +141 -0
msprobe/core/compare/indicator_analysis/calculator.py +181 -0
msprobe/core/compare/indicator_analysis/utils.py +116 -0
msprobe/core/compare/layer_mapping/__init__.py +12 -11
msprobe/core/compare/layer_mapping/data_scope_parser.py +20 -11
msprobe/core/compare/layer_mapping/layer_mapping.py +14 -13
msprobe/core/compare/layer_mapping/postprocess_pass.py +13 -11
msprobe/core/compare/merge_result/merge_result.py +12 -11
msprobe/core/compare/merge_result/merge_result_cli.py +12 -11
msprobe/core/compare/merge_result/utils.py +12 -11
msprobe/core/compare/multiprocessing_compute.py +13 -14
msprobe/core/compare/npy_compare.py +13 -11
msprobe/core/compare/offline_data_compare.py +160 -0
msprobe/core/compare/stats_diff_calc.py +39 -0
msprobe/core/compare/torchair_acc_cmp.py +764 -0
msprobe/core/compare/torchair_cmp_utils.py +338 -0
msprobe/core/compare/utils.py +140 -49
msprobe/core/config_check/__init__.py +12 -11
msprobe/core/config_check/checkers/__init__.py +12 -11
msprobe/core/config_check/checkers/base_checker.py +15 -14
msprobe/core/config_check/checkers/dataset_checker.py +13 -12
msprobe/core/config_check/checkers/env_args_checker.py +13 -12
msprobe/core/config_check/checkers/hyperparameter_checker.py +16 -15
msprobe/core/config_check/checkers/pip_checker.py +15 -15
msprobe/core/config_check/checkers/random_checker.py +13 -12
msprobe/core/config_check/checkers/weights_checker.py +14 -12
msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +13 -17
msprobe/core/config_check/ckpt_compare/megatron_loader.py +13 -12
msprobe/core/config_check/ckpt_compare/metrics.py +12 -11
msprobe/core/config_check/config_check_cli.py +18 -17
msprobe/core/config_check/config_checker.py +16 -14
msprobe/core/config_check/resource/dependency.yaml +15 -12
msprobe/core/config_check/resource/env.yaml +12 -11
msprobe/core/config_check/utils/hyperparameter_parser.py +12 -11
msprobe/core/config_check/utils/utils.py +12 -11
msprobe/core/{data_dump → dump/api_dump}/api_registry.py +12 -11
msprobe/core/{common_config.py → dump/common_config.py} +13 -24
msprobe/core/dump/data_dump/data_collector.py +257 -0
msprobe/core/{data_dump → dump/data_dump}/data_processor/base.py +45 -36
msprobe/core/{data_dump → dump/data_dump}/data_processor/factory.py +33 -25
msprobe/core/{data_dump → dump/data_dump}/data_processor/mindspore_processor.py +37 -113
msprobe/core/{data_dump → dump/data_dump}/data_processor/pytorch_processor.py +364 -131
msprobe/core/{data_dump → dump/data_dump}/json_writer.py +24 -31
msprobe/core/{data_dump → dump/data_dump}/scope.py +12 -13
msprobe/core/{debugger → dump/debugger}/precision_debugger.py +15 -23
msprobe/core/dump/dump2db/db_utils.py +215 -0
msprobe/core/dump/dump2db/dump2db.py +409 -0
msprobe/core/{hook_manager.py → dump/hook_manager.py} +38 -87
msprobe/core/dump/kernel_dump/kernel_config.py +34 -0
msprobe/core/{service.py → dump/service.py} +43 -27
msprobe/core/install_deps/install_deps.py +51 -0
msprobe/core/monitor/anomaly_processor.py +13 -11
msprobe/core/monitor/csv2db.py +73 -93
msprobe/core/monitor/db_utils.py +140 -205
msprobe/core/monitor/utils.py +18 -17
msprobe/core/monitor_v2/__init__.py +20 -0
msprobe/core/monitor_v2/base.py +83 -0
msprobe/core/monitor_v2/cc.py +287 -0
msprobe/core/monitor_v2/factory.py +81 -0
msprobe/core/monitor_v2/module.py +201 -0
msprobe/core/monitor_v2/optimizer.py +245 -0
msprobe/core/monitor_v2/param.py +154 -0
msprobe/core/monitor_v2/trainer.py +326 -0
msprobe/core/monitor_v2/utils.py +122 -0
msprobe/core/monitor_v2/weight_grad.py +419 -0
msprobe/core/monitor_v2/writer.py +162 -0
msprobe/core/overflow_check/abnormal_scene.py +12 -11
msprobe/core/overflow_check/api_info.py +12 -11
msprobe/core/overflow_check/checker.py +12 -11
msprobe/core/overflow_check/filter.py +13 -11
msprobe/core/overflow_check/level.py +12 -11
msprobe/core/overflow_check/utils.py +12 -11
msprobe/core/single_save/single_comparator.py +12 -11
msprobe/core/single_save/single_saver.py +12 -11
msprobe/infer/__init__.py +16 -0
msprobe/infer/offline/__init__.py +16 -0
msprobe/infer/offline/compare/__init__.py +16 -0
msprobe/infer/offline/compare/msquickcmp/__init__.py +16 -0
msprobe/infer/offline/compare/msquickcmp/adapter_cli/__init__.py +16 -0
msprobe/infer/offline/compare/msquickcmp/adapter_cli/args_adapter.py +46 -0
msprobe/infer/offline/compare/msquickcmp/atc/__init__.py +16 -0
msprobe/infer/offline/compare/msquickcmp/atc/atc_utils.py +98 -0
msprobe/infer/offline/compare/msquickcmp/cmp_process.py +328 -0
msprobe/infer/offline/compare/msquickcmp/common/__init__.py +16 -0
msprobe/infer/offline/compare/msquickcmp/common/args_check.py +112 -0
msprobe/infer/offline/compare/msquickcmp/common/convert.py +74 -0
msprobe/infer/offline/compare/msquickcmp/common/dump_data.py +121 -0
msprobe/infer/offline/compare/msquickcmp/common/dynamic_argument_bean.py +39 -0
msprobe/infer/offline/compare/msquickcmp/common/utils.py +669 -0
msprobe/infer/offline/compare/msquickcmp/config.ini +6 -0
msprobe/infer/offline/compare/msquickcmp/dump/__init__.py +16 -0
msprobe/infer/offline/compare/msquickcmp/dump/args_adapter.py +50 -0
msprobe/infer/offline/compare/msquickcmp/dump/dump_process.py +91 -0
msprobe/infer/offline/compare/msquickcmp/install_aclruntime_aisbench.sh +180 -0
msprobe/infer/offline/compare/msquickcmp/main.py +199 -0
msprobe/infer/offline/compare/msquickcmp/net_compare/__init__.py +16 -0
msprobe/infer/offline/compare/msquickcmp/net_compare/net_compare.py +277 -0
msprobe/infer/offline/compare/msquickcmp/npu/__init__.py +16 -0
msprobe/infer/offline/compare/msquickcmp/npu/npu_dump_data.py +558 -0
msprobe/infer/offline/compare/msquickcmp/npu/om_parser.py +416 -0
msprobe/infer/offline/compare/msquickcmp/onnx_model/__init__.py +16 -0
msprobe/infer/offline/compare/msquickcmp/onnx_model/onnx_dump_data.py +374 -0
msprobe/infer/utils/__init__.py +15 -0
msprobe/infer/utils/acc_cmp.py +94 -0
msprobe/infer/utils/check/__init__.py +37 -0
msprobe/infer/utils/check/args_checker.py +35 -0
msprobe/infer/utils/check/checker.py +227 -0
msprobe/infer/utils/check/dict_checker.py +78 -0
msprobe/infer/utils/check/func_wrapper.py +96 -0
msprobe/infer/utils/check/list_checker.py +56 -0
msprobe/infer/utils/check/number_checker.py +64 -0
msprobe/infer/utils/check/obj_checker.py +41 -0
msprobe/infer/utils/check/path_checker.py +249 -0
msprobe/infer/utils/check/rule.py +126 -0
msprobe/infer/utils/check/string_checker.py +66 -0
msprobe/infer/utils/cmp_algorithm.py +261 -0
msprobe/infer/utils/constants.py +112 -0
msprobe/infer/utils/file_open_check.py +337 -0
msprobe/infer/utils/util.py +177 -0
msprobe/mindspore/__init__.py +14 -13
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +14 -13
msprobe/mindspore/api_accuracy_checker/api_info.py +12 -11
msprobe/mindspore/api_accuracy_checker/api_runner.py +12 -11
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +12 -11
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +12 -11
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +12 -11
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +12 -11
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +15 -14
msprobe/mindspore/api_accuracy_checker/compute_element.py +12 -11
msprobe/mindspore/api_accuracy_checker/data_manager.py +13 -11
msprobe/mindspore/api_accuracy_checker/main.py +12 -11
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +14 -12
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +13 -11
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +12 -11
msprobe/mindspore/api_accuracy_checker/type_mapping.py +12 -11
msprobe/mindspore/api_accuracy_checker/utils.py +12 -11
msprobe/mindspore/common/const.py +15 -74
msprobe/mindspore/common/log.py +12 -11
msprobe/mindspore/common/utils.py +30 -15
msprobe/mindspore/compare/common_dir_compare.py +21 -23
msprobe/mindspore/compare/distributed_compare.py +18 -16
msprobe/mindspore/compare/ms_compare.py +14 -14
msprobe/mindspore/compare/ms_graph_compare.py +26 -20
msprobe/mindspore/compare/utils.py +14 -12
msprobe/mindspore/{cell_processor.py → dump/cell_processor.py} +15 -14
msprobe/mindspore/{debugger → dump/debugger}/debugger_config.py +12 -30
msprobe/mindspore/{debugger → dump/debugger}/precision_debugger.py +43 -45
msprobe/mindspore/dump/{cell_dump_process.py → dump_processor/cell_dump_process.py} +31 -17
msprobe/mindspore/dump/{cell_dump_with_insert_gradient.py → dump_processor/cell_dump_with_insert_gradient.py} +18 -14
msprobe/mindspore/dump/{dump_tool_factory.py → dump_processor/dump_tool_factory.py} +16 -15
msprobe/mindspore/dump/{graph_mode_cell_dump.py → dump_processor/graph_mode_cell_dump.py} +16 -15
msprobe/mindspore/dump/{graph_tensor_dump.py → dump_processor/graph_tensor_dump.py} +134 -133
msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/api_register.py +15 -14
msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/hook_cell.py +12 -11
msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/ms_hook_manager.py +47 -20
msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/primitive_hooks.py +14 -13
msprobe/mindspore/dump/{hook_cell → dump_processor/hook_cell}/support_wrap_ops.yaml +13 -11
msprobe/mindspore/dump/{jit_dump.py → dump_processor/jit_dump.py} +14 -13
msprobe/mindspore/dump/{kernel_graph_dump.py → dump_processor/kernel_graph_dump.py} +13 -12
msprobe/mindspore/dump/{kernel_kbyk_dump.py → dump_processor/kernel_kbyk_dump.py} +13 -12
msprobe/mindspore/{exception_dump → dump/exception_dump}/exception_dump_tool_factory.py +14 -13
msprobe/mindspore/{exception_dump → dump/exception_dump}/kernel_graph_exception_dump.py +13 -12
msprobe/mindspore/{mindspore_service.py → dump/mindspore_service.py} +18 -17
msprobe/mindspore/dump/mindtorch/__init__.py +19 -0
msprobe/mindspore/dump/ms_config.py +105 -0
msprobe/mindspore/{overflow_check → dump/overflow_check}/kernel_graph_overflow_check.py +13 -12
msprobe/mindspore/{overflow_check → dump/overflow_check}/overflow_check_tool_factory.py +14 -13
msprobe/mindspore/dump/task_handler_factory.py +43 -0
msprobe/mindspore/monitor/common_func.py +12 -11
msprobe/mindspore/monitor/data_writers.py +12 -11
msprobe/mindspore/monitor/distributed/wrap_distributed.py +93 -39
msprobe/mindspore/monitor/features.py +12 -11
msprobe/mindspore/monitor/module_hook.py +19 -22
msprobe/mindspore/monitor/optimizer_collect.py +29 -25
msprobe/mindspore/monitor/utils.py +13 -11
msprobe/msaccucmp/advisor/__init__.py +16 -0
msprobe/msaccucmp/advisor/advisor_const.py +65 -0
msprobe/msaccucmp/advisor/advisor_result.py +73 -0
msprobe/msaccucmp/advisor/compare_advisor.py +99 -0
msprobe/msaccucmp/advisor/input_advisor.py +66 -0
msprobe/msaccucmp/advisor/node_advisor.py +68 -0
msprobe/msaccucmp/advisor/overflow_advisor.py +58 -0
msprobe/msaccucmp/algorithm_manager/__init__.py +16 -0
msprobe/msaccucmp/algorithm_manager/algorithm_manager.py +464 -0
msprobe/msaccucmp/algorithm_manager/algorithm_parameter.py +42 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_AccumulatedRelativeError.py +46 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_CosineSimilarity.py +58 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_KullbackLeiblerDivergence.py +84 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MaxAbsoluteError.py +41 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MaxRelativeError.py +46 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MeanAbsoluteError.py +41 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_MeanRelativeError.py +46 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_RelativeEuclideanDistance.py +46 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_RootMeanSquareError.py +40 -0
msprobe/msaccucmp/algorithm_manager/builtin_algorithm/alg_StandardDeviation.py +47 -0
msprobe/msaccucmp/cmp_utils/__init__.py +16 -0
msprobe/msaccucmp/cmp_utils/common.py +113 -0
msprobe/msaccucmp/cmp_utils/constant/__init__.py +16 -0
msprobe/msaccucmp/cmp_utils/constant/compare_error.py +81 -0
msprobe/msaccucmp/cmp_utils/constant/const_manager.py +530 -0
msprobe/msaccucmp/cmp_utils/file_utils.py +497 -0
msprobe/msaccucmp/cmp_utils/log.py +257 -0
msprobe/msaccucmp/cmp_utils/multi_process/__init__.py +16 -0
msprobe/msaccucmp/cmp_utils/multi_process/multi_convert_process.py +140 -0
msprobe/msaccucmp/cmp_utils/multi_process/progress.py +78 -0
msprobe/msaccucmp/cmp_utils/path_check.py +274 -0
msprobe/msaccucmp/cmp_utils/reg_manager.py +98 -0
msprobe/msaccucmp/cmp_utils/tlv_parse.py +279 -0
msprobe/msaccucmp/cmp_utils/utils.py +356 -0
msprobe/msaccucmp/cmp_utils/utils_type.py +63 -0
msprobe/msaccucmp/compare_vector.py +48 -0
msprobe/msaccucmp/conversion/__init__.py +16 -0
msprobe/msaccucmp/conversion/data_conversion.py +277 -0
msprobe/msaccucmp/conversion/dtype_conversion.py +99 -0
msprobe/msaccucmp/conversion/shape_format_conversion.py +477 -0
msprobe/msaccucmp/conversion/tensor_conversion.py +369 -0
msprobe/msaccucmp/dump_data_conversion.py +46 -0
msprobe/msaccucmp/dump_parse/__init__.py +16 -0
msprobe/msaccucmp/dump_parse/big_dump_data.py +317 -0
msprobe/msaccucmp/dump_parse/dump.py +423 -0
msprobe/msaccucmp/dump_parse/dump_data_object.py +322 -0
msprobe/msaccucmp/dump_parse/dump_data_parser.py +436 -0
msprobe/msaccucmp/dump_parse/dump_utils.py +246 -0
msprobe/msaccucmp/dump_parse/ffts_parser.py +137 -0
msprobe/msaccucmp/dump_parse/mapping.py +62 -0
msprobe/msaccucmp/dump_parse/nano_dump_data.py +392 -0
msprobe/msaccucmp/dump_parse/proto_dump_data.py +308 -0
msprobe/msaccucmp/dump_parser.py +90 -0
msprobe/msaccucmp/format_manager/__init__.py +16 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_NCHW.py +53 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_ND.py +52 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_NZ_to_NHWC.py +53 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_Z_to_HWCN.py +47 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_FRACTAL_Z_to_NCHW.py +47 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_FRACTAL_Z.py +89 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_NCHW.py +37 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_HWCN_to_NHWC.py +37 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_HWCN.py +43 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_NCHW.py +48 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NC1HWC0_to_NHWC.py +43 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NCHW_to_FRACTAL_Z.py +87 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NCHW_to_NHWC.py +37 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NDC1HWC0_to_NCDHW.py +48 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NDC1HWC0_to_ND.py +44 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_FRACTAL_Z.py +87 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_HWCN.py +37 -0
msprobe/msaccucmp/format_manager/builtin_format_convert/convert_NHWC_to_NCHW.py +37 -0
msprobe/msaccucmp/format_manager/format_manager.py +307 -0
msprobe/msaccucmp/inplace_layer_process.py +186 -0
msprobe/msaccucmp/msaccucmp.py +532 -0
msprobe/msaccucmp/mscmp_advisor.py +128 -0
msprobe/msaccucmp/overflow/__init__.py +16 -0
msprobe/msaccucmp/overflow/overflow_analyse.py +305 -0
msprobe/msaccucmp/overflow/overflow_detection.py +143 -0
msprobe/msaccucmp/pytorch_cmp/__init__.py +16 -0
msprobe/msaccucmp/pytorch_cmp/compare_pytorch.py +389 -0
msprobe/msaccucmp/pytorch_cmp/hdf5_parser.py +377 -0
msprobe/msaccucmp/pytorch_cmp/pytorch_dump_data.py +461 -0
msprobe/msaccucmp/shape_conversion.py +41 -0
msprobe/msaccucmp/vector_cmp/__init__.py +16 -0
msprobe/msaccucmp/vector_cmp/batch_compare.py +197 -0
msprobe/msaccucmp/vector_cmp/compare_detail/__init__.py +16 -0
msprobe/msaccucmp/vector_cmp/compare_detail/compare_detail.py +245 -0
msprobe/msaccucmp/vector_cmp/compare_detail/detail.py +182 -0
msprobe/msaccucmp/vector_cmp/compare_detail/detail_writer.py +580 -0
msprobe/msaccucmp/vector_cmp/fusion_manager/__init__.py +16 -0
msprobe/msaccucmp/vector_cmp/fusion_manager/compare_fusion_op.py +588 -0
msprobe/msaccucmp/vector_cmp/fusion_manager/compare_npu_vs_npu.py +339 -0
msprobe/msaccucmp/vector_cmp/fusion_manager/compare_result.py +326 -0
msprobe/msaccucmp/vector_cmp/fusion_manager/compare_rule.py +156 -0
msprobe/msaccucmp/vector_cmp/fusion_manager/fusion_op.py +204 -0
msprobe/msaccucmp/vector_cmp/fusion_manager/fusion_rule_parser.py +635 -0
msprobe/msaccucmp/vector_cmp/fusion_manager/quant_filter.py +187 -0
msprobe/msaccucmp/vector_cmp/range_manager/__init__.py +16 -0
msprobe/msaccucmp/vector_cmp/range_manager/range_manager.py +100 -0
msprobe/msaccucmp/vector_cmp/range_manager/range_mode.py +94 -0
msprobe/msaccucmp/vector_cmp/range_manager/select_mode.py +86 -0
msprobe/msaccucmp/vector_cmp/vector_comparison.py +535 -0
msprobe/msprobe.py +101 -130
msprobe/overflow_check/__init__.py +15 -0
msprobe/{nan_analyze → overflow_check}/analyzer.py +38 -27
msprobe/{nan_analyze → overflow_check}/graph.py +28 -27
msprobe/{nan_analyze → overflow_check}/utils.py +15 -14
msprobe/pytorch/__init__.py +20 -14
msprobe/pytorch/aclgraph_dump/__init__.py +45 -0
msprobe/pytorch/aclgraph_dump/_meta.py +26 -0
msprobe/pytorch/api_accuracy_checker/{run_ut/run_ut.py → acc_check/acc_check.py} +50 -45
msprobe/pytorch/api_accuracy_checker/{run_ut/run_ut_utils.py → acc_check/acc_check_utils.py} +201 -30
msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/data_generate.py +56 -16
msprobe/pytorch/api_accuracy_checker/{run_ut/multi_run_ut.py → acc_check/multi_acc_check.py} +32 -47
msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/run_overflow_check.py +19 -18
msprobe/pytorch/api_accuracy_checker/common/config.py +22 -20
msprobe/pytorch/api_accuracy_checker/common/utils.py +72 -13
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -11
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +23 -14
msprobe/pytorch/api_accuracy_checker/compare/compare.py +45 -32
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +12 -11
msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +14 -12
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +14 -12
msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +12 -11
msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +12 -11
msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +21 -19
msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +14 -13
msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +12 -11
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +60 -11
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +27 -16
msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +13 -11
msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +39 -18
msprobe/pytorch/bench_functions/__init__.py +12 -11
msprobe/pytorch/bench_functions/apply_adam.py +12 -11
msprobe/pytorch/bench_functions/apply_adam_w.py +12 -11
msprobe/pytorch/bench_functions/confusion_transpose.py +12 -11
msprobe/pytorch/bench_functions/fast_gelu.py +12 -11
msprobe/pytorch/bench_functions/group_norm_silu.py +12 -11
msprobe/pytorch/bench_functions/layer_norm_eval.py +12 -11
msprobe/pytorch/bench_functions/linear.py +12 -11
msprobe/pytorch/bench_functions/matmul_backward.py +12 -11
msprobe/pytorch/bench_functions/mish.py +12 -11
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +12 -11
msprobe/pytorch/bench_functions/npu_fusion_attention.py +12 -11
msprobe/pytorch/bench_functions/rms_norm.py +12 -11
msprobe/pytorch/bench_functions/rotary_mul.py +12 -11
msprobe/pytorch/bench_functions/scaled_mask_softmax.py +12 -11
msprobe/pytorch/bench_functions/sort_v2.py +12 -11
msprobe/pytorch/bench_functions/swiglu.py +12 -11
msprobe/pytorch/common/__init__.py +12 -11
msprobe/pytorch/common/log.py +12 -11
msprobe/pytorch/common/parse_json.py +12 -11
msprobe/pytorch/common/utils.py +52 -19
msprobe/pytorch/compare/distributed_compare.py +13 -13
msprobe/pytorch/compare/match.py +12 -11
msprobe/pytorch/compare/pt_compare.py +14 -20
msprobe/pytorch/compare/pt_diff_analyze.py +12 -11
msprobe/pytorch/compare/utils.py +12 -11
msprobe/pytorch/{hook_module → dump/api_dump}/api_register.py +18 -16
msprobe/pytorch/{hook_module → dump/api_dump}/hook_module.py +14 -13
msprobe/pytorch/{hook_module → dump/api_dump}/pt_hook_manager.py +68 -23
msprobe/pytorch/{hook_module → dump/api_dump}/register_optimizer_hook.py +13 -11
msprobe/pytorch/{hook_module → dump/api_dump}/script_wrapper.py +17 -14
msprobe/pytorch/{hook_module → dump/api_dump}/utils.py +12 -11
msprobe/pytorch/{debugger → dump/debugger}/debugger_config.py +23 -38
msprobe/pytorch/dump/debugger/precision_debugger.py +130 -0
msprobe/pytorch/{function_factory.py → dump/function_factory.py} +12 -11
msprobe/pytorch/dump/module_dump/hook_wrapper.py +17 -13
msprobe/pytorch/dump/module_dump/module_dump.py +16 -15
msprobe/pytorch/dump/module_dump/{module_processer.py → module_processor.py} +54 -42
msprobe/pytorch/dump/pt_config.py +128 -0
msprobe/pytorch/{pytorch_service.py → dump/pytorch_service.py} +22 -21
msprobe/pytorch/monitor/csv2tb.py +13 -11
msprobe/pytorch/monitor/data_writers.py +13 -11
msprobe/pytorch/monitor/distributed/wrap_distributed.py +13 -11
msprobe/pytorch/monitor/features.py +12 -11
msprobe/pytorch/monitor/module_hook.py +67 -59
msprobe/pytorch/monitor/module_metric.py +13 -11
msprobe/pytorch/monitor/optimizer_collect.py +37 -35
msprobe/pytorch/monitor/utils.py +13 -11
msprobe/pytorch/monitor/visualizer.py +12 -11
msprobe/pytorch/torchair_dump/__init__.py +17 -0
msprobe/pytorch/torchair_dump/torchair_dump.py +114 -0
msprobe/scripts/atb/config_example.json +10 -0
msprobe/scripts/atb/load_atb_probe.sh +101 -0
msprobe/scripts/atb/unload_atb_probe.sh +27 -0
msprobe/scripts/build_msaccucmp.sh +186 -0
msprobe/scripts/conf/help.info +6 -0
msprobe/scripts/conf/version.info +3 -0
msprobe/scripts/run_script/common.sh +538 -0
msprobe/scripts/run_script/main_msaccucmp.sh +232 -0
msprobe/visualization/__init__.py +12 -11
msprobe/visualization/builder/__init__.py +12 -11
msprobe/visualization/builder/graph_builder.py +45 -30
msprobe/visualization/builder/graph_merger.py +53 -32
msprobe/visualization/builder/msprobe_adapter.py +34 -44
msprobe/visualization/compare/__init__.py +12 -11
msprobe/visualization/compare/graph_comparator.py +63 -51
msprobe/visualization/compare/mode_adapter.py +28 -113
msprobe/visualization/db_utils.py +133 -22
msprobe/visualization/graph/__init__.py +12 -11
msprobe/visualization/graph/base_node.py +15 -27
msprobe/visualization/graph/distributed_analyzer.py +97 -40
msprobe/visualization/graph/graph.py +14 -16
msprobe/visualization/graph/node_colors.py +34 -31
msprobe/visualization/graph/node_op.py +12 -11
msprobe/visualization/graph_service.py +580 -205
msprobe/visualization/utils.py +278 -31
tb_graph_ascend/secure_build.py +175 -0
tb_graph_ascend/server/__init__.py +15 -0
tb_graph_ascend/server/app/__init__.py +15 -0
tb_graph_ascend/server/app/model/__init__.py +15 -0
tb_graph_ascend/server/app/model/hierarchy.py +348 -0
tb_graph_ascend/server/app/model/layout_hierarchy_model.py +69 -0
tb_graph_ascend/server/app/model/match_nodes_model.py +573 -0
tb_graph_ascend/server/app/repositories/__init__.py +15 -0
tb_graph_ascend/server/app/repositories/graph_repo_base.py +32 -0
tb_graph_ascend/server/app/repositories/graph_repo_db.py +879 -0
tb_graph_ascend/server/app/repositories/graph_repo_vis.py +83 -0
tb_graph_ascend/server/app/service/__init__.py +18 -0
tb_graph_ascend/server/app/service/graph_service_base.py +158 -0
tb_graph_ascend/server/app/service/graph_service_db.py +438 -0
tb_graph_ascend/server/app/service/graph_service_factory.py +54 -0
tb_graph_ascend/server/app/service/graph_service_vis.py +480 -0
tb_graph_ascend/server/app/utils/__init__.py +15 -0
tb_graph_ascend/server/app/utils/constant.py +80 -0
tb_graph_ascend/server/app/utils/file_check_wrapper.py +46 -0
tb_graph_ascend/server/app/utils/global_state.py +95 -0
tb_graph_ascend/server/app/utils/graph_utils.py +661 -0
tb_graph_ascend/server/app/utils/i18n.py +153 -0
tb_graph_ascend/server/app/utils/request_method.py +46 -0
tb_graph_ascend/server/app/views/__init__.py +15 -0
tb_graph_ascend/server/app/views/graph_views.py +304 -0
tb_graph_ascend/server/plugin.py +108 -0
tb_graph_ascend/server/static/index.html +9250 -0
tb_graph_ascend/server/static/index.js +21 -0
tb_graph_ascend/setup.py +57 -0
mindstudio_probe-8.3.3.dist-info/LICENSE +0 -201
mindstudio_probe-8.3.3.dist-info/RECORD +0 -491
mindstudio_probe-8.3.3.dist-info/entry_points.txt +0 -2
mindstudio_probe-8.3.3.dist-info/top_level.txt +0 -1
msprobe/CMakeLists.txt +0 -5
msprobe/README.md +0 -203
msprobe/core/advisor/advisor.py +0 -129
msprobe/core/advisor/advisor_const.py +0 -58
msprobe/core/advisor/advisor_result.py +0 -58
msprobe/core/compare/find_first/data_processor.py +0 -35
msprobe/core/compare/highlight.py +0 -390
msprobe/core/data_dump/data_collector.py +0 -356
msprobe/core/grad_probe/constant.py +0 -90
msprobe/core/grad_probe/grad_compare.py +0 -187
msprobe/core/grad_probe/utils.py +0 -105
msprobe/core/kernel_dump/kernel_config.py +0 -33
msprobe/docs/01.installation.md +0 -250
msprobe/docs/02.config_introduction.md +0 -221
msprobe/docs/03.config_examples.md +0 -281
msprobe/docs/04.kernel_dump_PyTorch.md +0 -73
msprobe/docs/05.data_dump_PyTorch.md +0 -518
msprobe/docs/06.data_dump_MindSpore.md +0 -618
msprobe/docs/07.accuracy_checker_PyTorch.md +0 -310
msprobe/docs/09.accuracy_checker_MindSpore.md +0 -120
msprobe/docs/10.accuracy_compare_PyTorch.md +0 -637
msprobe/docs/11.accuracy_compare_MindSpore.md +0 -769
msprobe/docs/12.overflow_check_PyTorch.md +0 -82
msprobe/docs/13.overflow_check_MindSpore.md +0 -33
msprobe/docs/14.data_parse_PyTorch.md +0 -282
msprobe/docs/15.free_benchmarking_PyTorch.md +0 -169
msprobe/docs/16.free_benchmarking_MindSpore.md +0 -159
msprobe/docs/17.grad_probe.md +0 -205
msprobe/docs/18.online_dispatch.md +0 -89
msprobe/docs/19.monitor.md +0 -753
msprobe/docs/20.monitor_performance_baseline.md +0 -52
msprobe/docs/21.visualization_PyTorch.md +0 -519
msprobe/docs/22.visualization_MindSpore.md +0 -515
msprobe/docs/23.generate_operator_PyTorch.md +0 -107
msprobe/docs/24.code_mapping_Mindspore.md +0 -29
msprobe/docs/25.tool_function_introduction.md +0 -29
msprobe/docs/26.data_dump_PyTorch_baseline.md +0 -48
msprobe/docs/27.dump_json_instruction.md +0 -795
msprobe/docs/28.debugger_save_instruction.md +0 -288
msprobe/docs/28.kernel_dump_MindSpore.md +0 -69
msprobe/docs/29.data_dump_MSAdapter.md +0 -235
msprobe/docs/30.overflow_check_MSAdapter.md +0 -31
msprobe/docs/31.config_check.md +0 -107
msprobe/docs/32.ckpt_compare.md +0 -69
msprobe/docs/33.generate_operator_MindSpore.md +0 -181
msprobe/docs/34.RL_collect.md +0 -101
msprobe/docs/35.nan_analyze.md +0 -73
msprobe/docs/36.calculation_result_change.md +0 -75
msprobe/docs/FAQ.md +0 -232
msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +0 -146
msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +0 -14
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +0 -33
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +0 -217
msprobe/docs/img/BLOOM-7B_1.png +0 -0
msprobe/docs/img/BLOOM-7B_2.png +0 -0
msprobe/docs/img/BLOOM-7B_3.png +0 -0
msprobe/docs/img/BLOOM-7B_4.png +0 -0
msprobe/docs/img/GPT-3_1.png +0 -0
msprobe/docs/img/GPT-3_2.png +0 -0
msprobe/docs/img/GPT-3_3.png +0 -0
msprobe/docs/img/GPT-3_4.png +0 -0
msprobe/docs/img/GPT-3_5.png +0 -0
msprobe/docs/img/GPT-3_6.png +0 -0
msprobe/docs/img/GPT-3_7.png +0 -0
msprobe/docs/img/GPT-3_8.png +0 -0
msprobe/docs/img/YOLOV5S_1.png +0 -0
msprobe/docs/img/YOLOV5S_2.png +0 -0
msprobe/docs/img/accuracy_checking_details.png +0 -0
msprobe/docs/img/accuracy_checking_result.png +0 -0
msprobe/docs/img/api_precision_compare_details.png +0 -0
msprobe/docs/img/api_precision_compare_result.png +0 -0
msprobe/docs/img/auto_analyze_log.png +0 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/compare_result_pkl.png +0 -0
msprobe/docs/img/compare_result_pkl_md5.png.png +0 -0
msprobe/docs/img/cpu_info.png +0 -0
msprobe/docs/img/free_benchmark.png +0 -0
msprobe/docs/img/free_benchmark_framework.png +0 -0
msprobe/docs/img/grad_probe_image-1.png +0 -0
msprobe/docs/img/grad_probe_image-2.png +0 -0
msprobe/docs/img/grad_probe_image-3.png +0 -0
msprobe/docs/img/grad_probe_image-4.png +0 -0
msprobe/docs/img/grad_probe_image.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/module_compare.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/docs/img/monitor/step_count_per_record.png +0 -0
msprobe/docs/img/ms_dump.png +0 -0
msprobe/docs/img/ms_layer.png +0 -0
msprobe/docs/img/pt_dump.png +0 -0
msprobe/docs/img/save_compare_result_sample.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
msprobe/docs/img/visualization/proxy.png +0 -0
msprobe/docs/img/visualization/tensorboard_1.png +0 -0
msprobe/docs/img/visualization/tensorboard_2.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_browser_2.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/docs/visualization/GPTModel.png +0 -0
msprobe/docs/visualization/ParallelMLP.png +0 -0
msprobe/docs/visualization/layer_mapping_example.md +0 -132
msprobe/docs/visualization/mapping.png +0 -0
msprobe/docs/visualization/mapping1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/2.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/3.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/4.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/5.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/6.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/7.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt +0 -59
msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory2.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt +0 -80
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed1.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed2.png +0 -0
msprobe/docs/visualization/mindspeed_llamafactory_mapping.md +0 -330
msprobe/docs/visualization/module_name.png +0 -0
msprobe/docs/visualization/module_name1.png +0 -0
msprobe/docs/visualization/no_mapping.png +0 -0
msprobe/docs/visualization/no_mapping1.png +0 -0
msprobe/docs/visualization/no_mapping_analyze.png +0 -0
msprobe/docs/visualization/top_layer.png +0 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +0 -460
msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +0 -2081
msprobe/mindspore/code_mapping/bind.py +0 -283
msprobe/mindspore/code_mapping/cmd_parser.py +0 -40
msprobe/mindspore/code_mapping/graph.py +0 -49
msprobe/mindspore/code_mapping/graph_parser.py +0 -211
msprobe/mindspore/code_mapping/main.py +0 -24
msprobe/mindspore/code_mapping/processor.py +0 -34
msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +0 -111
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -52
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +0 -257
msprobe/mindspore/free_benchmark/common/config.py +0 -27
msprobe/mindspore/free_benchmark/common/handler_params.py +0 -31
msprobe/mindspore/free_benchmark/common/utils.py +0 -100
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -638
msprobe/mindspore/free_benchmark/handler/base_handler.py +0 -105
msprobe/mindspore/free_benchmark/handler/check_handler.py +0 -55
msprobe/mindspore/free_benchmark/handler/fix_handler.py +0 -51
msprobe/mindspore/free_benchmark/handler/handler_factory.py +0 -36
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +0 -82
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +0 -45
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +0 -78
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +0 -77
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +0 -56
msprobe/mindspore/free_benchmark/perturbation/no_change.py +0 -27
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +0 -46
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +0 -51
msprobe/mindspore/grad_probe/global_context.py +0 -127
msprobe/mindspore/grad_probe/grad_analyzer.py +0 -260
msprobe/mindspore/grad_probe/grad_monitor.py +0 -42
msprobe/mindspore/grad_probe/grad_stat_csv.py +0 -161
msprobe/mindspore/grad_probe/hook.py +0 -115
msprobe/mindspore/grad_probe/utils.py +0 -43
msprobe/mindspore/mindtorch/__init__.py +0 -18
msprobe/mindspore/ms_config.py +0 -153
msprobe/mindspore/task_handler_factory.py +0 -44
msprobe/nan_analyze/__init__.py +0 -14
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +0 -9
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +0 -480
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +0 -567
msprobe/pytorch/debugger/precision_debugger.py +0 -181
msprobe/pytorch/free_benchmark/__init__.py +0 -23
msprobe/pytorch/free_benchmark/common/constant.py +0 -85
msprobe/pytorch/free_benchmark/common/counter.py +0 -87
msprobe/pytorch/free_benchmark/common/enums.py +0 -80
msprobe/pytorch/free_benchmark/common/params.py +0 -152
msprobe/pytorch/free_benchmark/common/utils.py +0 -143
msprobe/pytorch/free_benchmark/compare/grad_saver.py +0 -215
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +0 -121
msprobe/pytorch/free_benchmark/main.py +0 -123
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +0 -28
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +0 -56
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +0 -107
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +0 -121
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +0 -89
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +0 -87
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +0 -43
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +0 -60
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +0 -34
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +0 -252
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +0 -54
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +0 -40
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +0 -45
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -181
msprobe/pytorch/grad_probe/__init__.py +0 -0
msprobe/pytorch/grad_probe/grad_monitor.py +0 -108
msprobe/pytorch/grad_probe/grad_stat_csv.py +0 -160
msprobe/pytorch/hook_module/__init__.py +0 -16
msprobe/pytorch/hook_module/wrap_aten.py +0 -111
msprobe/pytorch/online_dispatch/__init__.py +0 -19
msprobe/pytorch/online_dispatch/compare.py +0 -224
msprobe/pytorch/online_dispatch/dispatch.py +0 -332
msprobe/pytorch/online_dispatch/dump_compare.py +0 -179
msprobe/pytorch/online_dispatch/single_compare.py +0 -412
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +0 -58
msprobe/pytorch/online_dispatch/utils.py +0 -158
msprobe/pytorch/parse_tool/__init__.py +0 -0
msprobe/pytorch/parse_tool/cli.py +0 -31
msprobe/pytorch/parse_tool/lib/__init__.py +0 -0
msprobe/pytorch/parse_tool/lib/compare.py +0 -253
msprobe/pytorch/parse_tool/lib/config.py +0 -50
msprobe/pytorch/parse_tool/lib/file_desc.py +0 -45
msprobe/pytorch/parse_tool/lib/interactive_cli.py +0 -97
msprobe/pytorch/parse_tool/lib/parse_exception.py +0 -54
msprobe/pytorch/parse_tool/lib/parse_tool.py +0 -161
msprobe/pytorch/parse_tool/lib/utils.py +0 -299
msprobe/pytorch/parse_tool/lib/visualization.py +0 -85
msprobe/pytorch/pt_config.py +0 -299
/msprobe/core/{grad_probe → dump}/__init__.py +0 -0
/msprobe/{mindspore/code_mapping → core/dump/api_dump}/__init__.py +0 -0
/msprobe/{mindspore/debugger → core/dump/data_dump}/__init__.py +0 -0
/msprobe/{mindspore/exception_dump → core/dump/data_dump/data_processor}/__init__.py +0 -0
/msprobe/{mindspore/free_benchmark → core/dump/debugger}/__init__.py +0 -0
/msprobe/{mindspore/free_benchmark/common → core/dump/kernel_dump}/__init__.py +0 -0
/msprobe/mindspore/{free_benchmark/handler → dump/debugger}/__init__.py +0 -0
/msprobe/mindspore/{grad_probe → dump/dump_processor}/__init__.py +0 -0
/msprobe/mindspore/{overflow_check → dump/exception_dump}/__init__.py +0 -0
/msprobe/mindspore/{mindtorch → dump/mindtorch}/mindtorch_adaptor.py +0 -0
/msprobe/{pytorch/api_accuracy_checker/run_ut → mindspore/dump/overflow_check}/__init__.py +0 -0
/msprobe/{pytorch/debugger → mindspore/monitor}/__init__.py +0 -0
/msprobe/{pytorch/free_benchmark/common → msaccucmp}/__init__.py +0 -0
/msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/.keep +0 -0
/msprobe/pytorch/{free_benchmark/perturbed_layers → api_accuracy_checker/acc_check}/__init__.py +0 -0
/msprobe/pytorch/api_accuracy_checker/{run_ut → acc_check}/torch_ut_setting.json +0 -0
/msprobe/pytorch/{free_benchmark/perturbed_layers/npu → dump/api_dump}/__init__.py +0 -0
/msprobe/pytorch/{hook_module → dump/api_dump}/support_wrap_ops.yaml +0 -0
/msprobe/pytorch/{free_benchmark/result_handlers → dump/debugger}/__init__.py +0 -0

msprobe/core/{data_dump → dump/data_dump}/data_processor/pytorch_processor.py RENAMED Viewed

@@ -1,40 +1,53 @@
-# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
-# All rights reserved.
+# -------------------------------------------------------------------------
+#  This file is part of the MindStudio project.
+# Copyright (c) 2025 Huawei Technologies Co.,Ltd.
 #
-# Licensed under the Apache License, Version 2.0  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# MindStudio is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
 #
-# http://www.apache.org/licenses/LICENSE-2.0
+#          http://license.coscl.org.cn/MulanPSL2
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+# -------------------------------------------------------------------------
 import ctypes
+import inspect
 import os
 import zlib
+import json
+import re
 from collections.abc import Iterable
 from concurrent.futures import ThreadPoolExecutor
-from dataclasses import asdict
-from typing import List
 import numpy as np
 import torch
 from torch import distributed as dist
 from torch.distributed.distributed_c10d import _get_default_group
+from msprobe.core.common.file_utils import FileOpen, load_json
 from msprobe.core.common.const import Const
 from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.core.common.exceptions import MsprobeException
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import convert_tuple, is_int
-from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
-    ModuleForwardInputsOutputs, TensorStatInfo
-from msprobe.pytorch.common.utils import save_pt
-from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
+from msprobe.core.dump.data_dump.data_processor.base import (
+    BaseDataProcessor,
+    ModuleBackwardInputsOutputs,
+    ModuleForwardInputsOutputs,
+    TensorStatInfo
+)
+from msprobe.pytorch.common.utils import (
+    Const as PtConst,
+    save_pt,
+    is_recomputation,
+    is_hifloat8_tensor,
+    is_float8_tensor
+)
 is_gpu = False
 try:
@@ -49,6 +62,9 @@ class TensorHandler:
         self.has_fake_tensor = hasattr(torch, "_subclasses") and hasattr(torch._subclasses, "fake_tensor")
         self.has_async_collective_tensor = hasattr(dist, "_functional_collectives") and \
                                            hasattr(dist._functional_collectives, "AsyncCollectiveTensor")
+        self.has_nested_tensor = hasattr(torch, "nested") and hasattr(torch.nested, "_internal") and \
+            hasattr(torch.nested._internal, "nested_tensor") and \
+            hasattr(torch.nested._internal.nested_tensor, "NestedTensor")
     @staticmethod
     def free_tensor(tensor, tensor_name):
@@ -57,6 +73,12 @@ class TensorHandler:
         except Exception as e:
             logger.warning(f"Failed to free tensor: {tensor_name}, the detail info: {e}.")
+    @staticmethod
+    def get_tensor_dtype(tensor):
+        if is_hifloat8_tensor(tensor):
+            return PtConst.HIFLOAT8_TYPE
+        return str(tensor.dtype)
     def is_dtensor(self, tensor):
         return self.has_dtensor and isinstance(tensor, dist.tensor.DTensor)
@@ -66,6 +88,10 @@ class TensorHandler:
     def is_async_collective_tensor(self, tensor):
         return self.has_async_collective_tensor and \
             isinstance(tensor, dist._functional_collectives.AsyncCollectiveTensor)
+    def is_nested_tensor(self, tensor):
+        return self.has_nested_tensor and \
+            isinstance(tensor, torch.nested._internal.nested_tensor.NestedTensor)
     def is_empty_data(self, tensor):
         return tensor.is_meta or self.is_fake_tensor(tensor) or self.is_async_collective_tensor(tensor)
@@ -76,6 +102,15 @@ class TensorHandler:
         if self.is_fake_tensor(tensor):
             logger.debug("FakeTensor cannot be converted to torch.Tensor type.")
             return tensor
+        if self.is_nested_tensor(tensor):
+            logger.debug(f"For NestedTensor, collecting information from the tensor returned by .values().")
+            return tensor.values()
+        if is_float8_tensor(tensor):
+            logger.debug(
+                f"The fp8/hifp8 tensor analyzing/saving is unsupported in dump function."
+                f"Casting to float for processing."
+            )
+            tensor = tensor.detach().float()
         return tensor
     def get_tensor_type(self, tensor):
@@ -85,6 +120,8 @@ class TensorHandler:
             return Const.FAKE_TENSOR_TYPE
         if self.is_async_collective_tensor(tensor):
             return Const.AC_TENSOR_TYPE
+        if self.is_nested_tensor(tensor):
+            return Const.NESTED_TENSOR_TYPE
         return Const.TENSOR_TYPE
     def get_dtensor_info(self, tensor):
@@ -246,6 +283,39 @@ class PytorchDataProcessor(BaseDataProcessor):
         return (hasattr(element, "register_hook") and callable(element.register_hook)) and \
             (hasattr(element, "requires_grad") and element.requires_grad)
+    @staticmethod
+    def is_recompute(call_stack=None):
+        return is_recomputation(call_stack)
+    @staticmethod
+    def analyze_api_call_stack(name):
+        try:
+            call_stack = inspect.stack()
+            if name.startswith("Primitive"):
+                api_stack = call_stack[4:]
+            else:
+                api_stack = call_stack[5:]
+        except Exception as e:
+            logger.warning(f"The call stack of <{name}> failed to retrieve, {e}.")
+            api_stack = None
+            call_stack = None
+        stack_str = []
+        if api_stack:
+            for (_, path, line, func, code, _) in api_stack:
+                if not code:
+                    continue
+                if any(filter_path in path for filter_path in Const.STACK_FILTER_KEYWORDS) and \
+                        Const.CALL_STACK_FLAG not in path:
+                    continue
+                stack_line = f"File {path}, line {str(line)}, in {func}, \n {code[0].strip()}"
+                stack_str.append(stack_line)
+        else:
+            stack_str.append(Const.WITHOUT_CALL_STACK)
+        is_recompute = PytorchDataProcessor.is_recompute(call_stack)
+        del call_stack
+        return tuple(stack_str), is_recompute
     @staticmethod
     def _analyze_torch_size(arg):
         return {"type": "torch.Size", "value": [int(x) for x in list(arg)]}
@@ -358,7 +428,7 @@ class PytorchDataProcessor(BaseDataProcessor):
         tensor_stat = self.get_stat_info(common_tensor, self.config.async_dump, self.config.precision)
         tensor_json = {}
         tensor_json.update({'type': self.tensor_handler.get_tensor_type(tensor)})
-        tensor_json.update({'dtype': str(common_tensor.dtype)})
+        tensor_json.update({'dtype': self.tensor_handler.get_tensor_dtype(tensor)})
         tensor_json.update({"shape": common_tensor.shape})
         stat_values = [
@@ -388,18 +458,22 @@ class PytorchDataProcessor(BaseDataProcessor):
                 elif t_cpu.device.type == "npu":
                     t_cpu = t_cpu.to("cpu", non_blocking=True)
                     torch.npu.synchronize()
                 t_cpu = t_cpu.detach()
-                if not t_cpu.is_contiguous():
-                    t_cpu = t_cpu.contiguous()
-                future = self._crc_executor.submit(
-                    PytorchDataProcessor.compute_crc32_from_tensor,
-                    t_cpu
-                )
+                if self.config.task == Const.TENSOR and self.data_writer.bench_dump_file_path is not None:
+                    tensor_md5 = PytorchDataProcessor.compute_crc32_from_tensor(t_cpu)
+                    tensor_json.update({Const.MD5: tensor_md5})
+                else:
+                    if not t_cpu.is_contiguous():
+                        t_cpu = t_cpu.contiguous()
+                    future = self._crc_executor.submit(
+                        PytorchDataProcessor.compute_crc32_from_tensor,
+                        t_cpu
+                    )
-                crc_placeholder = self.data_writer.append_crc32_to_buffer(future)
-                tensor_json[Const.MD5_INDEX] = crc_placeholder
+                    crc_placeholder = self.data_writer.append_crc32_to_buffer(future)
+                    tensor_json[Const.MD5_INDEX] = crc_placeholder
             else:
                 logger.debug(
                     "Calculating the md5 value of fake tensor or meta tensor is not supported, "
@@ -456,164 +530,316 @@ class TensorDataProcessor(PytorchDataProcessor):
         return self._analyze_and_save_ndarray(ndarray, suffix)
-class OverflowCheckDataProcessor(PytorchDataProcessor):
-    __slots__ = ["cached_tensors_and_file_paths"]
+class DiffCheckDataProcessor(PytorchDataProcessor):
+    __slots__ = [
+        "cached_tensors_and_file_paths",
+        "_bench_ref_path",
+        "_bench_ref_mtime",
+        "_bench_map",
+        "_bench_state",  # 新增：按 API 的对比状态
+    ]
     def __init__(self, config, data_writer):
         super().__init__(config, data_writer)
-        self.has_overflow = False
-        self.support_inf_nan = None
+        self.has_diff = False
         self.cached_api_info = {}
         self.cached_tensors_and_file_paths = {}
-        self.bits_for_overflow = 8
-        self.real_overflow_nums = 0
-        self.overflow_nums = config.overflow_nums
+        self.bits_for_diff = 8
+        self.real_diff_nums = 0
+        self.diff_nums = config.diff_nums
+        # 新增：bench 基准缓存初始化
+        self._bench_ref_path = None
+        self._bench_ref_mtime = None
+        self._bench_map = {}
+        self._bench_state = {}  # key: api_name -> 状态字典
     @property
     def is_terminated(self):
-        if self.overflow_nums == -1:
+        if self.diff_nums == -1:
             return False
-        if self.real_overflow_nums >= self.overflow_nums:
+        if self.real_diff_nums >= self.diff_nums:
             return True
         return False
+    @staticmethod
+    def _parse_data_name(data_name: str):
+        """
+        解析 data_name，例如：
+        - "Functional.relu.2.forward.input.0.pt"
+        - 兼容可选前缀 "name:" -> "name:Functional.relu.2.forward.input.0.pt"
+        返回 (api, io, idx) 或 None
+        """
+        if not data_name:
+            return None
+        if data_name.startswith("name:"):
+            data_name = data_name.split(":", 1)[1]
+        # api 名本身可能包含若干个 '.'，所以用正则从右侧提取 io/idx/扩展名
+        m = re.match(
+            r"^(?=.{1,1024}$)(?P<api>.+)\.(?P<io>input|output)\.(?P<idx>\d+)\.\w+$",
+            data_name
+        )
+        if not m:
+            return None
+        api = m.group("api")
+        io = m.group("io")
+        idx = int(m.group("idx"))
+        return api, io, idx
     def analyze_forward_input(self, name, module, module_input_output: ModuleForwardInputsOutputs):
-        self.has_overflow = False
-        self._is_support_inf_nan()
+        self.has_diff = False
         self.cached_api_info = super().analyze_forward_input(name, module, module_input_output)
         return None
     def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
-        self._is_support_inf_nan()
         api_info_struct = super().analyze_forward_output(name, module, module_input_output)
         if name in self.cached_api_info and name in api_info_struct:
             self.cached_api_info[name].update(api_info_struct[name])
         elif name in api_info_struct:
             self.cached_api_info = api_info_struct
-        self.handle_overflow()
-        return self.cached_api_info if self.has_overflow else None
+        self.handle_diff()
+        return self.cached_api_info
     def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
-        self.has_overflow = False
-        self._is_support_inf_nan()
+        self.has_diff = False
         api_info_struct = super().analyze_forward(name, module, module_input_output)
-        self.handle_overflow()
-        return api_info_struct if self.has_overflow else None
+        self.handle_diff()
+        return api_info_struct
     def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs):
-        self.has_overflow = False
-        self._is_support_inf_nan()
+        self.has_diff = False
         api_info_struct = super().analyze_backward(name, module, module_input_output)
-        self.handle_overflow()
-        return api_info_struct if self.has_overflow else None
+        self.handle_diff()
+        return api_info_struct
     def analyze_params(self, name, param_name, grad):
-        self.has_overflow = False
-        self._is_support_inf_nan()
+        self.has_diff = False
         api_info_struct = super().analyze_params(name, param_name, grad)
-        self.handle_overflow()
-        return api_info_struct if self.has_overflow else None
+        self.handle_diff()
+        return api_info_struct
-    def handle_overflow(self):
-        if not self.support_inf_nan:
-            self._analyze_maybe_overflow_flag()
-        if self.has_overflow:
+    def handle_diff(self):
+        if self.has_diff:
             for file_path, tensor in self.cached_tensors_and_file_paths.items():
                 self.tensor_handler.save_tensor(tensor, file_path)
-            self.real_overflow_nums += 1
-            if self.overflow_nums != -1 and self.real_overflow_nums >= self.overflow_nums:
-                logger.info(f"[{Const.TOOL_NAME}] Reached the preset overflow times, "
-                            f"current overflow times: {self.real_overflow_nums}.")
+            self.real_diff_nums += 1
+            if self.diff_nums != -1 and self.real_diff_nums >= self.diff_nums:
+                logger.info(f"[{Const.TOOL_NAME}] Reached the preset diff times, "
+                            f"current diff times: {self.real_diff_nums}.")
+        api = getattr(self, "current_api_or_module_name", None)
+        if api and api in self._bench_state:
+            self._bench_state.pop(api, None)
         self.cached_tensors_and_file_paths = {}
-    def _is_support_inf_nan(self):
-        if self.support_inf_nan is not None:
-            return
-        try:
-            self.support_inf_nan = is_gpu or torch_npu.npu.utils.is_support_inf_nan()
-        except Exception:
-            logger.warning(f"Unable to determine if the current device supports inf/nan mode, default not supported.")
-            self.support_inf_nan = False
-    def _analyze_maybe_overflow_flag(self):
+    def _analyze_maybe_diff_flag(self):
         try:
-            self.has_overflow = torch_npu.npu.utils.get_npu_overflow_flag()
-            if self.has_overflow:
-                torch_npu.npu.utils.clear_npu_overflow_flag()
+            self.has_diff = torch_npu.npu.utils.get_npu_diff_flag()
+            if self.has_diff:
+                torch_npu.npu.utils.clear_npu_diff_flag()
         except Exception as e:
-            logger.error(f"Overflow check failed, the current environment may be abnormal.")
-            raise RuntimeError(f"overflow check failed") from e
+            logger.error(f"Diff check failed, the current environment may be abnormal.")
+            raise RuntimeError(f"diff check failed") from e
+    def _bench_expected_counts_for_api(self, api: str):
+        """统计某 API 在 bench_map 里有多少个 Tensor 输入/输出"""
+        n_in = n_out = 0
+        for (a, io, _) in self._bench_map.keys():
+            if a == api:
+                if io == "input":
+                    n_in += 1
+                elif io == "output":
+                    n_out += 1
+        return n_in, n_out
+    def _resolve_bench_json_path(self) -> str:
+        p = getattr(self.data_writer, "bench_dump_file_path", None)
+        if not p:
+            return None
+        p = os.path.join(p, "dump.json") if os.path.isdir(p) else p
+        return p if os.path.isfile(p) else None
+    def _ensure_bench_map_loaded(self) -> bool:
+        """
+        当路径变化或文件 mtime 变化时重载 dump.json，并构建 (api, 'input'/'output', idx) -> {md5, shape} 的索引。
+        """
+        path = self._resolve_bench_json_path()
+        if not path:
+            return False
+        try:
+            mtime = os.path.getmtime(path)
+        except Exception as e:
+            return False
-    def _analyze_maybe_overflow_tensor(self, tensor_json):
-        tensor_stat_index = tensor_json.get(Const.TENSOR_STAT_INDEX)
-        if tensor_stat_index is None:
-            logger.warning("tensor_stat_index does not exist in tensor_json.")
-            return
-        max_tensor = self.data_writer.get_buffer_values_max(tensor_stat_index)
-        min_tensor = self.data_writer.get_buffer_values_min(tensor_stat_index)
+        need_reload = (path != self._bench_ref_path) or (mtime != self._bench_ref_mtime)
+        if need_reload:
+            try:
+                obj = load_json(path)
+            except Exception as e:
+                logger.warning(f"Failed to load bench dump.json: {e}")
+                return False
-        if max_tensor is None or min_tensor is None:
+            data = obj.get("data", {})
+            self._bench_map = self._build_bench_map_from_json(data)
+            self._bench_ref_path = path
+            self._bench_ref_mtime = mtime
+        return True
+    def _build_bench_map_from_json(self, data: dict) -> dict:
+        """
+        data 结构：{ api_name: {input_args: [...], output: [...] } }
+        只收集 Tensor 项：(api, io, idx) -> {"md5": str, "shape": list}
+        """
+        mp = {}
+        total_inputs = 0
+        total_outputs = 0
+        for api_name, rec in data.items():
+            ia = rec.get("input_args", [])
+            oa = rec.get("output", [])
+            # input_args
+            input_count_this_api = 0
+            for i, arg in enumerate(ia):
+                if isinstance(arg, dict) and arg.get("type") == "torch.Tensor":
+                    mp[(api_name, "input", i)] = {
+                        "md5": arg.get("md5"),
+                        "shape": arg.get("shape"),
+                    }
+                    input_count_this_api += 1
+            total_inputs += input_count_this_api
+            # output
+            output_count_this_api = 0
+            for i, out in enumerate(oa):
+                if isinstance(out, dict) and out.get("type") == "torch.Tensor":
+                    mp[(api_name, "output", i)] = {
+                        "md5": out.get("md5"),
+                        "shape": out.get("shape"),
+                    }
+                    output_count_this_api += 1
+            total_outputs += output_count_this_api
+        return mp
+    def _analyze_maybe_diff_tensor(self, tensor_json):
+        # 1) bench map 准备
+        if not self._ensure_bench_map_loaded():
             return
-        if torch.isinf(max_tensor) or torch.isnan(max_tensor):
-            self.has_overflow = True
+        # 2) 解析 data_name -> (api, io, idx)
+        data_name = tensor_json.get("data_name")
+        parsed = self._parse_data_name(data_name)
+        if not parsed:
+            logger.debug(f"data_name parse failed: {data_name}")
+            return
+        api, io, idx = parsed
+        # 3) 取/建 本 API 的状态
+        st = self._bench_state.get(api)
+        if st is None:
+            n_in, _ = self._bench_expected_counts_for_api(api)
+            st = {
+                "expected_in": n_in,  # 标杆中该 API 期望的 Tensor 输入数
+                "checked_in": 0,  # 已经校验过的“在标杆中存在的输入”个数
+                "inputs_equal": True,  # 到目前为止，输入是否全部一致
+                "seen_input_not_in_ref": False,  # 遇到“运行时存在但标杆里没有”的输入
+                "any_output_neq": False,  # 是否发现过任一输出不一致（shape 同且 md5 不同）
+            }
+            self._bench_state[api] = st
+        # 4) 找到标杆项
+        ref = self._bench_map.get((api, io, idx))
+        # 5) 当前 shape
+        cur_shape = tensor_json.get("shape")
+        if cur_shape is None:
+            return
+        try:
+            cur_shape = list(cur_shape)
+        except Exception as e:
+            logger.warning("[BENCH]", "shape to list failed:", repr(e), "-> skip")
             return
-        if torch.isinf(min_tensor) or torch.isnan(min_tensor):
-            self.has_overflow = True
+        # 6) 输入与输出分别处理
+        if io == "input":
+            # —— 输入阶段：只维护“输入是否一致”的状态 —— #
+            if ref is None:
+                # 运行时有输入，但标杆里没有对应条目 => 不能断言“输入一致”
+                st["inputs_equal"] = False
+                st["seen_input_not_in_ref"] = True
-    def _analyze_tensor(self, tensor, suffix):
-        dump_data_name, file_path = self.get_save_file_path(suffix)
-        self.cached_tensors_and_file_paths.update({file_path: tensor})
-        single_arg = super()._analyze_tensor(tensor, suffix)
-        single_arg.update({"data_name": dump_data_name})
-        if not self.has_overflow and self.support_inf_nan:
-            self._analyze_maybe_overflow_tensor(single_arg)
-        return single_arg
+                return
+            ref_shape = ref.get("shape")
+            ref_md5 = ref.get("md5")
-class FreeBenchmarkDataProcessor(PytorchDataProcessor):
+            # 标杆有该输入，计入已校验
+            st["checked_in"] += 1
-    def __init__(self, config, data_writer):
-        super().__init__(config, data_writer)
-        self.checker = FreeBenchmarkCheck(config=config)
-        self._return_forward_new_output = None
-        self._forward_new_output = None
+            # shape 必须一致
+            if list(ref_shape) != list(cur_shape):
+                st["inputs_equal"] = False
-    def update_iter(self, current_iter):
-        super().update_iter(current_iter)
-        self.checker.update_iter(current_iter)
+                return
-    def update_unequal_rows(self, unequal_rows: List[UnequalRow]):
-        if not unequal_rows:
-            return
-        for row in unequal_rows:
-            data_dict = asdict(row)
-            self.data_writer.write_data_to_csv(
-                data_dict.values(),
-                data_dict.keys(),
-                self.data_writer.free_benchmark_file_path
-            )
-        return
+            # 取当前 md5
+            cur_md5 = tensor_json.get(Const.MD5) if Const.MD5 in tensor_json else tensor_json.get("md5")
-    def analyze_forward_input(self, name, module, module_input_output: ModuleForwardInputsOutputs):
-        self.checker.pre_forward(name, module, self, module_input_output.args, module_input_output.kwargs)
+            if cur_md5 is None or ref_md5 is None:
+                # 缺少 md5 信息，无法断言一致
+                st["inputs_equal"] = False
+                return
-    def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
-        new_output, unequal_rows = self.checker.forward(
-            name,
-            module,
-            module_input_output.args,
-            module_input_output.kwargs,
-            module_input_output.output,
-        )
-        self.update_unequal_rows(unequal_rows)
-        if self.checker.if_fix():
-            self._return_forward_new_output = True
-            self._forward_new_output = new_output
+            # md5 必须一致
+            if str(cur_md5) != str(ref_md5):
+                st["inputs_equal"] = False
+            return  # 输入阶段不触发 has_diff
-    def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs):
-        self.checker.backward(name, module, module_input_output.grad_input)
+        else:  # io == "output"
+            # —— 输出阶段：仅当“所有输入一致且已校验完所有输入”时，才检查输出不一致以置位 —— #
+            # 若标杆无此输出，按照你的规则：不能断言输出不一致，直接跳过
+            if ref is None:
+                return
+            ref_shape = ref.get("shape")
+            ref_md5 = ref.get("md5")
+            # shape 必须一致才比较 md5
+            if list(ref_shape) != list(cur_shape):
+                return
+            cur_md5 = tensor_json.get(Const.MD5) if Const.MD5 in tensor_json else tensor_json.get("md5")
+            if cur_md5 is None or ref_md5 is None:
+                return
+            # 只有当“输入全部一致且已校验完所有输入”时，才允许判定输出不一致
+            inputs_ok = (
+                    st["inputs_equal"]
+                    and (st["checked_in"] == st["expected_in"])
+                    and (not st["seen_input_not_in_ref"])
+            )
+            if inputs_ok and (str(cur_md5) != str(ref_md5)):
+                st["any_output_neq"] = True
+                self.has_diff = True
+    def _analyze_tensor(self, tensor, suffix):
+        dump_data_name, file_path = self.get_save_file_path(suffix)
+        self.cached_tensors_and_file_paths.update({file_path: tensor})
+        single_arg = super()._analyze_tensor(tensor, suffix)
+        single_arg.update({"data_name": dump_data_name})
+        if not self.has_diff:
+            self._analyze_maybe_diff_tensor(single_arg)
+        return single_arg
 class KernelDumpDataProcessor(PytorchDataProcessor):
@@ -707,6 +933,11 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
     )
     def clone_and_detach_tensor(self, input_params):
         if isinstance(input_params, torch.Tensor):
+            if is_float8_tensor(input_params):
+                raise MsprobeException(
+                    MsprobeException.UNSUPPORTED_TYPE_ERROR,
+                    f"L2 backward dump does not support float8 type."
+                )
             if input_params.requires_grad:
                 return input_params.clone().detach().requires_grad_()
             return input_params.clone()
@@ -720,6 +951,8 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
             return input_params
     def analyze_single_element(self, element, suffix_stack):
+        if is_float8_tensor(element):
+            return {}
         if isinstance(element, torch.Tensor):
             if not self.is_found_output_tensor:
                 if element.requires_grad:

mindstudio-probe 8.3.3__py3-none-any.whl → 26.0.0a1__py3-none-any.whl

mindstudio-probe 8.3.3py3-none-any.whl → 26.0.0a1py3-none-any.whl