PyPI - mindstudio-probe - Versions diffs - 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

mindstudio-probe 1.1.0py3-none-any.whl → 1.2.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (299) hide show

{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/METADATA +7 -6
mindstudio_probe-1.2.1.dist-info/RECORD +396 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/entry_points.txt +0 -1
msprobe/CMakeLists.txt +5 -0
msprobe/README.md +51 -20
msprobe/config.json +2 -3
msprobe/core/advisor/advisor.py +8 -3
msprobe/core/common/const.py +264 -15
msprobe/core/common/exceptions.py +27 -3
msprobe/core/common/file_utils.py +176 -26
msprobe/core/common/inplace_op_checker.py +15 -0
msprobe/core/common/inplace_ops.yaml +3 -0
msprobe/core/common/log.py +27 -9
msprobe/core/common/utils.py +204 -77
msprobe/core/common_config.py +49 -14
msprobe/core/compare/acc_compare.py +274 -198
msprobe/core/compare/check.py +32 -33
msprobe/core/compare/compare_cli.py +32 -14
msprobe/core/compare/highlight.py +283 -127
msprobe/core/compare/layer_mapping/__init__.py +19 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +246 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +249 -0
msprobe/core/compare/layer_mapping/postprocess_pass.py +95 -0
msprobe/core/compare/merge_result/merge_result.py +380 -0
msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
msprobe/core/compare/multiprocessing_compute.py +2 -2
msprobe/core/compare/npy_compare.py +135 -144
msprobe/core/compare/utils.py +419 -274
msprobe/core/data_dump/data_collector.py +60 -28
msprobe/core/data_dump/data_processor/base.py +84 -36
msprobe/core/data_dump/data_processor/factory.py +5 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +152 -18
msprobe/core/data_dump/data_processor/pytorch_processor.py +267 -110
msprobe/core/data_dump/json_writer.py +29 -1
msprobe/core/data_dump/scope.py +119 -39
msprobe/core/grad_probe/constant.py +27 -13
msprobe/core/grad_probe/grad_compare.py +18 -1
msprobe/core/grad_probe/utils.py +30 -2
msprobe/core/overflow_check/abnormal_scene.py +189 -0
msprobe/core/overflow_check/api_info.py +55 -0
msprobe/core/overflow_check/checker.py +138 -0
msprobe/core/overflow_check/filter.py +157 -0
msprobe/core/overflow_check/ignore_rules.yaml +55 -0
msprobe/core/overflow_check/level.py +22 -0
msprobe/core/overflow_check/utils.py +28 -0
msprobe/docs/01.installation.md +96 -7
msprobe/docs/02.config_introduction.md +50 -23
msprobe/docs/03.config_examples.md +2 -9
msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
msprobe/docs/05.data_dump_PyTorch.md +93 -61
msprobe/docs/06.data_dump_MindSpore.md +200 -95
msprobe/docs/07.accuracy_checker_PyTorch.md +28 -28
msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +114 -50
msprobe/docs/11.accuracy_compare_MindSpore.md +340 -48
msprobe/docs/12.overflow_check_PyTorch.md +2 -2
msprobe/docs/13.overflow_check_MindSpore.md +6 -6
msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
msprobe/docs/17.grad_probe.md +5 -6
msprobe/docs/19.monitor.md +561 -0
msprobe/docs/20.monitor_performance_baseline.md +52 -0
msprobe/docs/21.visualization_PyTorch.md +466 -0
msprobe/docs/22.visualization_MindSpore.md +481 -0
msprobe/docs/23.generate_operator_PyTorch.md +107 -0
msprobe/docs/24.code_mapping_Mindspore.md +28 -0
msprobe/docs/25.tool_function_introduction.md +29 -0
msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
msprobe/docs/27.dump_json_instruction.md +521 -0
msprobe/docs/FAQ.md +29 -2
msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +211 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
msprobe/docs/img/visualization/tensorboard_1.png +0 -0
msprobe/docs/img/visualization/tensorboard_2.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_browser_2.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/docs/visualization/GPTModel.png +0 -0
msprobe/docs/visualization/ParallelMLP.png +0 -0
msprobe/docs/visualization/layer_mapping_example.md +132 -0
msprobe/docs/visualization/mapping.png +0 -0
msprobe/docs/visualization/mapping1.png +0 -0
msprobe/docs/visualization/module_name.png +0 -0
msprobe/docs/visualization/module_name1.png +0 -0
msprobe/docs/visualization/no_mapping.png +0 -0
msprobe/docs/visualization/no_mapping1.png +0 -0
msprobe/docs/visualization/no_mapping_analyze.png +0 -0
msprobe/docs/visualization/top_layer.png +0 -0
msprobe/mindspore/__init__.py +25 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -151
msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +64 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +64 -31
msprobe/mindspore/api_accuracy_checker/data_manager.py +301 -0
msprobe/mindspore/api_accuracy_checker/main.py +28 -3
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +212 -0
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +60 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
msprobe/mindspore/cell_processor.py +33 -12
msprobe/mindspore/code_mapping/bind.py +264 -0
msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
msprobe/mindspore/code_mapping/graph.py +49 -0
msprobe/mindspore/code_mapping/graph_parser.py +226 -0
msprobe/mindspore/code_mapping/main.py +24 -0
msprobe/mindspore/code_mapping/processor.py +34 -0
msprobe/mindspore/common/const.py +35 -13
msprobe/mindspore/common/log.py +5 -9
msprobe/mindspore/common/utils.py +88 -4
msprobe/mindspore/compare/distributed_compare.py +22 -24
msprobe/mindspore/compare/ms_compare.py +333 -268
msprobe/mindspore/compare/ms_graph_compare.py +95 -52
msprobe/mindspore/debugger/debugger_config.py +7 -1
msprobe/mindspore/debugger/precision_debugger.py +87 -12
msprobe/mindspore/dump/dump_tool_factory.py +3 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +95 -18
msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +45 -30
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +36 -1
msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
msprobe/mindspore/dump/jit_dump.py +17 -5
msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
msprobe/mindspore/dump/kernel_graph_dump.py +9 -4
msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +156 -41
msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
msprobe/mindspore/free_benchmark/common/utils.py +19 -4
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
msprobe/mindspore/grad_probe/global_context.py +28 -8
msprobe/mindspore/grad_probe/grad_analyzer.py +50 -24
msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
msprobe/mindspore/grad_probe/hook.py +35 -12
msprobe/mindspore/grad_probe/utils.py +18 -5
msprobe/mindspore/mindtorch/__init__.py +18 -0
msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
msprobe/mindspore/ms_config.py +27 -16
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +9 -4
msprobe/mindspore/runtime.py +15 -0
msprobe/mindspore/service.py +285 -113
msprobe/mindspore/task_handler_factory.py +15 -0
msprobe/msprobe.py +48 -10
msprobe/pytorch/__init__.py +8 -6
msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +103 -271
msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +478 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +63 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +21 -15
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +54 -22
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +140 -71
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +49 -8
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +142 -16
msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
msprobe/pytorch/bench_functions/swiglu.py +10 -2
msprobe/pytorch/common/parse_json.py +7 -6
msprobe/pytorch/common/utils.py +101 -7
msprobe/pytorch/compare/distributed_compare.py +17 -30
msprobe/pytorch/compare/pt_compare.py +44 -22
msprobe/pytorch/debugger/debugger_config.py +46 -27
msprobe/pytorch/debugger/precision_debugger.py +42 -12
msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
msprobe/pytorch/{module_processer.py → dump/module_dump/module_processer.py} +81 -10
msprobe/pytorch/free_benchmark/common/constant.py +15 -0
msprobe/pytorch/free_benchmark/common/counter.py +15 -0
msprobe/pytorch/free_benchmark/common/enums.py +15 -0
msprobe/pytorch/free_benchmark/common/params.py +10 -2
msprobe/pytorch/free_benchmark/common/utils.py +29 -4
msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -5
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +41 -47
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
msprobe/pytorch/hook_module/__init__.py +1 -1
msprobe/pytorch/hook_module/hook_module.py +14 -11
msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +35 -0
msprobe/pytorch/hook_module/wrap_distributed.py +6 -8
msprobe/pytorch/hook_module/wrap_functional.py +0 -38
msprobe/pytorch/monitor/__init__.py +0 -0
msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
msprobe/pytorch/monitor/anomaly_detect.py +425 -0
msprobe/pytorch/monitor/csv2tb.py +166 -0
msprobe/pytorch/monitor/distributed/__init__.py +0 -0
msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +283 -0
msprobe/pytorch/monitor/features.py +108 -0
msprobe/pytorch/monitor/module_hook.py +1076 -0
msprobe/pytorch/monitor/module_metric.py +172 -0
msprobe/pytorch/monitor/module_spec_verifier.py +95 -0
msprobe/pytorch/monitor/optimizer_collect.py +333 -0
msprobe/pytorch/monitor/unittest/__init__.py +0 -0
msprobe/pytorch/monitor/unittest/test_monitor.py +160 -0
msprobe/pytorch/monitor/utils.py +321 -0
msprobe/pytorch/monitor/visualizer.py +59 -0
msprobe/pytorch/online_dispatch/__init__.py +2 -3
msprobe/pytorch/online_dispatch/compare.py +29 -38
msprobe/pytorch/online_dispatch/dispatch.py +58 -27
msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
msprobe/pytorch/online_dispatch/single_compare.py +53 -32
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
msprobe/pytorch/online_dispatch/utils.py +49 -21
msprobe/pytorch/parse_tool/lib/compare.py +21 -27
msprobe/pytorch/parse_tool/lib/config.py +6 -8
msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
msprobe/pytorch/parse_tool/lib/parse_tool.py +12 -12
msprobe/pytorch/parse_tool/lib/utils.py +33 -53
msprobe/pytorch/parse_tool/lib/visualization.py +11 -10
msprobe/pytorch/pt_config.py +31 -8
msprobe/pytorch/service.py +188 -108
msprobe/visualization/__init__.py +14 -0
msprobe/visualization/builder/__init__.py +14 -0
msprobe/visualization/builder/graph_builder.py +222 -0
msprobe/visualization/builder/msprobe_adapter.py +227 -0
msprobe/visualization/compare/__init__.py +14 -0
msprobe/visualization/compare/graph_comparator.py +180 -0
msprobe/visualization/compare/mode_adapter.py +197 -0
msprobe/visualization/graph/__init__.py +14 -0
msprobe/visualization/graph/base_node.py +119 -0
msprobe/visualization/graph/distributed_analyzer.py +318 -0
msprobe/visualization/graph/graph.py +209 -0
msprobe/visualization/graph/node_colors.py +95 -0
msprobe/visualization/graph/node_op.py +39 -0
msprobe/visualization/graph_service.py +288 -0
msprobe/visualization/utils.py +217 -0
mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
msprobe/docs/04.acl_config_examples.md +0 -78
msprobe/mindspore/compare/layer_mapping.py +0 -146
msprobe/mindspore/compare/modify_mapping.py +0 -107
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
msprobe/pytorch/functional/module_dump.py +0 -84
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/top_level.txt +0 -0
/msprobe/mindspore/{free_benchmark/decorator → code_mapping}/__init__.py +0 -0
/msprobe/pytorch/{functional → dump/module_dump}/__init__.py +0 -0

msprobe/docs/21.visualization_PyTorch.md ADDED Viewed

@@ -0,0 +1,466 @@
+# PyTorch 场景的分级可视化构图比对
+分级可视化工具将msprobe工具dump的精度数据进行解析，还原模型图结构，实现模型各个层级的精度数据比对，方便用户理解模型结构、分析精度问题。
+工具支持PyTorch版本：2.1/2.2
+## 展示示例
+支持重建模型的层级结构；
+支持两个模型的结构差异比对；
+支持两个模型的精度数据比对，支持疑似有精度问题节点的快速搜索，自动跳转展开节点所在的层级。
+![vis_show](./img/visualization/vis_showcase.png)
+## 1.依赖安装
+分级可视化工具依赖**msprobe工具**和**tensorboard**。
+### 1.1 安装msprobe工具
+[msprobe工具安装](https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/docs/01.installation.md)
+### 1.2 安装tb_graph_ascend
+**请安装tb_graph_ascend，否则无法解析构图结果。**
+``pip3 install tb-graph-ascend``即可。
+## 2.模型结构数据采集
+[PyTorch场景的数据采集](https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md)
+**需要选择level为L0（module信息）或者mix（module信息+api信息），才能采集到模型结构数据，即采集结果件construct.json内容不为空**。
+## 3.生成图结构文件
+### 3.1 构图命令行说明
+**命令示例如下**：
+```
+msprobe -f pytorch graph -i ./compare.json -o ./output
+```
+**命令行参数说明**：
+| 参数名                    | 说明                                                                                                                                                                                                                                                                  | 是否必选 |
+|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------|
+| -i 或 --input_path      | 指定比对文件，参考[比对文件说明](#313-比对文件说明)                                                                                                                                                                                                                                      | 是    |
+| -o 或 --output_path     | 配置比对结果文件存盘目录，str 类型。文件名称基于时间戳自动生成，格式为：`compare_{timestamp}.vis` 或 `build_{timestamp}.vis`。 | 是    |
+| -lm 或 --layer_mapping  | 跨套件比对，例如同一个模型分别使用了DeepSpeed和Megatron套件的比对场景。配置该参数时表示开启跨套件Layer层的比对功能，指定模型代码中的Layer层后，可以识别对应dump数据中的模块或API。需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件（Layer）](#71-自定义映射文件layer)，如何配置自定义映射文件请参考[模型分级可视化如何配置layer mapping映射文件](./visualization/layer_mapping_example.md)。 | 否    |
+| -oc 或 --overflow_check | 是否开启溢出检测模式，开启后会在输出vis文件中（`compare_{timestamp}.vis` 或 `build_{timestamp}.vis`）对每个溢出节点标记溢出等级，溢出等级说明参考[溢出等级说明](#312-溢出等级说明)。 | 否    |
+| -f 或 --fuzzy_match     | 是否开启模糊匹配，bool类型。模糊匹配说明参考[匹配说明](#311-匹配说明)                                                                                                                                                                                                                           | 否    |
+| -cs 或 --complete_stack | 是否使用完整的堆栈信息，bool类型。默认使用精简的堆栈信息，数据量小有助于增加流畅度。完整堆栈和精简堆栈信息参考[堆栈信息说明](#72-堆栈信息说明)                                                                                                                                                                                       | 否    |
+#### 3.1.1 匹配说明
+**注：dump名称 = 名称 + 调用次数**，例如Torch.matmul.2.forward，matmul是名称，2是调用次数
+1.默认匹配
+- 所有节点dump名称一致
+- 节点输入输出参数数量一致，参数type、shape一致
+- 节点的层级一致（父节点们一致）
+2.模糊匹配
+- Module节点dump名称一致，两个匹配上的Module节点, 忽略各自节点下所有api的dump调用次数，按照名称一致+Module节点内的调用顺序进行匹配
+- ![fuzzy_match_pt.png](./img/visualization/fuzzy_match_pt.png)
+- 参数shape一致
+#### 3.1.2 溢出等级说明
+- medium：输入异常，输出正常场景
+- high：输入异常，输出异常；输出norm值相较于输入存在异常增大情况
+- critical：输入正常，输出异常场景
+#### 3.1.3 比对文件说明
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"bench_path": "./bench_dump",
+"is_print_compare_log": true
+}
+```
+**比对文件参数说明**：
+| 参数名               | 说明                                                                         | 是否必选 |
+|-------------------|----------------------------------------------------------------------------|------|
+| npu_path   | 指定待调试侧比对路径，str类型。工具根据路径格式自动进行单rank比对、多rank批量比对或多step批量比对，具体格式参考3.2 图构建和比对。 | 是    |
+| bench_path  | 指定标杆侧比对路径，str类型。单图构建场景可以不配置。                                               | 否    |
+| is_print_compare_log  | 配置是否开启单个算子的日志打屏。可取值 true 或 false，默认为 true。关闭后则只输出常规日志，bool 类型。             | 否    |
+### 3.2 图构建和比对
+**如果只是想查看一个模型的结构，请选择单图构建**；
+**如果想比较两个模型的结构差异和精度数据差异，请选择双图比对**。
+#### 3.2.1 单图构建
+展示模型结构、精度数据、堆栈信息。
+**1. 准备比对文件**：
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"is_print_compare_log": true
+}
+```
+npu_path格式：必须包含dump.json、stack.json和construct.json，且construct.json不能为空。如果construct.json为空，请检查dump的level参数是否没有选择L0或者mix。
+```
+├── npu_path
+│   ├── dump_tensor_data（配置dump的task参数选择tensor时存在）
+|   |    ├── Tensor.permute.1.forward.pt
+|   |    ├── MyModule.0.forward.input.pt
+|   |    ...
+|   |    └── Function.linear.5.backward.output.pt
+|   ├── dump.json         # 数据信息
+|   ├── stack.json        # 调用栈信息
+|   └── construct.json    # 分层分级结构
+```
+**2. 执行命令**：
+```
+msprobe -f pytorch graph -i ./compare.json -o ./output
+```
+#### 3.2.2 双图比对
+展示模型结构、结构差异、精度数据和精度比对指标、精度是否疑似有问题（精度比对指标差异越大颜色越深）。
+当前比对支持三种类型的dump数据，分级可视化工具比对时会自动判断：
+1.统计信息：仅dump了API和Module的输入输出数据统计信息，占用磁盘空间小；
+2.真实数据：不仅dump了API和Module的输入输出数据统计信息，还将tensor进行存盘，占用磁盘空间大，但比对更加准确；
+3.md5：dump了API和Module的输入输出数据统计信息和md5信息。
+dump类型如何配置见[数据采集配置文件介绍](https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/docs/02.config_introduction.md)
+**1. 准备比对文件**：
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"bench_path": "./bench_dump",
+"is_print_compare_log": true
+}
+```
+npu_path或bench_path格式：必须包含dump.json、stack.json和construct.json，且construct.json不能为空。如果construct.json为空，请检查dump的level参数是否没有选择L0或者mix。
+```
+├── npu_path或bench_path
+│   ├── dump_tensor_data（仅配置dump的task参数选择tensor时存在）
+|   |    ├── Tensor.permute.1.forward.pt
+|   |    ├── MyModule.0.forward.input.pt
+|   |    ...
+|   |    └── Function.linear.5.backward.output.pt
+|   ├── dump.json         # 数据信息
+|   ├── stack.json        # 调用栈信息
+|   └── construct.json    # 分层分级结构，level为L1时，construct.json内容为空
+```
+**2. 执行命令**：
+```
+msprobe -f pytorch graph -i ./compare.json -o ./output
+```
+比对完成后将在**output**下生成一个**vis后缀文件**。
+#### 3.2.3 批量构建或比对
+##### 3.2.3.1 多rank批量构建或比对
+批量构建或比对一个step下的所有rank的数据
+**1. 准备比对文件**：
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"bench_path": "./bench_dump", # 只进行图构建可不配置
+"is_print_compare_log": true
+}
+```
+npu_path或bench_path格式：必须只包含rank+数字格式的文件夹，且每个rank文件夹中必须包含dump.json、stack.json和construct.json，且construct.json不能为空。如果construct.json为空，请检查dump的level参数是否没有选择L0或者mix。
+进行批量图比对时，npu_path和bench_path中包含的rank+数字格式的文件夹必须数量一致且能够一一对应。
+```
+├── npu_path或bench_path
+|   ├── rank0
+|   │   ├── dump_tensor_data（仅配置dump的task参数选择tensor时存在）
+|   |   |    ├── Tensor.permute.1.forward.pt
+|   |   |    ├── MyModule.0.forward.input.pt
+|   |   |    ...
+|   |   |    └── Function.linear.5.backward.output.pt
+|   |   ├── dump.json         # 数据信息
+|   |   ├── stack.json        # 算子调用栈信息
+|   |   └── construct.json    # 分层分级结构，level为L1时，construct.json内容为空
+|   ├── rank1
+|   |   ├── dump_tensor_data
+|   |   |   └── ...
+|   |   ├── dump.json
+|   |   ├── stack.json
+|   |   └── construct.json
+|   ├── ...
+|   |
+|   └── rankn
+```
+**2. 执行命令**：
+```
+msprobe -f pytorch graph -i ./compare.json -o ./output
+```
+比对完成后将在**output**下生成n个**vis后缀文件**。
+图构建：
+```
+├── build_rank0_{timestamp}.vis
+├── build_rank1_{timestamp}.vis
+├── build_rank2_{timestamp}.vis
+├── build_rank3_{timestamp}.vis
+├── ...
+├── build_rankn_{timestamp}.vis
+```
+图比对：
+```
+├── compare_rank0_{timestamp}.vis
+├── compare_rank1_{timestamp}.vis
+├── compare_rank2_{timestamp}.vis
+├── compare_rank3_{timestamp}.vis
+├── ...
+├── compare_rankn_{timestamp}.vis
+```
+##### 3.2.3.2 多step批量构建或比对
+批量构建或比对多个step下的所有rank的数据
+**1. 准备比对文件**：
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"bench_path": "./bench_dump", # 只进行图构建可不配置
+"is_print_compare_log": true
+}
+```
+npu_path或bench_path格式：必须只包含step+数字格式的文件夹，且每个step文件夹中必须只包含rank+数字格式的文件夹，每个rank文件夹中必须包含dump.json、stack.json和construct.json，且construct.json不能为空。如果construct.json为空，请检查dump的level参数是否没有选择L0或者mix。
+进行批量图比对时，npu_path和bench_path中包含的step+数字格式的文件夹必须数量一致且能够一一对应，每个step文件夹中包含的rank+数字格式的文件夹必须数量一致且能够一一对应。
+```
+├── npu_path或bench_path
+│   ├── step0
+│   |   ├── rank0
+│   |   │   ├── dump_tensor_data（仅配置dump的task参数选择tensor时存在）
+|   |   |   |    ├── Tensor.permute.1.forward.pt
+|   |   |   |    ├── MyModule.0.forward.input.pt
+|   |   |   |    ...
+|   |   |   |    └── Function.linear.5.backward.output.pt
+│   |   |   ├── dump.json             # 数据信息
+│   |   |   ├── stack.json            # 调用栈信息
+│   |   |   └── construct.json        # 分层分级结构，level为L1时，construct.json内容为空
+│   |   ├── rank1
+|   |   |   ├── dump_tensor_data
+|   |   |   |   └── ...
+│   |   |   ├── dump.json
+│   |   |   ├── stack.json
+|   |   |   └── construct.json
+│   |   ├── ...
+│   |   |
+|   |   └── rankn
+│   ├── step1
+│   |   ├── ...
+│   ├── step2
+```
+**2. 执行命令**：
+```
+msprobe -f pytorch graph -i ./compare.json -o ./output
+```
+比对完成后将在**output**下生成若干个**vis后缀文件**。
+图构建：
+```
+├── build_step0_rank0_{timestamp}.vis
+├── build_step0_rank1_{timestamp}.vis
+├── build_step0_rank2_{timestamp}.vis
+├── build_step0_rank3_{timestamp}.vis
+├── build_step1_rank0_{timestamp}.vis
+├── build_step1_rank1_{timestamp}.vis
+├── build_step1_rank2_{timestamp}.vis
+├── build_step1_rank3_{timestamp}.vis
+├── ...
+├── build_stepn_rankn_{timestamp}.vis
+```
+图比对：
+```
+├── compare_step0_rank0_{timestamp}.vis
+├── compare_step0_rank1_{timestamp}.vis
+├── compare_step0_rank2_{timestamp}.vis
+├── compare_step0_rank3_{timestamp}.vis
+├── compare_step1_rank0_{timestamp}.vis
+├── compare_step1_rank1_{timestamp}.vis
+├── compare_step1_rank2_{timestamp}.vis
+├── compare_step1_rank3_{timestamp}.vis
+├── ...
+├── compare_stepn_rankn_{timestamp}.vis
+```
+## 4.启动tensorboard
+### 4.1 可直连的服务器
+将生成vis文件的路径**out_path**传入--logdir
+```
+tensorboard --logdir out_path --bind_all --port [可选，端口号]
+```
+启动后会打印日志:
+![tensorboard_1](./img/visualization/tensorboard_1.png)
+ubuntu是机器地址，6008是端口号。
+**注意，ubuntu需要替换为真实的服务器地址，例如真实的服务器地址为10.123.45.78，则需要在浏览器窗口输入http://10.123.45.78:6008**
+### 4.2 不可直连的服务器
+**如果链接打不开（服务器无法直连需要挂vpn才能连接等场景），可以尝试使用vscode连接服务器，在vscode终端输入：**
+```
+tensorboard --logdir out_path
+```
+![tensorboard_2](./img/visualization/tensorboard_2.png)
+按住CTRL点击链接即可
+## 5.浏览器查看
+### 5.1 浏览器打开图
+推荐使用谷歌浏览器，在浏览器中输入机器地址+端口号回车，出现TensorBoard页面，其中/#graph_ascend会自动拼接。
+![vis_browser_1](./img/visualization/vis_browser_1.png)
+如果您切换了TensorBoard的其他功能，此时想回到模型分级可视化页面，可以点击左上方的**GRAPH_ASCEND**
+![vis_browser_2](./img/visualization/vis_browser_2.png)
+### 5.2 查看图
+![vis_show_info.png](./img/visualization/vis_show_info.png)
+### 5.3 名称搜索
+![vis_search_info.png](./img/visualization/vis_search_info.png)
+### 5.4 精度筛选
+![vis_precision_info.png](./img/visualization/vis_precision_info.png)
+### 5.5 未匹配节点筛选
+节点匹配规则：
+1.名称一致
+2.节点输入输出参数数量一致，参数type、shape一致
+3.节点的层级一致（父节点们一致）
+![vis_unmatch_info.png](./img/visualization/vis_unmatch_info.png)
+## 6.图比对说明
+### 颜色
+颜色越深，精度比对差异越大，越可疑，具体信息可见浏览器页面左下角颜色图例。
+### 疑似有精度问题判定
+#### 真实数据模式
+节点中所有输入的最小双千分之一指标和所有输出的最小双千分之一指标的差值，反映了双千分之一指标的下降情况，**值越大精度差距越大，颜色标记越深**。
+``One Thousandth Err Ratio（双千分之一）精度指标：Tensor中的元素逐个与对应的标杆数据对比，相对误差小于千分之一的元素个数占总元素个数的比例，比例越接近1越好``
+#### 统计信息模式
+节点中输出的统计量相对误差，**值越大精度差距越大，颜色标记越深**。
+``相对误差：abs((npu统计值 - bench统计值) / bench统计值)``
+#### md5模式
+节点中任意输入输出的md5值不同。
+## 7.附录
+### 7.1 自定义映射文件（Layer）
+文件名格式：\*.yaml，\*为文件名，可自定义。
+文件内容示例：
+```yaml
+PanGuVLMModel:                                    # Layer层名称
+  vision_model: language_model.vision_encoder     # 模型代码中嵌套的Layer层名称
+  vision_projection: language_model.projection
+RadioViTModel:
+  input_conditioner: radio_model.input_conditioner
+  patch_generator: radio_model.patch_generator
+  radio_model: radio_model.transformer
+ParallelTransformerLayer:
+  input_norm: input_layernorm
+  post_attention_norm: post_attention_layernorm
+GPTModel:
+  decoder: encoder
+SelfAttention:
+  linear_qkv: query_key_value
+  core_attention: core_attention_flash
+  linear_proj: dense
+MLP:
+  linear_fc1: dense_h_to_4h
+  linear_fc2: dense_4h_to_h
+```
+Layer层名称需要从模型代码中获取。
+yaml文件中只需配置待调试侧与标杆侧模型代码中功能一致但名称不同的Layer层，名称相同的Layer层会被自动识别并映射。
+模型代码示例：
+![ms_dump](./img/ms_layer.png)
+### 7.2 堆栈信息说明
+**精简堆栈**
+保留一条当前模块或api的调用信息
+```json
+{
+    "Module.layer1.0.bn1.BatchNorm2d.forward.0": [
+        "File /home/torchvision/models/resnet.py, line 93, in forward, \n out = self.bn1(out)"
+       ]
+}
+```
+**完整堆栈**
+当前模块或api完整的调用信息
+```json
+{
+    "Module.layer1.0.bn1.BatchNorm2d.forward.0": [
+        "File /home/torchvision/models/resnet.py, line 93, in forward, \n out = self.bn1(out)",
+        "File /home/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
+        "File /home/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
+        "File /home/torch/nn/modules/container.py, line 215, in forward, \n input = module(input)",
+        "File /home/torch/nn/modules/module.py, line 1568, in _call_impl, \n result = forward_call(*args, **kwargs)",
+        "File /home/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
+        "File /home/torchvision/models/resnet.py, line 273, in _forward_impl, \n x = self.layer1(x)",
+        "File /home/torchvision/models/resnet.py, line 285, in forward, \n return self._forward_impl(x)",
+        "File /home/torch/nn/modules/module.py, line 1527, in _call_impl, \n return forward_call(*args, **kwargs)",
+        "File /home/torch/nn/modules/module.py, line 1518, in _wrapped_call_impl, \n return self._call_impl(*args, **kwargs)",
+        "File /home/visualization/resnet18.py, line 40, in <module>, \n outputs = model(inputs)"
+       ]
+}
+```
+# FAQ
+1. 图比对场景，节点呈现灰色，且没有精度比对数据，怎么处理？
+节点呈现灰色，代表左边待调试侧节点与右边标杆侧节点没有匹配上，可能有以下几点原因：
+- **标杆侧确实没有能与待调试侧匹配上的节点**，属于代码实现上的差异，请确认此差异是否正常，是否会影响到整网精度。
+- **节点的输入或输出type、shape不一致，参数个数不一致，节点所在层级的父层级不一致**，导致节点无法匹配，具体匹配规则见[匹配说明](#311-匹配说明)，可尝试使用模糊匹配功能，如何使用此功能请参考[构图命令行说明](#31-构图命令行说明)。如果是参数shape不一致，即使是模糊匹配功能也无法让节点匹配上，请检查参数shape不一致是否合理。
+- **节点名称不一致**，导致节点无法匹配，可使用layer mapping功能，如何使用此功能请参考[构图命令行说明](#31-构图命令行说明)，如何自定义映射文件请参考[模型分级可视化如何配置layer mapping映射文件](./visualization/layer_mapping_example.md)。

mindstudio-probe 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

mindstudio-probe 1.1.0py3-none-any.whl → 1.2.1py3-none-any.whl