PyPI - mindstudio-probe - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

mindstudio-probe 1.1.0py3-none-any.whl → 1.1.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
msprobe/README.md +39 -3
msprobe/config.json +1 -3
msprobe/core/advisor/advisor.py +8 -3
msprobe/core/common/const.py +113 -13
msprobe/core/common/exceptions.py +25 -3
msprobe/core/common/file_utils.py +150 -26
msprobe/core/common/inplace_op_checker.py +15 -0
msprobe/core/common/log.py +27 -9
msprobe/core/common/utils.py +182 -69
msprobe/core/common_config.py +44 -15
msprobe/core/compare/acc_compare.py +207 -142
msprobe/core/compare/check.py +2 -5
msprobe/core/compare/compare_cli.py +21 -4
msprobe/core/compare/highlight.py +124 -55
msprobe/core/compare/layer_mapping/__init__.py +19 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
msprobe/core/compare/npy_compare.py +52 -23
msprobe/core/compare/utils.py +272 -247
msprobe/core/data_dump/data_collector.py +13 -11
msprobe/core/data_dump/data_processor/base.py +46 -16
msprobe/core/data_dump/data_processor/mindspore_processor.py +4 -4
msprobe/core/data_dump/data_processor/pytorch_processor.py +156 -59
msprobe/core/data_dump/scope.py +113 -34
msprobe/core/grad_probe/constant.py +27 -13
msprobe/core/grad_probe/grad_compare.py +18 -1
msprobe/core/grad_probe/utils.py +30 -2
msprobe/core/overflow_check/abnormal_scene.py +185 -0
msprobe/core/overflow_check/api_info.py +55 -0
msprobe/core/overflow_check/checker.py +138 -0
msprobe/core/overflow_check/filter.py +157 -0
msprobe/core/overflow_check/ignore_rules.yaml +55 -0
msprobe/core/overflow_check/level.py +22 -0
msprobe/core/overflow_check/utils.py +28 -0
msprobe/docs/01.installation.md +10 -0
msprobe/docs/02.config_introduction.md +49 -22
msprobe/docs/03.config_examples.md +2 -9
msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
msprobe/docs/05.data_dump_PyTorch.md +3 -1
msprobe/docs/06.data_dump_MindSpore.md +157 -90
msprobe/docs/07.accuracy_checker_PyTorch.md +12 -12
msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
msprobe/docs/10.accuracy_compare_PyTorch.md +19 -13
msprobe/docs/11.accuracy_compare_MindSpore.md +104 -13
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/13.overflow_check_MindSpore.md +6 -6
msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
msprobe/docs/17.grad_probe.md +5 -6
msprobe/docs/19.monitor.md +468 -0
msprobe/docs/20.monitor_performance_baseline.md +52 -0
msprobe/docs/21.visualization_PyTorch.md +386 -0
msprobe/docs/22.visualization_MindSpore.md +384 -0
msprobe/docs/23.tool_function_introduction.md +28 -0
msprobe/docs/FAQ.md +3 -0
msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/mindspore/__init__.py +15 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +113 -145
msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
msprobe/mindspore/api_accuracy_checker/main.py +27 -3
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
msprobe/mindspore/cell_processor.py +33 -12
msprobe/mindspore/common/const.py +33 -13
msprobe/mindspore/common/log.py +5 -9
msprobe/mindspore/common/utils.py +43 -4
msprobe/mindspore/compare/distributed_compare.py +22 -22
msprobe/mindspore/compare/ms_compare.py +271 -248
msprobe/mindspore/compare/ms_graph_compare.py +81 -47
msprobe/mindspore/debugger/debugger_config.py +4 -1
msprobe/mindspore/debugger/precision_debugger.py +7 -1
msprobe/mindspore/dump/dump_tool_factory.py +3 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +12 -2
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +13 -16
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +25 -0
msprobe/mindspore/dump/jit_dump.py +17 -5
msprobe/mindspore/dump/kernel_graph_dump.py +2 -4
msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +145 -39
msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
msprobe/mindspore/free_benchmark/common/utils.py +19 -4
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +4 -4
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
msprobe/mindspore/grad_probe/global_context.py +28 -8
msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
msprobe/mindspore/grad_probe/hook.py +24 -10
msprobe/mindspore/grad_probe/utils.py +18 -5
msprobe/mindspore/ms_config.py +22 -15
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +2 -4
msprobe/mindspore/runtime.py +15 -0
msprobe/mindspore/service.py +36 -30
msprobe/mindspore/task_handler_factory.py +15 -0
msprobe/msprobe.py +24 -7
msprobe/pytorch/__init__.py +3 -2
msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -4
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +6 -1
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +19 -14
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +13 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +77 -53
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +15 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +100 -6
msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
msprobe/pytorch/bench_functions/swiglu.py +10 -2
msprobe/pytorch/common/parse_json.py +6 -6
msprobe/pytorch/common/utils.py +56 -5
msprobe/pytorch/compare/distributed_compare.py +8 -9
msprobe/pytorch/compare/pt_compare.py +8 -6
msprobe/pytorch/debugger/debugger_config.py +19 -15
msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
msprobe/pytorch/free_benchmark/common/constant.py +15 -0
msprobe/pytorch/free_benchmark/common/counter.py +15 -0
msprobe/pytorch/free_benchmark/common/enums.py +15 -0
msprobe/pytorch/free_benchmark/common/params.py +8 -1
msprobe/pytorch/free_benchmark/common/utils.py +26 -4
msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -3
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +10 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
msprobe/pytorch/hook_module/wrap_functional.py +14 -12
msprobe/pytorch/module_processer.py +2 -5
msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
msprobe/pytorch/monitor/anomaly_detect.py +340 -0
msprobe/pytorch/monitor/distributed/__init__.py +0 -0
msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
msprobe/pytorch/monitor/features.py +108 -0
msprobe/pytorch/monitor/module_hook.py +870 -0
msprobe/pytorch/monitor/module_metric.py +193 -0
msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
msprobe/pytorch/monitor/optimizer_collect.py +295 -0
msprobe/pytorch/monitor/unittest/__init__.py +0 -0
msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
msprobe/pytorch/monitor/utils.py +250 -0
msprobe/pytorch/monitor/visualizer.py +59 -0
msprobe/pytorch/online_dispatch/__init__.py +2 -3
msprobe/pytorch/online_dispatch/compare.py +29 -38
msprobe/pytorch/online_dispatch/dispatch.py +50 -25
msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
msprobe/pytorch/online_dispatch/single_compare.py +53 -32
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
msprobe/pytorch/online_dispatch/utils.py +49 -21
msprobe/pytorch/parse_tool/lib/compare.py +12 -18
msprobe/pytorch/parse_tool/lib/config.py +1 -1
msprobe/pytorch/parse_tool/lib/parse_tool.py +1 -2
msprobe/pytorch/parse_tool/lib/utils.py +16 -35
msprobe/pytorch/parse_tool/lib/visualization.py +2 -0
msprobe/pytorch/pt_config.py +31 -8
msprobe/pytorch/service.py +15 -5
msprobe/visualization/__init__.py +14 -0
msprobe/visualization/builder/__init__.py +14 -0
msprobe/visualization/builder/graph_builder.py +165 -0
msprobe/visualization/builder/msprobe_adapter.py +205 -0
msprobe/visualization/compare/__init__.py +14 -0
msprobe/visualization/compare/graph_comparator.py +130 -0
msprobe/visualization/compare/mode_adapter.py +211 -0
msprobe/visualization/graph/__init__.py +14 -0
msprobe/visualization/graph/base_node.py +124 -0
msprobe/visualization/graph/graph.py +200 -0
msprobe/visualization/graph/node_colors.py +95 -0
msprobe/visualization/graph/node_op.py +39 -0
msprobe/visualization/graph_service.py +214 -0
msprobe/visualization/utils.py +232 -0
mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
msprobe/docs/04.acl_config_examples.md +0 -78
msprobe/mindspore/compare/layer_mapping.py +0 -146
msprobe/mindspore/compare/modify_mapping.py +0 -107
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0

msprobe/docs/22.visualization_MindSpore.md ADDED Viewed

@@ -0,0 +1,384 @@
+# MindSpore 场景的分级可视化构图比对
+分级可视化工具将msprobe工具dump的精度数据进行解析，还原模型图结构，实现模型各个层级的精度数据比对，方便用户理解模型结构、分析精度问题。
+工具支持MindSpore版本：2.4.0
+## 1.依赖安装
+分级可视化工具依赖**msprobe工具**和**tensorboard**。
+### 1.1 安装msprobe工具
+现阶段分级可视化工具还未集成在已发布的msprobe工具中，需要从源码安装，请参考从源码安装章节。
+[msprobe工具安装](https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/docs/01.installation.md)
+### 1.2 安装tb_graph_ascend
+**请安装tb_graph_ascend，否则无法解析构图结果。**
+[tb_graph_ascend下载](https://mindstudio-sample.obs.cn-north-4.myhuaweicloud.com/GRAPH_ASCEND/tb_graph_ascend-0.1.0-py3-none-any.whl)
+``pip3 install``即可。
+## 2.模型结构数据采集
+[MindSpore场景的精度数据采集](https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md)
+**仅支持动态图场景，需要选择level为L0（cell信息）或者mix（cell信息+api信息），才能采集到模型结构数据，即采集结果件construct.json内容不为空**。
+## 3.生成图结构文件
+### 3.1 构图命令行说明
+**命令示例如下**：
+```
+msprobe -f mindspore graph -i ./compare.json -o ./output
+```
+**命令行参数说明**：
+| 参数名               | 说明                                                                                                                                                            | 是否必选 |
+|-------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- |
+| -i 或 --input_path   | 指定比对文件，str 类型。                                                                                                                                                | 是       |
+| -o 或 --output_path  | 配置比对结果文件存盘目录，str 类型。文件名称基于时间戳自动生成，格式为：`compare_{timestamp}.vis`。                                                                                              | 是       |
+| -lm 或 --layer_mapping| 跨框架比对，MindSpore和PyTorch的比对场景。配置该参数时表示开启跨框架Layer层的比对功能，指定模型代码中的Layer层后，可以识别对应dump数据中的模块或API。需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件（Layer）](#71-自定义映射文件layer)。 | 否    |
+**比对文件说明**：
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"bench_path": "./bench_dump",
+"is_print_compare_log": true
+}
+```
+**比对文件参数说明**：
+| 参数名               | 说明                                                                                                    | 是否必选 |
+|-------------------|-------------------------------------------------------------------------------------------------------|------|
+| npu_path   | 指定待调试侧比对路径，str类型。工具根据路径格式自动进行单rank比对、多rank批量比对或多step批量比对，具体格式参考3.2 图构建和比对。           | 是    |
+| bench_path  | 指定标杆侧比对路径，str类型。单图构建场景可以不配置 | 否    |
+| is_print_compare_log  | 配置是否开启单个算子的日志打屏。可取值 true 或 false，默认为 true。关闭后则只输出常规日志，bool 类型。                                        | 否    |
+### 3.2 图构建和比对
+**如果只是想查看一个模型的结构，请选择单图构建**；
+**如果想比较两个模型的结构差异和精度数据差异，请选择双图比对**。
+#### 3.2.1 单图构建
+展示模型结构、精度数据、堆栈信息。
+**1. 准备比对文件**：
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"is_print_compare_log": true
+}
+```
+npu_path格式：必须包含dump.json、stack.json和construct.json，且construct.json不能为空。如果construct.json为空，请检查dump的level参数是否已设置为L0或者mix。
+```
+├── npu_path
+│   ├── dump_tensor_data（配置dump的task参数选择tensor时存在）
+|   |    ├── MintFunctional.relu.0.backward.input.0.npy
+|   |    ├── Mint.abs.0.forward.input.0.npy
+|   |    ...
+|   |    └── Cell.relu.ReLU.forward.0.input.0.npy
+|   ├── dump.json         # 数据信息
+|   ├── stack.json        # 调用栈信息
+|   └── construct.json    # 分层分级结构，level为L1时，construct.json内容为空
+```
+**2. 执行命令**：
+```
+msprobe -f mindspore graph -i ./compare.json -o ./output
+```
+#### 3.2.2 双图比对
+展示模型结构、结构差异、精度数据和精度比对指标、精度是否疑似有问题（精度比对指标差异越大颜色越深）。
+当前比对支持三种类型的dump数据，分级可视化工具比对时会自动判断：
+1.统计信息：仅dump了API和Module的输入输出数据统计信息，占用磁盘空间小；
+2.真实数据：不仅dump了API和Module的输入输出数据统计信息，还将tensor进行存盘，占用磁盘空间大，但比对更加准确；
+3.md5：dump了API和Module的输入输出数据统计信息和md5信息。
+dump类型如何配置见[数据采集配置文件介绍](https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/docs/02.config_introduction.md)
+**1. 准备比对文件**：
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"bench_path": "./bench_dump",
+"is_print_compare_log": true
+}
+```
+npu_path或bench_path格式：必须包含dump.json、stack.json和construct.json，且construct.json不能为空。如果construct.json为空，请检查dump的level参数是否已设置为L0或者mix。
+```
+├── npu_path或bench_path
+│   ├── dump_tensor_data（配置dump的task参数选择tensor时存在）
+|   |    ├── MintFunctional.relu.0.backward.input.0.npy
+|   |    ├── Mint.abs.0.forward.input.0.npy
+|   |    ...
+|   |    └── Cell.relu.ReLU.forward.0.input.0.npy
+|   ├── dump.json         # 数据信息
+|   ├── stack.json        # 调用栈信息
+|   └── construct.json    # 分层分级结构，level为L1时，construct.json内容为空
+```
+**2. 执行命令**：
+```
+msprobe -f mindspore graph -i ./compare.json -o ./output
+```
+比对完成后将在**output**下生成一个**vis后缀文件**。
+#### 3.2.3 批量构建或比对
+##### 3.2.3.1 多rank批量构建或比对
+批量构建或比对一个step下的所有rank的数据
+**1. 准备比对文件**：
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"bench_path": "./bench_dump", # 只进行图构建可不配置
+"is_print_compare_log": true
+}
+```
+npu_path或bench_path格式：必须只包含rank+数字格式的文件夹，且每个rank文件夹中必须包含dump.json、stack.json和construct.json，且construct.json不能为空。如果construct.json为空，请检查dump的level参数是否已设置为L0或者mix。
+进行批量图比对时，npu_path和bench_path中包含的rank+数字格式的文件夹必须数量一致且能够一一对应。
+```
+├── npu_path或bench_path
+|   ├── rank0
+|   │   ├── dump_tensor_data（仅配置dump的task参数选择tensor时存在）
+|   |   |    ├── MintFunctional.relu.0.backward.input.0.npy
+|   |   |    ├── Mint.abs.0.forward.input.0.npy
+|   |   |    ...
+|   |   |    └── Cell.relu.ReLU.forward.0.input.0.npy
+|   |   ├── dump.json         # 数据信息
+|   |   ├── stack.json        # 算子调用栈信息
+|   |   └── construct.json    # 分层分级结构，level为L1时，construct.json内容为空
+|   ├── rank1
+|   |   ├── dump_tensor_data
+|   |   |   └── ...
+|   |   ├── dump.json
+|   |   ├── stack.json
+|   |   └── construct.json
+|   ├── ...
+|   |
+|   └── rankn
+```
+**2. 执行命令**：
+```
+msprobe -f mindspore graph -i ./compare.json -o ./output
+```
+比对完成后将在**output**下生成n个**vis后缀文件**。
+图构建：
+```
+├── build_rank0_{timestamp}.vis
+├── build_rank1_{timestamp}.vis
+├── build_rank2_{timestamp}.vis
+├── build_rank3_{timestamp}.vis
+├── ...
+├── build_rankn_{timestamp}.vis
+```
+图比对：
+```
+├── compare_rank0_{timestamp}.vis
+├── compare_rank1_{timestamp}.vis
+├── compare_rank2_{timestamp}.vis
+├── compare_rank3_{timestamp}.vis
+├── ...
+├── compare_rankn_{timestamp}.vis
+```
+##### 3.2.3.2 多step批量构建或比对
+批量构建或比对多个step下的所有rank的数据
+**1. 准备比对文件**：
+以在当前目录创建 ./compare.json 为例。
+```
+{
+"npu_path": "./npu_dump",
+"bench_path": "./bench_dump", # 只进行图构建可不配置
+"is_print_compare_log": true
+}
+```
+npu_path或bench_path格式：必须只包含step+数字格式的文件夹，且每个step文件夹中必须只包含rank+数字格式的文件夹，每个rank文件夹中必须包含dump.json、stack.json和construct.json，且construct.json不能为空。如果construct.json为空，请检查dump的level参数是否已设置为L0或者mix。
+进行批量图比对时，npu_path和bench_path中包含的step+数字格式的文件夹必须数量一致且能够一一对应，每个step文件夹中包含的rank+数字格式的文件夹必须数量一致且能够一一对应。
+```
+├── npu_path或bench_path
+│   ├── step0
+│   |   ├── rank0
+│   |   │   ├── dump_tensor_data（仅配置dump的task参数选择tensor时存在）
+|   |   |   |    ├── MintFunctional.relu.0.backward.input.0.npy
+|   |   |   |    ├── Mint.abs.0.forward.input.0.npy
+|   |   |   |    ...
+|   |   |   |    └── Cell.relu.ReLU.forward.0.input.0.npy
+│   |   |   ├── dump.json             # 数据信息
+│   |   |   ├── stack.json            # 调用栈信息
+│   |   |   └── construct.json        # 分层分级结构，level为L1时，construct.json内容为空
+│   |   ├── rank1
+|   |   |   ├── dump_tensor_data
+|   |   |   |   └── ...
+│   |   |   ├── dump.json
+│   |   |   ├── stack.json
+|   |   |   └── construct.json
+│   |   ├── ...
+│   |   |
+|   |   └── rankn
+│   ├── step1
+│   |   ├── ...
+│   ├── step2
+```
+**2. 执行命令**：
+```
+msprobe -f mindspore graph -i ./compare.json -o ./output
+```
+比对完成后将在**output**下生成若干个**vis后缀文件**。
+图构建：
+```
+├── build_step0_rank0_{timestamp}.vis
+├── build_step0_rank1_{timestamp}.vis
+├── build_step0_rank2_{timestamp}.vis
+├── build_step0_rank3_{timestamp}.vis
+├── build_step1_rank0_{timestamp}.vis
+├── build_step1_rank1_{timestamp}.vis
+├── build_step1_rank2_{timestamp}.vis
+├── build_step1_rank3_{timestamp}.vis
+├── ...
+├── build_stepn_rankn_{timestamp}.vis
+```
+图比对：
+```
+├── compare_step0_rank0_{timestamp}.vis
+├── compare_step0_rank1_{timestamp}.vis
+├── compare_step0_rank2_{timestamp}.vis
+├── compare_step0_rank3_{timestamp}.vis
+├── compare_step1_rank0_{timestamp}.vis
+├── compare_step1_rank1_{timestamp}.vis
+├── compare_step1_rank2_{timestamp}.vis
+├── compare_step1_rank3_{timestamp}.vis
+├── ...
+├── compare_stepn_rankn_{timestamp}.vis
+```
+## 4.启动tensorboard
+将生成vis文件的路径**out_path**传入--logdir
+```
+tensorboard --logdir out_path --bind_all --port [可选，端口号]
+```
+启动后会打印日志。
+``TensorBoard 2.15.1 at http://localhost.localdomain:6008/#graphs (Press CTRL+C to quit)``
+localhost.localdomain是机器地址，6008是端口号。
+**注意，localhost.localdomain需要替换为真实的服务器地址，例如真实的服务器地址为10.123.456.78，则需要在浏览器窗口输入http://10.123.456.78:6008/#graphs**
+**如果链接打不开(例如服务器无法直连需要挂vpn才能连接的场景)，可以尝试使用vscode连接服务器，在vscode终端输入：**
+```
+tensorboard --logdir out_path
+```
+按住CTRL键并点击终端中打印的链接即可。
+## 5.浏览器查看
+推荐使用谷歌浏览器，在浏览器中输入机器地址+端口号回车，出现TensorBoard页面，右上方选择GRAPHS_ASCEND即可展示模型结构图。
+节点需要双击打开。
+键盘WS可放大缩小，AD可左右移动，鼠标滚轮可上下移动。
+## 6.图比对说明
+### 颜色
+颜色越深，精度比对差异越大，越可疑，具体信息可见浏览器页面左下角颜色图例。
+### 疑似有精度问题判定
+#### 真实数据模式
+节点中所有输入的最小双千分之一指标和所有输出的最小双千分之一指标的差值，反映了双千分之一指标的下降情况，**值越大精度差距越大，颜色标记越深**。
+``One Thousandth Err Ratio（双千分之一）精度指标：Tensor中的元素逐个与对应的标杆数据对比，相对误差小于千分之一的元素个数占总元素个数的比例，比例越接近1越好``
+#### 统计信息模式
+节点中输出的统计量相对误差，**值越大精度差距越大，颜色标记越深**。
+``相对误差：abs((npu统计值 - bench统计值) / bench统计值)``
+其中小值不使用相对误差来判断精度差异，而是使用**绝对误差**来判断精度差异
+**判定为小值的阈值：**
+   - float32：1e-6
+   - float16：1e-3
+   - bfloat16：1e-3
+**小值域的绝对误差阈值：**
+   - float32：1e-6
+   - float16：1e-3
+   - bfloat16：1e-3
+#### md5模式
+节点中任意输入输出的md5值不同。
+## 7.附录
+### 7.1 自定义映射文件（Layer）
+文件名格式：\*.yaml，*为文件名，可自定义。
+文件内容示例：
+```yaml
+ParallelAttention:                 # Layer层名称
+  qkv_proj: query_key_value        # 冒号左侧为MindSpore框架模型代码中嵌套的Layer层名称，冒号右侧为PyTorch框架模型代码中嵌套的Layer层名称
+  out_proj: dense
+ParallelTransformerLayer:
+  attention: self_attention
+Embedding:
+  dropout: embedding_dropout
+ParallelMLP:
+  mapping: dense_h_to_4h
+  projection: dense_4h_to_h
+PipelineCell:
+  model: module
+Cell:
+  network_with_loss: module
+layers:                           # 手动映射MindSpore与PyTorch模型代码中的Layer层序号
+  '5': '0'
+  '6': '1'
+  '7': '2'
+  '8': '3'
+  '9': '4'
+```
+Layer层名称需要从模型代码中获取。
+yaml文件中只需配置MindSpore与PyTorch模型代码中功能一致但名称不同的Layer层，名称相同的Layer层会被自动识别并映射。
+模型代码示例：
+![ms_dump](./img/ms_layer.png)

msprobe/docs/23.tool_function_introduction.md ADDED Viewed

@@ -0,0 +1,28 @@
+# msprobe 工具功能模块简介、适用场景和当前版本局限性
+## 1 PyTorch框架
+| 功能名（英文）                                                                            | 简介                                                            | 适用场景/优势                                                                  | 当前版本局限性                                                                                                         |
+|------------------------------------------------------------------------------------|---------------------------------------------------------------|--------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
+| [数据采集<br>（dump）](./05.data_dump_PyTorch.md)                                        | 采集模型训练过程中的API或Module层级的前反向输入输出数据，包括层次关系、统计值信息、真实数据和调用栈等。      | 1、将模型中训练的API或Module的前反向输入输出数据保存下来分析<br> 2、模型出现溢出时，可用于查看哪些API或Module出现了溢出 | 1、API级数据采集仅支持白名单列表上的API<br>2、工具会做一些同步操作，引入工具可能会导致一些同步问题消失<br>3、当前对inplace操作API或Module的支持度有限<br>4、暂不支持参数及参数梯度的采集 |
+| [离线预检<br>（api_accuracy_checker）](./07.accuracy_checker_PyTorch.md)                 | 为网络中每个API创建用例，检验其精度，并根据不同比对算法综合判定API在NPU上的精度是否达标，快速找出精度差异API。 | 1、对模型中所有的API做精度初步排查<br>2、精度排查不受模型累计误差影响                                  | 1、依赖GPU环境<br>2、不支持通信算子<br>3、仅支持部分融合算子                                                                           |
+| [整网比对<br>（compare）](./10.accuracy_compare_PyTorch.md)                              | 计算模型整网NPU和标杆设备的精度误差指标，标记精度异常API或Module，助力快速定位精度问题根因。          | 1、整网比对定位精度可疑算子                                                           | 1、由于使用整网dump数据，定位的可疑算子受累计误差影响<br>2、当模型规模较大时，比对所需时间较长                                                            |
+| [在线预检<br>（online_api_accuracy_checker）](./08.accuracy_checker_online_PyTorch.md)   | 通过TCP通信或共享存储空间的方式，进行在线精度预检，解决离线预检大数据量落盘、传输困难痛点。               | 1、使用离线预检，数据量较大落盘困难或传输耗时长时，可通过在线预检进行精度排查                                  | 1、依赖GPU环境，NPU和GPU能够通信<br>2、重计算模式下，不支持反向aten算子预检                                                                 |
+| [溢出检查<br>（overflow_checker）](./12.overflow_check_PyTorch.md)                       | 检测模型计算过程的输入输出，并在溢出时落盘数据，助力用户快速定位溢出位置。                         | 1、当模型出现溢出时，用于快速定位最先溢出的API或Module<br>2、相比数据采集，性能更优，磁盘压力更小                 | 1、局限性同数据采集                                                                                                      |
+| [数据解析<br>（parse_tool）](./14.data_parse_PyTorch.md)                                 | 交互式界面处理解析kernel层级dump数据，便于查看分析。                               | 1、比对kernel层级dump数据的一致性                                                   | 1、仅限于NPU                                                                                                        |
+| [无标杆比对<br>（free_benchmark）](./15.free_benchmarking_PyTorch.md)                     | 不依赖标杆数据，通过对算子输入增加微小扰动，计算扰动后输出与原始输出的相对误差，识别有精度风险算子。            | 1、无标杆数据场景下的算子精度排查<br>2、对个别算子进行升精度、“to cpu”等操作，以验证其对模型loss的影响             | 1、由于需要拷贝输入进行二次执行，所以在遇到大张量的输入时容易发生显存OOM的问题, 特别是反向比对过程。建议结合白名单使用<br>2、比对会延长训练时间，整网比对可能会造成严重的耗时膨胀，建议结合白名单使用        |
+| [梯度状态监测<br>（grad_probe）](./17.grad_probe.md)                                       | 可导出模型权重梯度数据并对比相似度，助力确认训练过程精度问题step和反向中的异常。                    | 1、需要分析梯度数据时<br>2、需要定位发生问题的step时                                          | 暂无                                                                                                              |
+| [在线精度比对<br>（online_dispatch）](./18.online_dispatch.md)                             | 训练过程中直接完成NPU和CPU的精度比对并输出比对结果。                                 | 1、执行一次就可获取NPU和CPU分别执行后的精度比对结果                                            | 暂无                                                                                                              |
+| [训练状态监控<br>（monitor）](./19.monitor.md)                                             | 收集模型训练过程中的激活值、梯度和优化器状态，助力分析计算、通信、优化器各部分异常情况。                  | 1、通过监控模块级统计量指标，快速定位异常模块位置，如loss出现nan                                     | 1、仅支持模块级别统计量指标分析<br>2、仅支持megatron、deepspeed框架<br>3、少量增加时间和显存膨胀                                                  |
+| [可视化比对<br>（visualization） ](./21.visualization_PyTorch.md)                         | 解析dump的精度数据，还原模型图结构，比对各层级精度数据，助力理解模型结构、分析精度问题。                | 1、整网精度比对定位可疑算子，通过浏览器展示比对结果，支持快速搜索到可疑算子<br>2、支持查看模型层级结果，比对模型层级结构差异        | 1、由于使用整网dump数据，定位的可疑算子受累计误差影响<br>2、当模型规模较大时，比对所需时间较长                                                            |
+## 2 MindSpore框架
+| 功能名（英文）                                                              | 简介                                                                | 适用场景/优势                                                                      | 当前版本局限性                                                                                                                                           |
+|----------------------------------------------------------------------|-------------------------------------------------------------------|------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------|
+| [数据采集<br>（dump）](./06.data_dump_MindSpore.md)                        | 采集模型训练过程中的API或Cell层级的前反向输入输出数据，包括层次关系、统计值信息、真实数据和调用栈等。            | 1、将模型中训练的API或Cell的前反向输入输出数据保存下来分析<br> 2、模型出现溢出时，可用于查看哪些API或Cell出现了溢出         | 1、API级数据采集仅支持白名单列表上的API<br>2、当前对inplace操作API或Cell的支持度有限<br>3、暂不支持参数及参数梯度的采集                                                                       |
+| [离线预检<br>（api_accuracy_checker）](./09.accuracy_checker_MindSpore.md) | 为网络中每个API创建用例，检验其精度，并根据不同比对算法综合判定API在NPU上的精度是否达标，快速找出精度差异API。     | 1、对模型中所有的API做精度初步排查<br>2、精度排查不受模型累计误差影响                                      | 1、仅针对MindSpore.mint API                                                                                                                           |
+| [整网比对<br>（compare）](./11.accuracy_compare_MindSpore.md)              | NPU精度数据与标杆数据的比对，支持MindSpore框架内和与PyTorch跨框架的比对，助力快速定位精度异常API或Cell。 | 1、MindSpore同框架静态图比对<br>2、MindSpore同框架动态图比对<br>3、MindSpore vs PyTorch跨框架动态图比对 | 1、部分PyTorch的API关联不到MindSpore，需要手动配置映射关系                                                                                                           |
+| [溢出检查<br>（overflow_checker）](./13.overflow_check_MindSpore.md)       | 检测模型计算过程的输入输出，并在溢出时落盘数据，助力用户快速定位溢出位置。                             | 1、当模型出现溢出时，可用于定位最先溢出的API或Cell或kernel<br>2、相比数据采集，性能更优，磁盘压力更小                 | 1、除具有与数据采集功能相同的局限性外，动态图场景下，不支持 Primitive 和 Jit 类 API 的检测<br>2、动态图场景下，仅支持检测API或Cell级别溢出<br>3、静态图场景下，仅支持检测kernel级别溢出                                |
+| [无标杆比对<br>（free_benchmark）](./16.free_benchmarking_MindSpore.md)     | 不依赖标杆数据，通过对算子输入增加微小扰动，计算扰动后输出与原始输出的相对误差，识别有精度风险算子。                | 1、无标杆数据场景下的算子精度排查<br>2、对个别算子进行升精度修复，验证其对模型loss的影响                            | 1、仅支持动态图场景<br>2、由于需要拷贝输入进行二次执行，所以在遇到大张量的输入时容易发生显存OOM的问题, 特别是反向比对过程。建议结合白名单使用<br>3、比对会延长训练时间，整网比对可能会造成严重的耗时膨胀，建议结合白名单使用<br>4、不支持“to cpu”操作，不支持预热功能 |
+| [可视化比对<br>（visualization） ](./22.visualization_MindSpore.md)         | 解析dump的精度数据，还原模型图结构，比对各层级精度数据，助力理解模型结构、分析精度问题。                    | 1、整网精度比对定位可疑算子，通过浏览器展示比对结果，支持快速搜索到可疑算子<br>2、支持查看模型层级结果，比对模型层级结构差异            | 1、由于使用整网dump数据，定位的可疑算子受累计误差影响<br>2、当模型规模较大时，比对所需时间较长                                                                                              |

msprobe/docs/FAQ.md CHANGED Viewed

@@ -10,6 +10,9 @@
    - 输入参数或输出参数类型当前工具不支持，会有日志打印提醒。
    - 输入或者输出tensor的dtype为bool时，Mean和Norm等字段为null。
+2. 如果存在namedtuple类型的数据作为nn.Module的输出，工具会将各字段数据dump下来，但是输出数据类型会被转成tuple，原因是什么？
+   - 这是由于PyTorch框架自身在注册module的backward hook时，会将namedtuple类型转成tuple类型。
 # 2 精度预检(PyTorch)
 1. 预检工具在 dump 和 run_ut 的过程中，是否需要同时开启或关闭 jit 编译（jit_compile）？

msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md ADDED Viewed

@@ -0,0 +1,211 @@
+# 动态图精度数据采集快速入门示例
+本示例将展示如何在 MindSpore 动态图模式下使用 msprobe 工具进行精度数据采集。
+## 1. 配置文件
+请在当前目录下创建一个名为 `config.json` 的配置文件，内容如下：
+```json
+{
+    "task": "statistics",
+    "dump_path": "./output",
+    "rank": [],
+    "step": ["0-2"],
+    "level": "L1",
+    "statistics": {
+        "scope": [],
+        "list": [],
+        "data_mode": [
+            "all"
+        ],
+        "summary_mode": "statistics"
+    }
+}
+```
+以上配置参数详细介绍和使用请参见[《config.json 配置文件介绍》](../02.config_introduction.md)和[《config.json 配置示例》](../03.config_examples.md#3-mindspore-动态图场景) 中的“MindSpore动态图场景”。
+## 2. 模型脚本
+在当前目录下创建一个 Python 脚本文件，例如 `alexnet_model.py`，将以下代码粘贴进去：
+```python
+import os
+import numpy as np
+import mindspore as ms
+from mindspore import nn, ops
+from mindspore import context
+from mindspore import Tensor
+from msprobe.mindspore import PrecisionDebugger, seed_all
+# 设置随机种子以确保结果可重现
+seed_all(seed=1234, mode=False, rm_dropout=True)
+# 配置文件路径
+script_dir = os.path.dirname(os.path.abspath(__file__))
+config_path = os.path.join(script_dir, 'config.json')
+# 初始化精度调试器
+debugger = PrecisionDebugger(config_path=config_path)
+# 设置 MindSpore 设备上下文
+context.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend", device_id=0)
+# 定义卷积层
+def conv_layer(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid", has_bias=True):
+    return nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding,
+                     has_bias=has_bias, pad_mode=pad_mode)
+# 定义全连接层
+def fc_layer(input_channels, out_channels, has_bias=True):
+    return nn.Dense(input_channels, out_channels, has_bias=has_bias)
+class AlexNet(nn.Cell):
+    """
+    AlexNet 模型定义
+    参数:
+    - num_classes: 分类数量
+    - channel: 输入通道数（图像的颜色通道数）
+    - phase: 模型运行阶段（'train' 或 'test'）
+    - include_top: 是否包含全连接层的顶部（最后的分类层）
+    """
+    def __init__(self, num_classes=10, channel=3, phase='train', include_top=True):
+        super(AlexNet, self).__init__()
+        # 卷积层
+        self.conv1 = conv_layer(channel, 64, 11, stride=4, pad_mode="same")
+        self.conv2 = conv_layer(64, 128, 5, pad_mode="same")
+        self.conv3 = conv_layer(128, 192, 3, pad_mode="same")
+        self.conv4 = conv_layer(192, 256, 3, pad_mode="same")
+        self.conv5 = conv_layer(256, 256, 3, pad_mode="same")
+        # 激活函数和池化层
+        self.relu = nn.ReLU()
+        self.max_pool2d = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='valid')
+        # 如果包括顶部（全连接层）
+        self.include_top = include_top
+        if self.include_top:
+            self.flatten = nn.Flatten()
+            self.fc1 = fc_layer(256 * 28 * 28, 4096)
+            self.fc2 = fc_layer(4096, 4096)
+            self.fc3 = fc_layer(4096, num_classes)
+        # 数学操作
+        self.add = ops.Add()
+        self.mul = ops.Mul()
+    def construct(self, x):
+        """定义前向传播过程"""
+        x = self.conv1(x)
+        x = self.add(x, 0.1)  # 偏置加法
+        x = self.mul(x, 2.0)  # 乘法操作
+        x = self.relu(x)  # ReLU 激活函数
+        x = ops.celu(x)
+        x = x + 2
+        # 打印每层输出形状，调试时可使用
+        print(f"After Conv1: {x.shape}")
+        x = self.max_pool2d(x)  # Max pooling 操作
+        print(f"After MaxPool: {x.shape}")  # 打印池化后的形状
+        x = self.conv2(x)
+        x = self.relu(x)
+        x = self.conv3(x)
+        x = self.relu(x)
+        x = self.conv4(x)
+        x = self.relu(x)
+        x = self.conv5(x)
+        x = self.relu(x)
+        # 打印卷积层后的形状，调试时使用
+        print(f"After Conv5: {x.shape}")
+        # 可选的全连接层部分
+        if self.include_top:
+            x = self.flatten(x)
+            x = self.fc1(x)
+            x = self.fc2(x)
+            x = self.fc3(x)
+        return x
+# 前向函数
+def forward_fn(data, label):
+    out = net(data)
+    loss = criterion(out, label)
+    return loss
+# 训练步骤
+def train_step(data, label):
+    loss, grads = grad_fn(data, label)
+    optimizer(grads)
+    return loss
+# 测试模型
+if __name__ == "__main__":
+    net = AlexNet()
+    optimizer = nn.SGD(net.trainable_params(), learning_rate=0.01)
+    criterion = nn.MSELoss()
+    grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters)
+    # 生成数据和标签
+    batch_size = 1
+    num_classes = 10
+    data = np.random.normal(1, 1, (batch_size, 3, 227, 227)).astype(np.float32)
+    label = np.random.randint(0, num_classes, (batch_size,)).astype(np.float32)  # 注意此处类型应为 float32
+    # 转换为 MindSpore 张量
+    data = Tensor(data)
+    label = Tensor(label)
+    steps = 5
+    for i in range(steps):
+        debugger.start(net)  # 启动调试器
+        loss = train_step(data, label)  # 执行训练步骤
+        print(f"Step {i}, Loss: {loss}")
+        debugger.stop()  # 停止调试器
+        debugger.step()  # 计数步数
+```
+## 3. 运行训练脚本
+在命令行中执行以下命令：
+```bash
+python alexnet_model.py
+```
+## 4. 查看采集结果
+执行训练命令后，工具会将模型训练过程中的精度数据采集下来。
+当日志中打印出如下信息时，表示数据采集成功，此时可手动停止模型训练并查看采集数据。
+```markdown
+****************************************************************************
+*                        msprobe ends successfully.                        *
+****************************************************************************
+```
+## 5. 数据分析
+在 `dump_path` 参数指定的路径下（本例中为 `./output`），会出现如下目录结构（后续精度数据分析操作可使用 msprobe 工具的精度预检和精度比对等功能，详细流程请参见[《msprobe使用手册》](../../README.md#2-精度预检)）：
+```bash
+output/
+└── step0
+    └── rank
+        ├── construct.json             # level为L0时，保存Cell的层级关系信息。本例level为L1，因此该文件为空
+        ├── dump.json                  # 保存API前反向输入输出数据的统计量信息
+        └── stack.json                 # 保存API的调用栈
+```

msprobe/docs/img/compare_result.png ADDED Viewed

Binary file

msprobe/docs/img/monitor/cpu_info.png ADDED Viewed

Binary file

msprobe/mindspore/__init__.py CHANGED Viewed

@@ -1,2 +1,17 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger
 from msprobe.mindspore.common.utils import seed_all

mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

mindstudio-probe 1.1.0py3-none-any.whl → 1.1.1py3-none-any.whl