PyPI - mindstudio-probe - Versions diffs - 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl - Mend

mindstudio-probe 1.2.2py3-none-any.whl → 8.1.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (261) hide show

{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/METADATA +4 -3
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/RECORD +243 -191
msprobe/README.md +57 -21
msprobe/core/__init__.py +17 -0
msprobe/core/common/const.py +224 -82
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +5 -3
msprobe/core/common/file_utils.py +274 -40
msprobe/core/common/framework_adapter.py +169 -0
msprobe/core/common/global_lock.py +86 -0
msprobe/core/common/runtime.py +25 -0
msprobe/core/common/utils.py +148 -72
msprobe/core/common_config.py +7 -0
msprobe/core/compare/acc_compare.py +640 -462
msprobe/core/compare/check.py +36 -107
msprobe/core/compare/compare_cli.py +4 -0
msprobe/core/compare/config.py +72 -0
msprobe/core/compare/highlight.py +217 -215
msprobe/core/compare/layer_mapping/layer_mapping.py +4 -1
msprobe/core/compare/merge_result/merge_result.py +12 -6
msprobe/core/compare/multiprocessing_compute.py +227 -107
msprobe/core/compare/npy_compare.py +32 -16
msprobe/core/compare/utils.py +218 -244
msprobe/{mindspore/runtime.py → core/config_check/__init__.py} +2 -4
msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
msprobe/core/config_check/checkers/base_checker.py +60 -0
msprobe/core/config_check/checkers/dataset_checker.py +138 -0
msprobe/core/config_check/checkers/env_args_checker.py +96 -0
msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
msprobe/core/config_check/checkers/pip_checker.py +90 -0
msprobe/core/config_check/checkers/random_checker.py +367 -0
msprobe/core/config_check/checkers/weights_checker.py +147 -0
msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
msprobe/core/config_check/config_check_cli.py +51 -0
msprobe/core/config_check/config_checker.py +100 -0
msprobe/{pytorch/parse.py → core/config_check/resource/dependency.yaml} +7 -4
msprobe/core/config_check/resource/env.yaml +57 -0
msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
msprobe/core/config_check/utils/utils.py +107 -0
msprobe/core/data_dump/api_registry.py +239 -0
msprobe/core/data_dump/data_collector.py +36 -9
msprobe/core/data_dump/data_processor/base.py +74 -53
msprobe/core/data_dump/data_processor/mindspore_processor.py +119 -78
msprobe/core/data_dump/data_processor/pytorch_processor.py +134 -96
msprobe/core/data_dump/json_writer.py +146 -57
msprobe/core/debugger/precision_debugger.py +143 -0
msprobe/core/grad_probe/constant.py +2 -1
msprobe/core/grad_probe/grad_compare.py +2 -2
msprobe/core/grad_probe/utils.py +1 -1
msprobe/core/hook_manager.py +242 -0
msprobe/core/monitor/anomaly_processor.py +384 -0
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/core/service.py +356 -0
msprobe/core/single_save/__init__.py +0 -0
msprobe/core/single_save/single_comparator.py +243 -0
msprobe/core/single_save/single_saver.py +157 -0
msprobe/docs/01.installation.md +6 -5
msprobe/docs/02.config_introduction.md +89 -30
msprobe/docs/03.config_examples.md +1 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +184 -50
msprobe/docs/06.data_dump_MindSpore.md +193 -28
msprobe/docs/07.accuracy_checker_PyTorch.md +13 -3
msprobe/docs/08.accuracy_checker_online_PyTorch.md +72 -10
msprobe/docs/09.accuracy_checker_MindSpore.md +19 -7
msprobe/docs/10.accuracy_compare_PyTorch.md +266 -102
msprobe/docs/11.accuracy_compare_MindSpore.md +117 -43
msprobe/docs/12.overflow_check_PyTorch.md +5 -3
msprobe/docs/13.overflow_check_MindSpore.md +6 -4
msprobe/docs/14.data_parse_PyTorch.md +4 -10
msprobe/docs/17.grad_probe.md +2 -1
msprobe/docs/18.online_dispatch.md +3 -3
msprobe/docs/19.monitor.md +211 -103
msprobe/docs/21.visualization_PyTorch.md +100 -28
msprobe/docs/22.visualization_MindSpore.md +103 -31
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/25.tool_function_introduction.md +23 -22
msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
msprobe/docs/27.dump_json_instruction.md +278 -8
msprobe/docs/28.debugger_save_instruction.md +111 -20
msprobe/docs/28.kernel_dump_MindSpore.md +1 -1
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/31.config_check.md +95 -0
msprobe/docs/32.ckpt_compare.md +69 -0
msprobe/docs/33.generate_operator_MindSpore.md +190 -0
msprobe/docs/34.RL_collect.md +92 -0
msprobe/docs/35.nan_analyze.md +72 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/save_compare_result_sample.png +0 -0
msprobe/docs/img/visualization/proxy.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +3 -3
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -55
msprobe/mindspore/api_accuracy_checker/api_runner.py +25 -11
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +580 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
msprobe/mindspore/api_accuracy_checker/data_manager.py +4 -3
msprobe/mindspore/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +451 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +11 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/cell_processor.py +204 -33
msprobe/mindspore/code_mapping/graph_parser.py +4 -21
msprobe/mindspore/common/const.py +73 -2
msprobe/mindspore/common/utils.py +157 -29
msprobe/mindspore/compare/common_dir_compare.py +382 -0
msprobe/mindspore/compare/distributed_compare.py +2 -26
msprobe/mindspore/compare/ms_compare.py +18 -398
msprobe/mindspore/compare/ms_graph_compare.py +20 -10
msprobe/mindspore/compare/utils.py +37 -0
msprobe/mindspore/debugger/debugger_config.py +59 -7
msprobe/mindspore/debugger/precision_debugger.py +83 -90
msprobe/mindspore/dump/cell_dump_process.py +902 -0
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +889 -0
msprobe/mindspore/dump/dump_tool_factory.py +18 -8
msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
msprobe/mindspore/dump/hook_cell/api_register.py +176 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +22 -12
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +42 -26
msprobe/mindspore/dump/jit_dump.py +35 -27
msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -16
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +22 -12
msprobe/mindspore/free_benchmark/common/utils.py +1 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +9 -2
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/mindspore_service.py +111 -0
msprobe/mindspore/monitor/common_func.py +52 -0
msprobe/mindspore/monitor/data_writers.py +237 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +1 -1
msprobe/mindspore/monitor/features.py +13 -1
msprobe/mindspore/monitor/module_hook.py +568 -444
msprobe/mindspore/monitor/optimizer_collect.py +331 -0
msprobe/mindspore/monitor/utils.py +71 -9
msprobe/mindspore/ms_config.py +16 -15
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/task_handler_factory.py +5 -2
msprobe/msprobe.py +19 -0
msprobe/nan_analyze/__init__.py +14 -0
msprobe/nan_analyze/analyzer.py +255 -0
msprobe/nan_analyze/graph.py +189 -0
msprobe/nan_analyze/utils.py +211 -0
msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +15 -13
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +206 -4
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +9 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +6 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +31 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -20
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +154 -0
msprobe/pytorch/attl_manager.py +65 -0
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +6 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
msprobe/pytorch/common/utils.py +53 -19
msprobe/pytorch/compare/distributed_compare.py +4 -36
msprobe/pytorch/compare/pt_compare.py +13 -84
msprobe/pytorch/compare/utils.py +47 -0
msprobe/pytorch/debugger/debugger_config.py +34 -17
msprobe/pytorch/debugger/precision_debugger.py +50 -96
msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
msprobe/pytorch/dump/module_dump/module_dump.py +15 -61
msprobe/pytorch/dump/module_dump/module_processer.py +150 -114
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +1 -1
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
msprobe/pytorch/hook_module/api_register.py +155 -0
msprobe/pytorch/hook_module/hook_module.py +18 -22
msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +193 -75
msprobe/pytorch/hook_module/utils.py +28 -2
msprobe/pytorch/monitor/csv2tb.py +14 -4
msprobe/pytorch/monitor/data_writers.py +259 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +336 -241
msprobe/pytorch/monitor/module_metric.py +17 -0
msprobe/pytorch/monitor/optimizer_collect.py +244 -224
msprobe/pytorch/monitor/utils.py +84 -4
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +13 -2
msprobe/pytorch/online_dispatch/dump_compare.py +8 -2
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +5 -4
msprobe/pytorch/pt_config.py +16 -11
msprobe/pytorch/pytorch_service.py +70 -0
msprobe/visualization/builder/graph_builder.py +69 -10
msprobe/visualization/builder/msprobe_adapter.py +24 -12
msprobe/visualization/compare/graph_comparator.py +63 -51
msprobe/visualization/compare/mode_adapter.py +22 -20
msprobe/visualization/graph/base_node.py +11 -4
msprobe/visualization/graph/distributed_analyzer.py +1 -10
msprobe/visualization/graph/graph.py +2 -13
msprobe/visualization/graph/node_op.py +1 -2
msprobe/visualization/graph_service.py +251 -104
msprobe/visualization/utils.py +26 -44
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -207
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -140
msprobe/mindspore/monitor/anomaly_detect.py +0 -404
msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
msprobe/mindspore/service.py +0 -543
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -79
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
msprobe/pytorch/monitor/anomaly_detect.py +0 -410
msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
msprobe/pytorch/service.py +0 -470
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
/msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
/msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0

msprobe/docs/27.dump_json_instruction.md CHANGED Viewed

@@ -1,8 +1,8 @@
 # dump.json文件说明及示例
-## 1. dump.json文件示例（PyTorch）
+## 1. PyTorch 场景下的 dump.json 文件
-### 1.1 L0级别
+### 1.1 L0 级别
 L0级别的dump.json文件包括模块的前反向的输入输出，以及模块的参数和参数梯度。以PyTorch的Conv2d模块为例，网络中模块调用代码为:
 `output = self.conv2(input) # self.conv2 = torch.nn.Conv2d(64, 128, 5, padding=2, bias=True)`
@@ -168,7 +168,7 @@ dump.json文件中包含以下数据名称：
 }
 ```
-### 1.2 L1级别
+### 1.2 L1 级别
 L1级别的dump.json文件包括API的前反向的输入输出。以PyTorch的relu函数为例，网络中API调用代码为:
 `output = torch.nn.functional.relu(input)`
@@ -264,13 +264,13 @@ dump.json文件中包含以下数据名称：
 }
 ```
-### 1.3 mix级别
+### 1.3 mix 级别
 mix级别的dump.json文件同时包括L0和L1级别的dump数据，文件格式与上述示例相同。
-## 2. dump.json文件示例（MindSpore）
+## 2. MindSpore 场景下的 dump.json 文件
-### 2.1 L0级别
+### 2.1 L0 级别
 L0级别的dump.json文件包括模块的前反向的输入输出，以及模块的参数和参数梯度。
 以MindSpore的Conv2d模块为例，dump.json文件中使用的模块调用代码为:
@@ -429,7 +429,7 @@ dump.json文件中包含以下数据名称：
 }
 ```
-### 2.2 L1级别
+### 2.2 L1 级别
 L1级别的dump.json文件包括API的前反向的输入输出，以MindSpore的relu函数为例，网络中API调用代码为:
  `output = mindspore.ops.relu(input)`
@@ -521,5 +521,275 @@ L1级别的dump.json文件包括API的前反向的输入输出，以MindSpore的
 }
 ```
-### 2.3 mix级别
+### 2.3 mix 级别
 mix级别的dump.json文件同时包括L0和L1级别的dump数据，文件格式与上述示例相同。
+## 3. MSAdapter 场景下的 dump.json 文件
+### 3.1 L0 级别
+L0 级别的 dump.json 文件包括模块的前反向的输入输出，以及模块的参数和参数梯度。以 Conv2d 模块为例，网络中模块调用代码为:
+`output = self.conv2(input) # self.conv2 = torch.nn.Conv2d(64, 128, 5, padding=2, bias=True)`
+dump.json文件中包含以下数据名称：
+- `Module.conv2.Conv2d.forward.0`：模块的前向数据，其中input_args为模块的输入数据（位置参数），input_kwargs为模块的输入数据（关键字参数），output为模块的输出数据，parameters为模块的参数数据，包括权重（weight）和偏置（bias）。
+- `Module.conv2.Conv2d.parameters_grad`：模块的参数梯度数据，包括权重（weight）和偏置（bias）的梯度。
+- `Module.conv2.Conv2d.backward.0`：模块的反向数据，其中input为模块反向的输入梯度（对应前向输出的梯度），output为模块的反向输出梯度（对应前向输入的梯度）。
+**说明**：当dump时传入的model参数为List[torch.nn.Module]或Tuple[torch.nn.Module]时，模块级数据的命名中包含该模块在列表中的索引index，命名格式为`{Module}.{index}.*`，*表示以上三种模块级数据的命名格式，例如：`Module.0.conv1.Conv2d.forward.0`。
+```json
+{
+ "task": "tensor",
+ "level": "L0",
+ "framework": "mindtorch",
+ "dump_data_dir": "/dump/path",
+ "data": {
+  "Module.conv2.Conv2d.forward.0": {
+   "input_args": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      8,
+      16,
+      14,
+      14
+     ],
+     "Max": 1.638758659362793,
+     "Min": 0.0,
+     "Mean": 0.2544615864753723,
+     "Norm": 70.50277709960938,
+     "requires_grad": true,
+     "data_name": "Module.conv2.Conv2d.forward.0.input.0.npy"
+    }
+   ],
+   "input_kwargs": {},
+   "output": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      8,
+      32,
+      10,
+      10
+     ],
+     "Max": 1.6815717220306396,
+     "Min": -1.5120246410369873,
+     "Mean": -0.025344856083393097,
+     "Norm": 149.65576171875,
+     "requires_grad": true,
+     "data_name": "Module.conv2.Conv2d.forward.0.output.0.npy"
+    }
+   ],
+   "parameters": {
+    "weight": {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      32,
+      16,
+      5,
+      5
+     ],
+     "Max": 0.05992485210299492,
+     "Min": -0.05999220535159111,
+     "Mean": -0.0006165213999338448,
+     "Norm": 3.421217441558838,
+     "requires_grad": true,
+     "data_name": "Module.conv2.Conv2d.forward.0.parameters.weight.npy"
+    },
+    "bias": {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      32
+     ],
+     "Max": 0.05744686722755432,
+     "Min": -0.04894155263900757,
+     "Mean": 0.006410328671336174,
+     "Norm": 0.17263513803482056,
+     "requires_grad": true,
+     "data_name": "Module.conv2.Conv2d.forward.0.parameters.bias.npy"
+    }
+   }
+  },
+  "Module.conv2.Conv2d.parameters_grad": {
+   "weight": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      32,
+      16,
+      5,
+      5
+     ],
+     "Max": 0.018550323322415352,
+     "Min": -0.008627401664853096,
+     "Mean": 0.0006675920449197292,
+     "Norm": 0.26084786653518677,
+     "requires_grad": false,
+     "data_name": "Module.conv2.Conv2d.parameters_grad.weight.npy"
+    }
+   ],
+   "bias": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      32
+     ],
+     "Max": 0.014914230443537235,
+     "Min": -0.006656786892563105,
+     "Mean": 0.002657240955159068,
+     "Norm": 0.029451673850417137,
+     "requires_grad": false,
+     "data_name": "Module.conv2.Conv2d.parameters_grad.bias.npy"
+    }
+   ]
+  },
+  "Module.conv2.Conv2d.backward.0": {
+   "input": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      8,
+      32,
+      10,
+      10
+     ],
+     "Max": 0.0015069986693561077,
+     "Min": -0.001139344065450132,
+     "Mean": 3.3215508210560074e-06,
+     "Norm": 0.020567523315548897,
+     "requires_grad": false,
+     "data_name": "Module.conv2.Conv2d.backward.0.input.0.npy"
+    }
+   ],
+   "output": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      8,
+      16,
+      14,
+      14
+     ],
+     "Max": 0.0007466732058674097,
+     "Min": -0.00044813455315306783,
+     "Mean": 6.814070275140693e-06,
+     "Norm": 0.01474067009985447,
+     "requires_grad": false,
+     "data_name": "Module.conv2.Conv2d.backward.0.output.0.npy"
+    }
+   ]
+  }
+ }
+}
+```
+### 3.2 L1 级别
+L1级别的dump.json文件包括API的前反向的输入输出。以 relu API 为例，网络中 API 调用代码为:
+`output = torch.nn.functional.relu(input)`
+dump.json文件中包含以下数据名称：
+- `Functional.relu.0.forward`：API的前向数据，其中input_args为API的输入数据（位置参数），input_kwargs为API的输入数据（关键字参数），output为API的输出数据。
+- `Functional.relu.0.backward`：API的反向数据，其中input为API的反向输入梯度（对应前向输出的梯度），output为API的反向输出梯度（对应前向输入的梯度）。
+```json
+{
+ "task": "tensor",
+ "level": "L1",
+ "framework": "mindtorch",
+ "dump_data_dir":"/dump/path",
+ "data": {
+  "Functional.relu.0.forward": {
+   "input_args": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      32,
+      16,
+      28,
+      28
+     ],
+     "Max": 1.3864083290100098,
+     "Min": -1.3364859819412231,
+     "Mean": 0.03711778670549393,
+     "Norm": 236.20692443847656,
+     "requires_grad": true,
+     "data_name": "Functional.relu.0.forward.input.0.npy"
+    }
+   ],
+   "input_kwargs": {},
+   "output": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      32,
+      16,
+      28,
+      28
+     ],
+     "Max": 1.3864083290100098,
+     "Min": 0.0,
+     "Mean": 0.16849493980407715,
+     "Norm": 175.23345947265625,
+     "requires_grad": true,
+     "data_name": "Functional.relu.0.forward.output.0.npy"
+    }
+   ]
+  },
+  "Functional.relu.0.backward": {
+   "input": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      32,
+      16,
+      28,
+      28
+     ],
+     "Max": 0.0001815402356442064,
+     "Min": -0.00013352684618439525,
+     "Mean": 0.00011915402356442064,
+     "Norm": 0.007598237134516239,
+     "requires_grad": false,
+     "data_name": "Functional.relu.0.backward.input.0.npy"
+    }
+   ],
+   "output": [
+    {
+     "type": "mindspore.Tensor",
+     "dtype": "Float32",
+     "shape": [
+      32,
+      16,
+      28,
+      28
+     ],
+     "Max": 0.0001815402356442064,
+     "Min": -0.00012117840378778055,
+     "Mean": 2.0098118724831693e-08,
+     "Norm": 0.006532244384288788,
+     "requires_grad": false,
+     "data_name": "Functional.relu.0.backward.output.0.npy"
+    }
+   ]
+  }
+ }
+}
+```
+### 3.3 mix 级别
+mix级别的dump.json文件同时包括L0和L1级别的dump数据，文件格式与上述示例相同。

msprobe/docs/28.debugger_save_instruction.md CHANGED Viewed

@@ -1,28 +1,33 @@
-# 单点保存工具 README
+# 单点保存工具
 ## 简介
-L0, L1, mix dump存在盲区，网络中的非api/module的输入输出不会被批量dump下来。单点保存提供类似np.save和print的功能和使用体验，可以保存指定的变量。同时针对大模型场景进行了增强，具备以下特性：
+L0, L1, mix级别的dump能力存在盲区，网络中的非API或module的输入输出不会被批量dump下来。单点保存提供类似np.save和print的功能和使用体验，可以保存指定的变量。同时针对大模型场景进行了增强，具备以下特性：
 - 可保存变量的反向梯度结果。
 - 能直接保存嵌套结构数据（如 list、dict），无需手动遍历。
-- 自动分 rank 保存。
+- 自动分 Rank 保存。
+- 可分 Step 保存数据。
 - 多次调用时会自动计数。
-- 可配置保存统计值或者张量。
+- 可配置保存统计值（MindSpore静态图暂不支持）或者张量。
+- 支持异步保存。
 ## 支持场景
-仅支持 PyTorch 与 MindSpore 的动态图场景。
-## 使能方式
+## 动态图场景（PyTorch&MindSpore）
-### 配置文件说明
+### 使能方式
-通用配置：
+#### 配置文件说明
+通用配置（细节详见[通用配置说明](./02.config_introduction.md#11-通用配置)）：
 | 参数     | 解释                                       | 是否必选 |
 | -------- |-------------------------------------------| -------- |
 | task     | dump 的任务类型，str 类型。 单点保存场景仅支持传入"statistics", "tensor"。    |  是     |
 | level    | dump 级别，str 类型，根据不同级别采集不同数据。单点保存场景传入"debug"。  | 是  |
-| dump_path  | 设置 dump 数据目录路径，str 类型。细节详见[通用配置说明](./02.config_introduction.md#11-通用配置)  | 是       |
-| rank        | 指定对某张卡上的数据进行采集，list[Union[int, str]] 类型。细节详见[通用配置说明](./02.config_introduction.md#11-通用配置)  | 否       |
+| dump_path  | 设置 dump 数据目录路径，str 类型。 | 是       |
+| rank        | 指定对某张卡上的数据进行采集，list[Union[int, str]] 类型。  | 否       |
+| step    |   指定采集某个 Step 的数据，list[Union[int, str]] 类型。  | 否       |
+| async_dump        | 异步 dump 开关，bool 类型。  | 否       |
 "statistics" 任务子配置项：
 | 参数     | 解释                                       | 是否必选 |
@@ -31,19 +36,21 @@ L0, L1, mix dump存在盲区，网络中的非api/module的输入输出不会被
 "tensor" 任务无子配置项。
-### 接口调用说明
-调用PrecisionDebugger.save，传入需要保存的变量，指定变量名称以及是否需要保存反向数据。接口入参说明详见[pytorch单点保存接口](./05.data_dump_PyTorch.md#19-save)，[mindspore单点保存接口](./06.data_dump_MindSpore.md#615-save)
+#### 接口调用说明
-### 实例（以pytorch场景为例）
+调用PrecisionDebugger.save，传入需要保存的变量，指定变量名称以及是否需要保存反向数据。接口入参说明详见[PyTorch单点保存接口](./05.data_dump_PyTorch.md#19-save)，[MindSpore单点保存接口](./06.data_dump_MindSpore.md#615-save)
+#### 实例
+（以PyTorch场景为例，MindSpore场景只需要从msprobe.mindspore模块导包即可）
 配置文件
 ```json
 {
     "task": "statistics",
     "dump_path": "./dump_path",
     "rank": [],
+    "step": [],
     "level": "debug",
+    "async_dump": false,
     "statistics": {
         "summary_mode": "statistics"
     }
@@ -53,7 +60,7 @@ L0, L1, mix dump存在盲区，网络中的非api/module的输入输出不会被
 初始化
 ```python
 # 训练启动py脚本
-from mindspore.pytorch import PrecisionDebugger
+from msprobe.pytorch import PrecisionDebugger
 debugger = PrecisionDebugger("./config.json")
 for data, label in data_loader:
     # 执行模型训练
@@ -64,7 +71,7 @@ for data, label in data_loader:
 初始化（无配置文件）
 ```python
 # 训练启动py脚本
-from mindspore.pytorch import PrecisionDebugger
+from msprobe.pytorch import PrecisionDebugger
 debugger = PrecisionDebugger(dump_path="dump_path", level="debug")
 for data, label in data_loader:
     # 执行模型训练
@@ -75,20 +82,104 @@ for data, label in data_loader:
 调用保存接口
 ```python
 # 训练过程中被调用py文件
-from mindspore.pytorch import PrecisionDebugger
+from msprobe.pytorch import PrecisionDebugger
 dict_variable = {"key1": "value1", "key2": [1, 2]}
 PrecisionDebugger.save(dict_variable, "dict_variable", save_backward=False)
 ```
+## 静态图场景（MindSpore）
+### 使能方式
+#### 接口调用说明
+工具提供两个对外接口`save`和`save_grad`，分别用于保存训练中的tensor以及tensor对应的反向数据
+| 接口名称 | 入参   | device         | MindSpore版本 |备注                                                         |
+| ------- | ------ | -------------- | --------------|--------------------------------------------------- |
+| save     | save_dir, name, data    | Ascend   | >= 2.6.0   |  （主流场景）图模式下只支持Ascend，pynative下支持Ascend/GPU/CPU。         |
+| save_grad | save_dir, name, data | Ascend     | >= 2.6.0   |  （主流场景）图模式下只支持Ascend，pynative下支持Ascend/GPU/CPU。         |
+----
+> 函数原型:
+`save(save_dir:str, name:str, data)`
+- save_dir：表示要保存的目录。
+- name    ：表示要保存的文件标志名称。
+- data    ：表示数据入参，可以是`mindspore.Tensor`或者是`List`,`Tuple`,`Dict`等嵌套结构。
+> 函数原型：
+`save_grad(save_dir:str, name:str, data)`
+- save_dir：表示要保存的目录。
+- name    ：表示要保存的文件标志名称。
+- data    ：表示数据入参，**只能**是`mindspore.Tensor`。
+#### 实例
+- save接口使用：
+```python
+# save api usage
+# **first import**
+from msprobe.mindspore import save
+class Net(nn.Cell):
+    def construct(self, x, y, z):
+        # **use save api**
+        save("./test_dump", 'x', x)
+        return x * y * z
+x = Tensor([1, 2], ms.float32)
+y = Tensor([-2, 3], ms.float32)
+z = Tensor([0, 3], ms.float32)
+net = Net()
+output = grad(net, grad_position=(1, 2))(x, y, z)
+time.sleep(1)
+# then will generate **./test_dump/step0/rank0/x_float32_0.npy**
+```
+- save_grad接口使用：
+```python
+# save_grad usage
+# **first import**
+from msprobe.mindspore import save_grad
+class Net(nn.Cell):
+    def construct(self, x, y, z):
+        # **use save_grad api**: the return value of save_grad must be assigned back to the original tensor
+        z = save_grad("./test_dump", 'z', z)
+        return x * y * z
+x = Tensor([1, 2], ms.float32)
+y = Tensor([-2, 3], ms.float32)
+z = Tensor([0, 3], ms.float32)
+net = Net()
+output = grad(net, grad_position=(1, 2))(x, y, z)
+time.sleep(1)
+# then will generate **./test_dump/step0/rank0/z_grad_float32_0.npy**
+```
+**注意**：save_grad需要将返回值回传给原tensor，此操作不会有精度影响，只会传递原值。
 ## 输出结果
+### 动态图场景（PyTorch&MindSpore）
   * **"task" 配置为 "statistics" 场景** ：在 dump 目录下会生成包含变量统计值信息的 `debug.json` 文件。
-  * **"task" 配置为 "tensor" 场景** ：除了在 dump 目录下生成包含变量统计值信息的 `debug.json` 文件外，还会在 dump 子目录 `dump_tensor_data` 中保存张量二进制文件，文件名称格式为 `{variable_name}{grad_flag}.{count}.tensor.{indexes}.{file_suffix}`。
+  `debug.json` 中统计值的key命名格式为 `{variable_name}{grad_flag}.{count}.debug`。
+  * **"task" 配置为 "tensor" 场景** ：除了在 dump 目录下生成包含变量统计值信息的 `debug.json` 文件外，还会在 dump 子目录 `dump_tensor_data` 中保存张量二进制文件，文件名称格式为 `{variable_name}{grad_flag}.{count}.debug.{indexes}.{file_suffix}`。
     - variable_name： 传入save接口的变量名称。
     - grad_flag： 反向数据标识，反向数据为"_grad"，正向数据为""。
     - count： 调用计数，多次以相同变量名称调用时的计数。
-    - indexes： 索引，在保存嵌套结构数据时的索引。例如：嵌套结构为`{"key1": "value1", "key2": ["value2", "value3"]}`，"value2"的索引为"key2.0"
-    - file_suffix：文件后缀，pytorch场景为"pt"，mindspore场景为"npy"
+    - indexes： 索引，在保存嵌套结构数据时的索引。例如：嵌套结构为`{"key1": "value1", "key2": ["value2", "value3"]}`，"value2"的索引为"key2.0"。
+    - file_suffix：文件后缀，PyTorch场景为"pt"，MindSpore场景为"npy"。
+### 静态图场景（MindSpore）
+在指定目录`save_dir`下生成`{step}/{rank}`目录，目录下生成指定`{name}`的npy文件，如果是save_grad接口调用，则会生成`{name}_grad`的npy文件。
+如`save("./test_dump", 'x', x)` -> `./test_dump/step0/rank0/x_float32_0.npy`。
+或如`z = save_grad("./test_dump", 'z', z)` -> `./test_dump/step0/rank0/z_grad_float32_0.npy`。

msprobe/docs/28.kernel_dump_MindSpore.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# MindSpore 场景的 kernel dump 说明
+# MindSpore 动态图场景的 kernel dump 说明
 当使用 msprobe 数据采集功能时，level 配置为 "L2" 表示采集 kernel 层级的算子数据，仅支持昇腾 NPU 平台。

mindstudio-probe 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl

mindstudio-probe 1.2.2py3-none-any.whl → 8.1.0py3-none-any.whl