PyPI - mindstudio-probe - Versions diffs - 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

mindstudio-probe 1.0.4-py3-none-any.whl → 1.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (278) hide show

{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
msprobe/README.md +84 -18
msprobe/__init__.py +16 -1
msprobe/config.json +1 -5
msprobe/core/advisor/advisor.py +16 -11
msprobe/core/advisor/advisor_const.py +6 -7
msprobe/core/advisor/advisor_result.py +12 -12
msprobe/core/common/const.py +164 -3
msprobe/core/common/exceptions.py +26 -4
msprobe/core/common/file_utils.py +196 -27
msprobe/core/common/inplace_op_checker.py +53 -0
msprobe/core/common/inplace_ops.yaml +251 -0
msprobe/core/common/log.py +46 -18
msprobe/core/common/utils.py +308 -209
msprobe/core/common_config.py +60 -38
msprobe/core/compare/acc_compare.py +332 -94
msprobe/core/compare/check.py +104 -22
msprobe/core/compare/compare_cli.py +42 -5
msprobe/core/compare/highlight.py +162 -57
msprobe/core/compare/layer_mapping/__init__.py +19 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
msprobe/core/compare/multiprocessing_compute.py +33 -8
msprobe/core/compare/npy_compare.py +73 -29
msprobe/core/compare/utils.py +306 -247
msprobe/core/data_dump/data_collector.py +44 -43
msprobe/core/data_dump/data_processor/base.py +88 -35
msprobe/core/data_dump/data_processor/factory.py +20 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +14 -8
msprobe/core/data_dump/data_processor/pytorch_processor.py +180 -66
msprobe/core/data_dump/json_writer.py +63 -42
msprobe/core/data_dump/scope.py +143 -48
msprobe/core/grad_probe/constant.py +31 -13
msprobe/core/grad_probe/grad_compare.py +20 -4
msprobe/core/grad_probe/utils.py +44 -3
msprobe/core/overflow_check/abnormal_scene.py +185 -0
msprobe/core/overflow_check/api_info.py +55 -0
msprobe/core/overflow_check/checker.py +138 -0
msprobe/core/overflow_check/filter.py +157 -0
msprobe/core/overflow_check/ignore_rules.yaml +55 -0
msprobe/core/overflow_check/level.py +22 -0
msprobe/core/overflow_check/utils.py +28 -0
msprobe/docs/01.installation.md +29 -9
msprobe/docs/02.config_introduction.md +83 -84
msprobe/docs/03.config_examples.md +3 -20
msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
msprobe/docs/05.data_dump_PyTorch.md +143 -13
msprobe/docs/06.data_dump_MindSpore.md +197 -88
msprobe/docs/07.accuracy_checker_PyTorch.md +69 -46
msprobe/docs/08.accuracy_checker_online_PyTorch.md +52 -17
msprobe/docs/09.accuracy_checker_MindSpore.md +51 -15
msprobe/docs/10.accuracy_compare_PyTorch.md +187 -99
msprobe/docs/11.accuracy_compare_MindSpore.md +253 -31
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/13.overflow_check_MindSpore.md +6 -6
msprobe/docs/15.free_benchmarking_PyTorch.md +60 -55
msprobe/docs/16.free_benchmarking_MindSpore.md +159 -0
msprobe/docs/17.grad_probe.md +19 -22
msprobe/docs/18.online_dispatch.md +89 -0
msprobe/docs/19.monitor.md +468 -0
msprobe/docs/20.monitor_performance_baseline.md +52 -0
msprobe/docs/21.visualization_PyTorch.md +386 -0
msprobe/docs/22.visualization_MindSpore.md +384 -0
msprobe/docs/23.tool_function_introduction.md +28 -0
msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +25 -10
msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/monitor/cpu_info.png +0 -0
msprobe/docs/img/ms_dump.png +0 -0
msprobe/docs/img/ms_layer.png +0 -0
msprobe/docs/img/pt_dump.png +0 -0
msprobe/mindspore/__init__.py +16 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +130 -138
msprobe/mindspore/api_accuracy_checker/api_info.py +27 -5
msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
msprobe/mindspore/api_accuracy_checker/main.py +27 -3
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
msprobe/mindspore/cell_processor.py +58 -13
msprobe/mindspore/common/const.py +35 -13
msprobe/mindspore/common/log.py +5 -9
msprobe/mindspore/common/utils.py +60 -5
msprobe/mindspore/compare/distributed_compare.py +15 -28
msprobe/mindspore/compare/ms_compare.py +319 -158
msprobe/mindspore/compare/ms_graph_compare.py +99 -49
msprobe/mindspore/debugger/debugger_config.py +20 -14
msprobe/mindspore/debugger/precision_debugger.py +43 -13
msprobe/mindspore/dump/dump_tool_factory.py +18 -1
msprobe/mindspore/dump/hook_cell/api_registry.py +23 -3
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +203 -0
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +107 -10
msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
msprobe/mindspore/dump/jit_dump.py +56 -20
msprobe/mindspore/dump/kernel_graph_dump.py +19 -5
msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -6
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +162 -41
msprobe/mindspore/free_benchmark/common/config.py +15 -0
msprobe/mindspore/free_benchmark/common/handler_params.py +15 -1
msprobe/mindspore/free_benchmark/common/utils.py +37 -8
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
msprobe/mindspore/free_benchmark/handler/base_handler.py +20 -5
msprobe/mindspore/free_benchmark/handler/check_handler.py +21 -7
msprobe/mindspore/free_benchmark/handler/fix_handler.py +18 -3
msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -6
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +23 -8
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +29 -5
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +25 -10
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +45 -19
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +29 -8
msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +22 -7
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +17 -2
msprobe/mindspore/grad_probe/global_context.py +44 -14
msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
msprobe/mindspore/grad_probe/hook.py +24 -10
msprobe/mindspore/grad_probe/utils.py +18 -5
msprobe/mindspore/ms_config.py +22 -15
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -6
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
msprobe/mindspore/runtime.py +15 -0
msprobe/mindspore/service.py +75 -150
msprobe/mindspore/task_handler_factory.py +15 -0
msprobe/msprobe.py +24 -7
msprobe/pytorch/__init__.py +23 -3
msprobe/pytorch/api_accuracy_checker/common/config.py +81 -2
msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +50 -25
msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +73 -33
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +44 -18
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +32 -11
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +122 -172
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +158 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +30 -24
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +68 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +115 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +26 -9
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
msprobe/pytorch/bench_functions/__init__.py +18 -3
msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
msprobe/pytorch/bench_functions/confusion_transpose.py +20 -1
msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
msprobe/pytorch/bench_functions/linear.py +15 -0
msprobe/pytorch/bench_functions/matmul_backward.py +33 -6
msprobe/pytorch/bench_functions/npu_fusion_attention.py +280 -157
msprobe/pytorch/bench_functions/rms_norm.py +15 -0
msprobe/pytorch/bench_functions/rotary_mul.py +32 -9
msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
msprobe/pytorch/bench_functions/swiglu.py +29 -6
msprobe/pytorch/common/__init__.py +15 -0
msprobe/pytorch/common/log.py +18 -6
msprobe/pytorch/common/parse_json.py +31 -16
msprobe/pytorch/common/utils.py +96 -40
msprobe/pytorch/compare/distributed_compare.py +13 -14
msprobe/pytorch/compare/match.py +15 -0
msprobe/pytorch/compare/pt_compare.py +44 -10
msprobe/pytorch/debugger/debugger_config.py +69 -52
msprobe/pytorch/debugger/precision_debugger.py +72 -24
msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
msprobe/pytorch/free_benchmark/__init__.py +20 -5
msprobe/pytorch/free_benchmark/common/constant.py +15 -0
msprobe/pytorch/free_benchmark/common/counter.py +15 -0
msprobe/pytorch/free_benchmark/common/enums.py +43 -0
msprobe/pytorch/free_benchmark/common/params.py +23 -1
msprobe/pytorch/free_benchmark/common/utils.py +43 -5
msprobe/pytorch/free_benchmark/compare/grad_saver.py +47 -9
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +17 -0
msprobe/pytorch/free_benchmark/main.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +18 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +21 -4
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +28 -2
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +19 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +65 -16
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +21 -5
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
msprobe/pytorch/function_factory.py +17 -2
msprobe/pytorch/functional/module_dump.py +84 -0
msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
msprobe/pytorch/hook_module/__init__.py +16 -1
msprobe/pytorch/hook_module/api_registry.py +13 -8
msprobe/pytorch/hook_module/hook_module.py +17 -19
msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
msprobe/pytorch/hook_module/utils.py +4 -6
msprobe/pytorch/hook_module/wrap_aten.py +12 -11
msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
msprobe/pytorch/hook_module/wrap_functional.py +21 -20
msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
msprobe/pytorch/hook_module/wrap_torch.py +4 -6
msprobe/pytorch/hook_module/wrap_vf.py +4 -6
msprobe/pytorch/module_processer.py +18 -6
msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
msprobe/pytorch/monitor/anomaly_detect.py +340 -0
msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
msprobe/pytorch/monitor/features.py +108 -0
msprobe/pytorch/monitor/module_hook.py +870 -0
msprobe/pytorch/monitor/module_metric.py +193 -0
msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
msprobe/pytorch/monitor/optimizer_collect.py +295 -0
msprobe/pytorch/monitor/unittest/__init__.py +0 -0
msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
msprobe/pytorch/monitor/utils.py +250 -0
msprobe/pytorch/monitor/visualizer.py +59 -0
msprobe/pytorch/online_dispatch/__init__.py +2 -3
msprobe/pytorch/online_dispatch/compare.py +38 -48
msprobe/pytorch/online_dispatch/dispatch.py +50 -25
msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
msprobe/pytorch/online_dispatch/single_compare.py +60 -39
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +9 -1
msprobe/pytorch/online_dispatch/utils.py +48 -23
msprobe/pytorch/parse.py +15 -0
msprobe/pytorch/parse_tool/cli.py +5 -6
msprobe/pytorch/parse_tool/lib/compare.py +19 -26
msprobe/pytorch/parse_tool/lib/config.py +1 -1
msprobe/pytorch/parse_tool/lib/parse_tool.py +4 -2
msprobe/pytorch/parse_tool/lib/utils.py +40 -55
msprobe/pytorch/parse_tool/lib/visualization.py +3 -1
msprobe/pytorch/pt_config.py +192 -40
msprobe/pytorch/service.py +110 -35
msprobe/visualization/__init__.py +14 -0
msprobe/visualization/builder/__init__.py +14 -0
msprobe/visualization/builder/graph_builder.py +165 -0
msprobe/visualization/builder/msprobe_adapter.py +205 -0
msprobe/visualization/compare/__init__.py +14 -0
msprobe/visualization/compare/graph_comparator.py +130 -0
msprobe/visualization/compare/mode_adapter.py +211 -0
msprobe/visualization/graph/__init__.py +14 -0
msprobe/visualization/graph/base_node.py +124 -0
msprobe/visualization/graph/graph.py +200 -0
msprobe/visualization/graph/node_colors.py +95 -0
msprobe/visualization/graph/node_op.py +39 -0
msprobe/visualization/graph_service.py +214 -0
msprobe/visualization/utils.py +232 -0
mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
msprobe/docs/04.acl_config_examples.md +0 -76
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -43
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -107
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
msprobe/pytorch/functional/dump_module.py +0 -39
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
/msprobe/pytorch/{functional/data_processor.py → monitor/distributed/__init__.py} +0 -0

msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md ADDED Viewed

@@ -0,0 +1,211 @@
+# 动态图精度数据采集快速入门示例
+本示例将展示如何在 MindSpore 动态图模式下使用 msprobe 工具进行精度数据采集。
+## 1. 配置文件
+请在当前目录下创建一个名为 `config.json` 的配置文件，内容如下：
+```json
+{
+    "task": "statistics",
+    "dump_path": "./output",
+    "rank": [],
+    "step": ["0-2"],
+    "level": "L1",
+    "statistics": {
+        "scope": [],
+        "list": [],
+        "data_mode": [
+            "all"
+        ],
+        "summary_mode": "statistics"
+    }
+}
+```
+以上配置参数的详细介绍和使用方法请参见[《config.json 配置文件介绍》](../02.config_introduction.md)和[《config.json 配置示例》](../03.config_examples.md#3-mindspore-动态图场景)中的“MindSpore 动态图场景”。
+## 2. 模型脚本
+在当前目录下创建一个 Python 脚本文件，例如 `alexnet_model.py`，将以下代码粘贴进去：
+```python
+import os
+import numpy as np
+import mindspore as ms
+from mindspore import nn, ops
+from mindspore import context
+from mindspore import Tensor
+from msprobe.mindspore import PrecisionDebugger, seed_all
+# 设置随机种子以确保结果可重现
+seed_all(seed=1234, mode=False, rm_dropout=True)
+# 配置文件路径
+script_dir = os.path.dirname(os.path.abspath(__file__))
+config_path = os.path.join(script_dir, 'config.json')
+# 初始化精度调试器
+debugger = PrecisionDebugger(config_path=config_path)
+# 设置 MindSpore 设备上下文
+context.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend", device_id=0)
+# 定义卷积层
+def conv_layer(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid", has_bias=True):
+    return nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding,
+                     has_bias=has_bias, pad_mode=pad_mode)
+# 定义全连接层
+def fc_layer(input_channels, out_channels, has_bias=True):
+    return nn.Dense(input_channels, out_channels, has_bias=has_bias)
+class AlexNet(nn.Cell):
+    """
+    AlexNet 模型定义
+    参数:
+    - num_classes: 分类数量
+    - channel: 输入通道数（图像的颜色通道数）
+    - phase: 模型运行阶段（'train' 或 'test'）
+    - include_top: 是否包含全连接层的顶部（最后的分类层）
+    """
+    def __init__(self, num_classes=10, channel=3, phase='train', include_top=True):
+        super(AlexNet, self).__init__()
+        # 卷积层
+        self.conv1 = conv_layer(channel, 64, 11, stride=4, pad_mode="same")
+        self.conv2 = conv_layer(64, 128, 5, pad_mode="same")
+        self.conv3 = conv_layer(128, 192, 3, pad_mode="same")
+        self.conv4 = conv_layer(192, 256, 3, pad_mode="same")
+        self.conv5 = conv_layer(256, 256, 3, pad_mode="same")
+        # 激活函数和池化层
+        self.relu = nn.ReLU()
+        self.max_pool2d = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='valid')
+        # 如果包括顶部（全连接层）
+        self.include_top = include_top
+        if self.include_top:
+            self.flatten = nn.Flatten()
+            self.fc1 = fc_layer(256 * 28 * 28, 4096)
+            self.fc2 = fc_layer(4096, 4096)
+            self.fc3 = fc_layer(4096, num_classes)
+        # 数学操作
+        self.add = ops.Add()
+        self.mul = ops.Mul()
+    def construct(self, x):
+        """定义前向传播过程"""
+        x = self.conv1(x)
+        x = self.add(x, 0.1)  # 偏置加法
+        x = self.mul(x, 2.0)  # 乘法操作
+        x = self.relu(x)  # ReLU 激活函数
+        x = ops.celu(x)
+        x = x + 2
+        # 打印每层输出形状，调试时可使用
+        print(f"After Conv1: {x.shape}")
+        x = self.max_pool2d(x)  # Max pooling 操作
+        print(f"After MaxPool: {x.shape}")  # 打印池化后的形状
+        x = self.conv2(x)
+        x = self.relu(x)
+        x = self.conv3(x)
+        x = self.relu(x)
+        x = self.conv4(x)
+        x = self.relu(x)
+        x = self.conv5(x)
+        x = self.relu(x)
+        # 打印卷积层后的形状，调试时使用
+        print(f"After Conv5: {x.shape}")
+        # 可选的全连接层部分
+        if self.include_top:
+            x = self.flatten(x)
+            x = self.fc1(x)
+            x = self.fc2(x)
+            x = self.fc3(x)
+        return x
+# 前向函数
+def forward_fn(data, label):
+    out = net(data)
+    loss = criterion(out, label)
+    return loss
+# 训练步骤
+def train_step(data, label):
+    loss, grads = grad_fn(data, label)
+    optimizer(grads)
+    return loss
+# 测试模型
+if __name__ == "__main__":
+    net = AlexNet()
+    optimizer = nn.SGD(net.trainable_params(), learning_rate=0.01)
+    criterion = nn.MSELoss()
+    grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters)
+    # 生成数据和标签
+    batch_size = 1
+    num_classes = 10
+    data = np.random.normal(1, 1, (batch_size, 3, 227, 227)).astype(np.float32)
+    label = np.random.randint(0, num_classes, (batch_size,)).astype(np.float32)  # 注意此处类型应为 float32
+    # 转换为 MindSpore 张量
+    data = Tensor(data)
+    label = Tensor(label)
+    steps = 5
+    for i in range(steps):
+        debugger.start(net)  # 启动调试器
+        loss = train_step(data, label)  # 执行训练步骤
+        print(f"Step {i}, Loss: {loss}")
+        debugger.stop()  # 停止调试器
+        debugger.step()  # 计数步数
+```
+## 3. 运行训练脚本
+在命令行中执行以下命令：
+```bash
+python alexnet_model.py
+```
+## 4. 查看采集结果
+执行训练命令后，工具会将模型训练过程中的精度数据采集下来。
+日志中打印出如下信息即表示数据采集成功，此时可手动停止模型训练并查看采集数据。
+```markdown
+****************************************************************************
+*                        msprobe ends successfully.                        *
+****************************************************************************
+```
+## 5. 数据分析
+在 `dump_path` 参数指定的路径下（本例中为 `./output`），会出现如下目录结构（后续精度数据分析操作可使用 msprobe 工具的精度预检和精度比对等功能，详细流程请参见[《msprobe使用手册》](../../README.md#2-精度预检)）：
+```bash
+output/
+└── step0
+    └── rank
+        ├── construct.json             # level为L0时，保存Cell的层级关系信息。当前场景为空
+        ├── dump.json                  # 保存API前反向输入输出数据的统计量信息
+        └── stack.json                 # 保存API的调用栈
+```

msprobe/docs/img/compare_result.png ADDED Viewed

Binary file

msprobe/docs/img/monitor/cpu_info.png ADDED Viewed

Binary file

msprobe/docs/img/ms_dump.png ADDED Viewed

Binary file

msprobe/docs/img/ms_layer.png ADDED Viewed

Binary file

msprobe/docs/img/pt_dump.png ADDED Viewed

Binary file

msprobe/mindspore/__init__.py CHANGED Viewed

@@ -1 +1,17 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger
+from msprobe.mindspore.common.utils import seed_all

msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py CHANGED Viewed

@@ -1,16 +1,34 @@
-import json
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import os
+from tqdm import tqdm
-from msprobe.core.common.file_utils import FileOpen, create_directory, write_csv
-from msprobe.core.common.utils import add_time_as_suffix
 from msprobe.core.common.const import Const, CompareConst, MsCompareConst
-from msprobe.mindspore.common.log import logger
+from msprobe.core.common.file_utils import FileOpen, create_directory, write_csv, load_json, load_yaml
+from msprobe.core.common.utils import add_time_as_suffix
 from msprobe.mindspore.api_accuracy_checker.api_info import ApiInfo
 from msprobe.mindspore.api_accuracy_checker.api_runner import api_runner, ApiInputAggregation
 from msprobe.mindspore.api_accuracy_checker.base_compare_algorithm import compare_algorithms
+from msprobe.mindspore.api_accuracy_checker.data_manager import DataManager
 from msprobe.mindspore.api_accuracy_checker.utils import (check_and_get_from_json_dict, global_context,
                                                           trim_output_compute_element_list)
+from msprobe.mindspore.common.log import logger
+cur_path = os.path.dirname(os.path.realpath(__file__))
+yaml_path = os.path.join(cur_path, MsCompareConst.SUPPORTED_API_LIST_FILE)
 class BasicInfoAndStatus:
     def __init__(self, api_name, bench_dtype, tested_dtype, shape, status, err_msg) -> None:
@@ -21,6 +39,7 @@ class BasicInfoAndStatus:
         self.status = status
         self.err_msg = err_msg
 class ResultCsvEntry:
     def __init__(self) -> None:
         self.forward_pass_status = None
@@ -31,9 +50,9 @@ class ResultCsvEntry:
 class ApiAccuracyChecker:
-    def __init__(self):
+    def __init__(self, args):
         self.api_infos = dict()
-        self.results = dict()
+        self.data_manager = DataManager(args.out_path, args.result_csv_path)  # 在初始化时实例化 DataManager
     @staticmethod
     def run_and_compare_helper(api_info, api_name_str, api_input_aggregation, forward_or_backward):
@@ -80,25 +99,64 @@ class ApiAccuracyChecker:
                 compare_result_dict[compare_algorithm_name] = compare_result
             if compare_result_dict.get(CompareConst.COSINE).pass_status == CompareConst.PASS and \
-                compare_result_dict.get(CompareConst.MAX_ABS_ERR).pass_status == CompareConst.PASS:
+                    compare_result_dict.get(CompareConst.MAX_ABS_ERR).pass_status == CompareConst.PASS:
                 status = CompareConst.PASS
                 err_msg = ""
             else:
                 status = CompareConst.ERROR
                 err_msg = compare_result_dict.get(CompareConst.COSINE).err_msg + \
-                    compare_result_dict.get(CompareConst.MAX_ABS_ERR).err_msg
+                          compare_result_dict.get(CompareConst.MAX_ABS_ERR).err_msg
             basic_info_status = \
                 BasicInfoAndStatus(api_name_with_slot, bench_dtype, tested_dtype, shape, status, err_msg)
             output_list.append(tuple([api_name_str, forward_or_backward, basic_info_status, compare_result_dict]))
         return output_list
+    @staticmethod
+    def prepare_api_input_aggregation(api_info, forward_or_backward=Const.FORWARD):
+        '''
+        Build the ApiInputAggregation used to run one api in the given direction.
+
+        Args:
+            api_info: ApiInfo
+            forward_or_backward: str, compared against Const.FORWARD; any other value
+                takes the backward branch
+        Returns:
+            ApiInputAggregation built from the forward inputs, the kwargs and the
+            gradient inputs (None when forward_or_backward is Const.FORWARD)
+        '''
+        forward_inputs = api_info.get_compute_element_list(Const.FORWARD, Const.INPUT)
+        kwargs = api_info.get_kwargs()
+        if forward_or_backward == Const.FORWARD:
+            gradient_inputs = None
+        else:
+            # the backward direction additionally needs the backward input elements
+            gradient_inputs = api_info.get_compute_element_list(Const.BACKWARD, Const.INPUT)
+        return ApiInputAggregation(forward_inputs, kwargs, gradient_inputs)
+    @staticmethod
+    def is_api_checkable(api_name_str):
+        '''
+        Tell whether this api is checkable, based on its key in the "data" dict of api_info.json.
+
+        Args:
+            api_name_str: str, e.g. "MintFunctional.relu.0.forward", key in data field of api_info.json
+        Returns:
+            is_checkable: bool. True when the api type is MsCompareConst.MINT or
+            MsCompareConst.MINT_FUNCTIONAL, or when it is MsCompareConst.TENSOR_API and the
+            api name is in the supported tensor list of the yaml file. False otherwise,
+            including names with fewer than MsCompareConst.API_NAME_STR_LENGTH parts.
+        '''
+        api_name_str_list = api_name_str.split(Const.SEP)
+        if len(api_name_str_list) < MsCompareConst.API_NAME_STR_LENGTH:
+            return False
+        # first part is the api type; everything between it and the last two parts is the api name
+        api_type_str = api_name_str_list[0]
+        real_api_str = Const.SEP.join(api_name_str_list[1:-2])
+        # NOTE(review): the yaml file is loaded on every call, even when the api type alone decides the result
+        api_list = load_yaml(yaml_path)
+        # NOTE(review): .get() yields None if the key is absent, which would make the "in" test below raise - confirm the yaml always defines it
+        supported_tensor_api_list = api_list.get(MsCompareConst.SUPPORTED_TENSOR_LIST_KEY)
+        if api_type_str in (MsCompareConst.MINT, MsCompareConst.MINT_FUNCTIONAL):
+            return True
+        if api_type_str == MsCompareConst.TENSOR_API and real_api_str in supported_tensor_api_list:
+            return True
+        return False
     def parse(self, api_info_path):
-        with FileOpen(api_info_path, "r") as f:
-            api_info_dict = json.load(f)
+        api_info_dict = load_json(api_info_path)
         # init global context
         task = check_and_get_from_json_dict(api_info_dict, MsCompareConst.TASK_FIELD,
-                                            "task field in api_info.json",accepted_type=str,
+                                            "task field in api_info.json", accepted_type=str,
                                             accepted_value=(MsCompareConst.STATISTICS_TASK,
                                                             MsCompareConst.TENSOR_TASK))
         is_constructed = task == MsCompareConst.STATISTICS_TASK
@@ -112,14 +170,12 @@ class ApiAccuracyChecker:
         api_info_data = check_and_get_from_json_dict(api_info_dict, MsCompareConst.DATA_FIELD,
                                                      "data field in api_info.json", accepted_type=dict)
         for api_name, api_info in api_info_data.items():
-            is_mint = api_name.split(Const.SEP)[0] in \
-                (MsCompareConst.MINT, MsCompareConst.MINT_FUNCTIONAL)
-            if not is_mint:
+            if not self.is_api_checkable(api_name):
                 continue
             forbackward_str = api_name.split(Const.SEP)[-1]
             if forbackward_str not in (Const.FORWARD, Const.BACKWARD):
                 logger.warning(f"api: {api_name} is not recognized as forward api or backward api, skip this.")
-            api_name = Const.SEP.join(api_name.split(Const.SEP)[:-1]) # www.xxx.yyy.zzz --> www.xxx.yyy
+            api_name = Const.SEP.join(api_name.split(Const.SEP)[:-1])  # www.xxx.yyy.zzz --> www.xxx.yyy
             if api_name not in self.api_infos:
                 self.api_infos[api_name] = ApiInfo(api_name)
@@ -128,128 +184,64 @@ class ApiAccuracyChecker:
             else:
                 self.api_infos[api_name].load_backward_info(api_info)
+    def process_forward(self, api_name_str, api_info):
+        """Run the forward accuracy check for a single API.
+
+        Args:
+            api_name_str: API name; used in the log messages and passed on to
+                run_and_compare_helper.
+            api_info: object providing check_forward_info(); also handed to
+                prepare_api_input_aggregation and run_and_compare_helper.
+
+        Returns:
+            Const.EXCEPTION_NONE when the check is skipped (forward information is
+            missing, or preparing the inputs raised); otherwise whatever
+            run_and_compare_helper returns, or None if that helper raised.
+        """
+        if not api_info.check_forward_info():
+            logger.debug(f"api: {api_name_str} is lack of forward information, skip forward check.")
+            return Const.EXCEPTION_NONE
+        # Broad catch: a failure while building the inputs is logged and the check is
+        # skipped instead of propagating to the caller.
+        try:
+            forward_inputs_aggregation = self.prepare_api_input_aggregation(api_info, Const.FORWARD)
+        except Exception as e:
+            logger.warning(f"Exception occurs when getting inputs for {api_name_str} forward api. "
+                           f"Skipping forward check. Detailed exception information: {e}.")
+            return Const.EXCEPTION_NONE
+        # Stays None if the run below raises; run_and_compare only skips recording for
+        # Const.EXCEPTION_NONE (presumably a sentinel distinct from None - TODO confirm in Const).
+        forward_output_list = None
+        try:
+            forward_output_list = self.run_and_compare_helper(api_info, api_name_str, forward_inputs_aggregation, Const.FORWARD)
+        except Exception as e:
+            logger.warning(f"Exception occurs when running and comparing {api_name_str} forward api. "
+                           f"Detailed exception information: {e}.")
+        return forward_output_list
+    def process_backward(self, api_name_str, api_info):
+        """Run the backward accuracy check for a single API.
+
+        Args:
+            api_name_str: API name; used in the log messages and passed on to
+                run_and_compare_helper.
+            api_info: object providing check_backward_info(); also handed to
+                prepare_api_input_aggregation and run_and_compare_helper.
+
+        Returns:
+            Const.EXCEPTION_NONE when the check is skipped (backward information is
+            missing, or preparing the inputs raised); otherwise whatever
+            run_and_compare_helper returns, or None if that helper raised.
+        """
+        if not api_info.check_backward_info():
+            logger.debug(f"api: {api_name_str} is lack of backward information, skipping backward check.")
+            return Const.EXCEPTION_NONE
+        # Broad catch: a failure while building the inputs is logged and the check is
+        # skipped instead of propagating to the caller.
+        try:
+            backward_inputs_aggregation = self.prepare_api_input_aggregation(api_info, Const.BACKWARD)
+        except Exception as e:
+            logger.warning(f"Exception occurs when getting inputs for {api_name_str} backward api. "
+                           f"Skipping backward check. Detailed exception information: {e}.")
+            return Const.EXCEPTION_NONE
+        # Stays None if the run below raises; run_and_compare only skips recording for
+        # Const.EXCEPTION_NONE (presumably a sentinel distinct from None - TODO confirm in Const).
+        backward_output_list = None
+        try:
+            backward_output_list = self.run_and_compare_helper(api_info, api_name_str, backward_inputs_aggregation, Const.BACKWARD)
+        except Exception as e:
+            logger.warning(f"Exception occurs when running and comparing {api_name_str} backward api. "
+                           f"Detailed exception information: {e}.")
+        return backward_output_list
     def run_and_compare(self):
-        for api_name_str, api_info in self.api_infos.items():
-            if not api_info.check_forward_info():
-                logger.warning(f"api: {api_name_str} is lack of forward infomation, skip forward and backward check")
-                continue
-            forward_inputs = api_info.get_compute_element_list(Const.FORWARD, Const.INPUT)
-            kwargs = api_info.get_kwargs()
-            forward_inputs_aggregation = ApiInputAggregation(forward_inputs, kwargs, None)
-            forward_output_list = None
-            try:
-                forward_output_list = \
-                    self.run_and_compare_helper(api_info, api_name_str, forward_inputs_aggregation, Const.FORWARD)
-            except Exception as e:
-                logger.warning(f"exception occurs when running and comparing {api_name_str} forward api"
-                               f"detailed exception information: {e}")
-            self.record(forward_output_list)
-            if not api_info.check_backward_info():
-                logger.warning(f"api: {api_name_str} is lack of backward infomation, skip backward check")
+        for api_name_str, api_info in tqdm(self.api_infos.items()):
+            if not self.data_manager.is_unique_api(api_name_str):
                 continue
-            gradient_inputs = api_info.get_compute_element_list(Const.BACKWARD, Const.INPUT)
-            backward_inputs_aggregation = ApiInputAggregation(forward_inputs, kwargs, gradient_inputs)
-            backward_output_list = None
-            try:
-                backward_output_list = \
-                    self.run_and_compare_helper(api_info, api_name_str, backward_inputs_aggregation, Const.BACKWARD)
-            except Exception as e:
-                logger.warning(f"exception occurs when running and comparing {api_name_str} backward api"
-                               f"detailed exception information: {e}")
-            self.record(backward_output_list)
-    def record(self, output_list):
-        if output_list is None:
-            return
-        for output in output_list:
-            api_real_name, forward_or_backward, basic_info, compare_result_dict = output
-            key = tuple([api_real_name, forward_or_backward])
-            if key not in self.results:
-                self.results[key] = []
-            self.results[key].append(tuple([basic_info, compare_result_dict]))
-    def to_detail_csv(self, csv_dir):
-        # detail_csv
-        detail_csv = []
-        detail_csv_header_basic_info = [
-            MsCompareConst.DETAIL_CSV_API_NAME,
-            MsCompareConst.DETAIL_CSV_BENCH_DTYPE,
-            MsCompareConst.DETAIL_CSV_TESTED_DTYPE,
-            MsCompareConst.DETAIL_CSV_SHAPE,
-        ]
-        detail_csv_header_compare_result = list(compare_algorithms.keys())
-        detail_csv_header_status = [
-            MsCompareConst.DETAIL_CSV_PASS_STATUS,
-            MsCompareConst.DETAIL_CSV_MESSAGE,
-        ]
-        detail_csv_header = detail_csv_header_basic_info + detail_csv_header_compare_result + detail_csv_header_status
-        detail_csv.append(detail_csv_header)
-        for _, results in self.results.items():
-            # detail csv
-            for res in results:
-                basic_info, compare_result_dict = res
-                csv_row_basic_info = \
-                    [basic_info.api_name, basic_info.bench_dtype, basic_info.tested_dtype, basic_info.shape]
-                csv_row_compare_result = list(compare_result_dict.get(algorithm_name).compare_value \
-                                            for algorithm_name in detail_csv_header_compare_result)
-                csv_row_status = [basic_info.status, basic_info.err_msg]
-                csv_row = csv_row_basic_info  + csv_row_compare_result + csv_row_status
-                detail_csv.append(csv_row)
-        file_name = os.path.join(csv_dir, add_time_as_suffix(MsCompareConst.DETAIL_CSV_FILE_NAME))
-        create_directory(csv_dir)
-        write_csv(detail_csv, file_name, mode="w")
-    def to_result_csv(self, csv_dir):
-        result_csv_dict = dict()
-        for key, results in self.results.items():
-            api_real_name, forward_or_backward = key
-            forward_or_backward_pass_status = CompareConst.PASS
-            forward_or_backward_overall_err_msg = ""
-            # detail csv
-            for res in results:
-                basic_info, _ = res
-                if basic_info.status != CompareConst.PASS:
-                    forward_or_backward_pass_status = CompareConst.ERROR
-                forward_or_backward_overall_err_msg += basic_info.err_msg
-            forward_or_backward_overall_err_msg = \
-                "" if forward_or_backward_pass_status == CompareConst.PASS else forward_or_backward_overall_err_msg
-            #result_csv_dict
-            if api_real_name not in result_csv_dict:
-                result_csv_dict[api_real_name] = ResultCsvEntry()
-            if forward_or_backward == Const.FORWARD:
-                result_csv_dict[api_real_name].forward_pass_status = forward_or_backward_pass_status
-                result_csv_dict[api_real_name].forward_err_msg = forward_or_backward_overall_err_msg
-            else:
-                result_csv_dict[api_real_name].backward_pass_status = forward_or_backward_pass_status
-                result_csv_dict[api_real_name].backward_err_msg = forward_or_backward_overall_err_msg
-        #result_csv
-        result_csv = []
-        result_csv_header = [
-            MsCompareConst.DETAIL_CSV_API_NAME,
-            MsCompareConst.RESULT_CSV_FORWARD_TEST_SUCCESS,
-            MsCompareConst.RESULT_CSV_BACKWARD_TEST_SUCCESS,
-            MsCompareConst.DETAIL_CSV_MESSAGE,
-        ]
-        result_csv.append(result_csv_header)
-        for api_name, result_csv_entry in result_csv_dict.items():
-            if result_csv_entry.forward_pass_status == CompareConst.PASS and \
-                result_csv_entry.backward_pass_status == CompareConst.PASS:
-                overall_err_msg = ""
-            else:
-                overall_err_msg = result_csv_entry.forward_err_msg + result_csv_entry.backward_err_msg
-            row = [api_name, result_csv_entry.forward_pass_status,
-                   result_csv_entry.backward_pass_status, overall_err_msg]
-            result_csv.append(row)
-        file_name = os.path.join(csv_dir, add_time_as_suffix(MsCompareConst.RESULT_CSV_FILE_NAME))
-        create_directory(csv_dir)
-        write_csv(result_csv, file_name, mode="w")
+            # Handle the forward check
+            forward_output_list = self.process_forward(api_name_str, api_info)
+            if forward_output_list is not Const.EXCEPTION_NONE:
+                self.data_manager.record(forward_output_list)
+            # Handle the backward check
+            backward_output_list = self.process_backward(api_name_str, api_info)
+            if backward_output_list is not Const.EXCEPTION_NONE:
+                self.data_manager.record(backward_output_list)
+            self.data_manager.save_results(api_name_str)

msprobe/mindspore/api_accuracy_checker/api_info.py CHANGED Viewed

@@ -1,11 +1,34 @@
-from msprobe.mindspore.api_accuracy_checker.compute_element import ComputeElement
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from msprobe.core.common.const import Const
-from msprobe.mindspore.api_accuracy_checker.utils import check_and_get_from_json_dict
 from msprobe.core.common.exceptions import ApiAccuracyCheckerException
+from msprobe.core.common.utils import is_invalid_pattern
+from msprobe.mindspore.api_accuracy_checker.compute_element import ComputeElement
+from msprobe.mindspore.api_accuracy_checker.utils import check_and_get_from_json_dict
 from msprobe.mindspore.common.log import logger
 class ApiInfo:
     def __init__(self, api_name):
+        if not isinstance(api_name, str):
+            err_msg = "ApiInfo.__init__ failed: api_name is not a string"
+            logger.error_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.ParseJsonFailed))
+        if is_invalid_pattern(api_name):
+            err_msg = "ApiInfo.__init__ failed: api_name contain illegal character"
+            logger.error_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.ParseJsonFailed))
         self.api_name = api_name
         self.forward_info = None
         self.backward_info = None
@@ -59,11 +82,10 @@ class ApiInfo:
                 err_msg = "ApiInfo.get_kwargs failed: compute_element_dict key is not a string"
                 logger.error_log_with_exp(err_msg,
                                           ApiAccuracyCheckerException(ApiAccuracyCheckerException.ParseJsonFailed))
-            if not isinstance(compute_element_info, (list, dict)):
-                err_msg = "ApiInfo.get_kwargs failed: compute_element_dict value is not a list or dict"
+            if not (isinstance(compute_element_info, (list, dict)) or compute_element_info is None):
+                err_msg = "ApiInfo.get_kwargs failed: compute_element_dict value is not a list, dict or null"
                 logger.error_log_with_exp(err_msg,
                                           ApiAccuracyCheckerException(ApiAccuracyCheckerException.ParseJsonFailed))
         kwargs_compute_element_dict = {key_str: ComputeElement(compute_element_info=compute_element_info)
                                        for key_str, compute_element_info in kwargs_dict.items()}
         return kwargs_compute_element_dict

mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl

mindstudio-probe 1.0.4py3-none-any.whl → 1.1.1py3-none-any.whl