mindstudio-probe 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/METADATA +3 -2
  2. {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/RECORD +196 -141
  3. msprobe/CMakeLists.txt +5 -0
  4. msprobe/README.md +14 -19
  5. msprobe/config.json +1 -0
  6. msprobe/core/common/const.py +155 -6
  7. msprobe/core/common/exceptions.py +3 -1
  8. msprobe/core/common/file_utils.py +33 -7
  9. msprobe/core/common/inplace_ops.yaml +3 -0
  10. msprobe/core/common/utils.py +28 -14
  11. msprobe/core/common_config.py +6 -0
  12. msprobe/core/compare/acc_compare.py +139 -128
  13. msprobe/core/compare/check.py +31 -29
  14. msprobe/core/compare/compare_cli.py +17 -16
  15. msprobe/core/compare/highlight.py +186 -99
  16. msprobe/core/compare/layer_mapping/data_scope_parser.py +18 -7
  17. msprobe/core/compare/layer_mapping/layer_mapping.py +21 -14
  18. msprobe/core/compare/layer_mapping/postprocess_pass.py +4 -3
  19. msprobe/core/compare/merge_result/merge_result.py +380 -0
  20. msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
  21. msprobe/core/compare/multiprocessing_compute.py +2 -2
  22. msprobe/core/compare/npy_compare.py +109 -147
  23. msprobe/core/compare/utils.py +189 -69
  24. msprobe/core/data_dump/data_collector.py +51 -21
  25. msprobe/core/data_dump/data_processor/base.py +38 -20
  26. msprobe/core/data_dump/data_processor/factory.py +5 -3
  27. msprobe/core/data_dump/data_processor/mindspore_processor.py +154 -20
  28. msprobe/core/data_dump/data_processor/pytorch_processor.py +118 -58
  29. msprobe/core/data_dump/json_writer.py +29 -1
  30. msprobe/core/data_dump/scope.py +19 -18
  31. msprobe/core/overflow_check/abnormal_scene.py +9 -5
  32. msprobe/core/overflow_check/checker.py +1 -1
  33. msprobe/core/overflow_check/utils.py +1 -1
  34. msprobe/docs/01.installation.md +96 -17
  35. msprobe/docs/02.config_introduction.md +5 -5
  36. msprobe/docs/05.data_dump_PyTorch.md +91 -61
  37. msprobe/docs/06.data_dump_MindSpore.md +57 -19
  38. msprobe/docs/07.accuracy_checker_PyTorch.md +18 -18
  39. msprobe/docs/09.accuracy_checker_MindSpore.md +4 -4
  40. msprobe/docs/10.accuracy_compare_PyTorch.md +99 -41
  41. msprobe/docs/11.accuracy_compare_MindSpore.md +249 -48
  42. msprobe/docs/12.overflow_check_PyTorch.md +1 -1
  43. msprobe/docs/19.monitor.md +120 -27
  44. msprobe/docs/21.visualization_PyTorch.md +115 -35
  45. msprobe/docs/22.visualization_MindSpore.md +138 -41
  46. msprobe/docs/23.generate_operator_PyTorch.md +107 -0
  47. msprobe/docs/24.code_mapping_Mindspore.md +28 -0
  48. msprobe/docs/{23.tool_function_introduction.md → 25.tool_function_introduction.md} +1 -0
  49. msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
  50. msprobe/docs/27.dump_json_instruction.md +521 -0
  51. msprobe/docs/FAQ.md +26 -2
  52. msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
  53. msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
  54. msprobe/docs/img/merge_result.png +0 -0
  55. msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
  56. msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
  57. msprobe/docs/img/visualization/tensorboard_1.png +0 -0
  58. msprobe/docs/img/visualization/tensorboard_2.png +0 -0
  59. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  60. msprobe/docs/img/visualization/vis_browser_2.png +0 -0
  61. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  62. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  63. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  64. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  65. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  66. msprobe/docs/visualization/GPTModel.png +0 -0
  67. msprobe/docs/visualization/ParallelMLP.png +0 -0
  68. msprobe/docs/visualization/layer_mapping_example.md +132 -0
  69. msprobe/docs/visualization/mapping.png +0 -0
  70. msprobe/docs/visualization/mapping1.png +0 -0
  71. msprobe/docs/visualization/module_name.png +0 -0
  72. msprobe/docs/visualization/module_name1.png +0 -0
  73. msprobe/docs/visualization/no_mapping.png +0 -0
  74. msprobe/docs/visualization/no_mapping1.png +0 -0
  75. msprobe/docs/visualization/no_mapping_analyze.png +0 -0
  76. msprobe/docs/visualization/top_layer.png +0 -0
  77. msprobe/mindspore/__init__.py +10 -0
  78. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +57 -25
  79. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +2 -1
  80. msprobe/mindspore/api_accuracy_checker/compute_element.py +5 -7
  81. msprobe/mindspore/api_accuracy_checker/data_manager.py +37 -0
  82. msprobe/mindspore/api_accuracy_checker/main.py +1 -0
  83. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +12 -6
  84. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +3 -1
  85. msprobe/mindspore/code_mapping/bind.py +264 -0
  86. msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
  87. msprobe/mindspore/code_mapping/graph.py +49 -0
  88. msprobe/mindspore/code_mapping/graph_parser.py +226 -0
  89. msprobe/mindspore/code_mapping/main.py +24 -0
  90. msprobe/mindspore/code_mapping/processor.py +34 -0
  91. msprobe/mindspore/common/const.py +3 -1
  92. msprobe/mindspore/common/utils.py +50 -5
  93. msprobe/mindspore/compare/distributed_compare.py +0 -2
  94. msprobe/mindspore/compare/ms_compare.py +105 -63
  95. msprobe/mindspore/compare/ms_graph_compare.py +14 -5
  96. msprobe/mindspore/debugger/debugger_config.py +3 -0
  97. msprobe/mindspore/debugger/precision_debugger.py +81 -12
  98. msprobe/mindspore/dump/hook_cell/api_registry.py +83 -16
  99. msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
  100. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +33 -15
  101. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +11 -1
  102. msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
  103. msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
  104. msprobe/mindspore/dump/kernel_graph_dump.py +7 -0
  105. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +13 -4
  106. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
  107. msprobe/mindspore/grad_probe/grad_analyzer.py +24 -12
  108. msprobe/mindspore/grad_probe/hook.py +13 -4
  109. msprobe/mindspore/mindtorch/__init__.py +18 -0
  110. msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
  111. msprobe/mindspore/ms_config.py +5 -1
  112. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +7 -0
  113. msprobe/mindspore/service.py +267 -101
  114. msprobe/msprobe.py +24 -3
  115. msprobe/pytorch/__init__.py +7 -6
  116. msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
  117. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
  118. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +100 -267
  119. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
  120. msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
  121. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
  122. msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
  123. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
  124. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +54 -30
  125. msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
  126. msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
  127. msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
  128. msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
  129. msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
  130. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
  131. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
  132. msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
  133. msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
  134. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +57 -1
  135. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -1
  136. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +42 -14
  137. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +64 -19
  138. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +34 -4
  139. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
  140. msprobe/pytorch/bench_functions/npu_fusion_attention.py +42 -10
  141. msprobe/pytorch/common/parse_json.py +2 -1
  142. msprobe/pytorch/common/utils.py +45 -2
  143. msprobe/pytorch/compare/distributed_compare.py +17 -29
  144. msprobe/pytorch/compare/pt_compare.py +40 -20
  145. msprobe/pytorch/debugger/debugger_config.py +27 -12
  146. msprobe/pytorch/debugger/precision_debugger.py +42 -12
  147. msprobe/pytorch/dump/module_dump/__init__.py +0 -0
  148. msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
  149. msprobe/pytorch/{module_processer.py → dump/module_dump/module_processer.py} +80 -6
  150. msprobe/pytorch/free_benchmark/common/params.py +2 -1
  151. msprobe/pytorch/free_benchmark/common/utils.py +3 -0
  152. msprobe/pytorch/free_benchmark/compare/grad_saver.py +0 -2
  153. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +31 -47
  154. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
  155. msprobe/pytorch/hook_module/__init__.py +1 -1
  156. msprobe/pytorch/hook_module/hook_module.py +14 -11
  157. msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
  158. msprobe/pytorch/hook_module/support_wrap_ops.yaml +34 -0
  159. msprobe/pytorch/hook_module/wrap_distributed.py +6 -8
  160. msprobe/pytorch/hook_module/wrap_functional.py +0 -40
  161. msprobe/pytorch/monitor/anomaly_analyse.py +1 -1
  162. msprobe/pytorch/monitor/anomaly_detect.py +107 -22
  163. msprobe/pytorch/monitor/csv2tb.py +166 -0
  164. msprobe/pytorch/monitor/distributed/wrap_distributed.py +25 -14
  165. msprobe/pytorch/monitor/features.py +3 -3
  166. msprobe/pytorch/monitor/module_hook.py +483 -277
  167. msprobe/pytorch/monitor/module_metric.py +27 -48
  168. msprobe/pytorch/monitor/module_spec_verifier.py +3 -1
  169. msprobe/pytorch/monitor/optimizer_collect.py +52 -14
  170. msprobe/pytorch/monitor/unittest/test_monitor.py +24 -9
  171. msprobe/pytorch/monitor/utils.py +77 -6
  172. msprobe/pytorch/online_dispatch/dispatch.py +8 -2
  173. msprobe/pytorch/parse_tool/lib/compare.py +10 -10
  174. msprobe/pytorch/parse_tool/lib/config.py +5 -7
  175. msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
  176. msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
  177. msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
  178. msprobe/pytorch/parse_tool/lib/parse_tool.py +11 -10
  179. msprobe/pytorch/parse_tool/lib/utils.py +18 -19
  180. msprobe/pytorch/parse_tool/lib/visualization.py +9 -10
  181. msprobe/pytorch/service.py +176 -106
  182. msprobe/visualization/builder/graph_builder.py +62 -5
  183. msprobe/visualization/builder/msprobe_adapter.py +24 -2
  184. msprobe/visualization/compare/graph_comparator.py +64 -14
  185. msprobe/visualization/compare/mode_adapter.py +1 -15
  186. msprobe/visualization/graph/base_node.py +12 -17
  187. msprobe/visualization/graph/distributed_analyzer.py +318 -0
  188. msprobe/visualization/graph/graph.py +9 -0
  189. msprobe/visualization/graph_service.py +97 -23
  190. msprobe/visualization/utils.py +14 -29
  191. msprobe/pytorch/functional/module_dump.py +0 -84
  192. {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/LICENSE +0 -0
  193. {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/WHEEL +0 -0
  194. {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/entry_points.txt +0 -0
  195. {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/top_level.txt +0 -0
  196. /msprobe/docs/{data_dump_Mindspore → data_dump_MindSpore}/dynamic_graph_quick_start_example.md +0 -0
  197. /msprobe/{pytorch/functional → mindspore/code_mapping}/__init__.py +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
1
+ # Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -56,7 +56,7 @@ class DataProcessorFactory:
56
56
  FreeBenchmarkDataProcessor as PytorchFreeBenchmarkDataProcessor,
57
57
  KernelDumpDataProcessor as PytorchKernelDumpDataProcessor
58
58
  )
59
- from msprobe.pytorch.module_processer import ModuleProcesser
59
+ from msprobe.pytorch.dump.module_dump.module_processer import ModuleProcesser
60
60
  cls.register_processor(Const.PT_FRAMEWORK, Const.STATISTICS, PytorchStatisticsDataProcessor)
61
61
  cls.register_processor(Const.PT_FRAMEWORK, Const.TENSOR, PytorchTensorDataProcessor)
62
62
  cls.register_processor(Const.PT_FRAMEWORK, Const.OVERFLOW_CHECK, PytorchOverflowCheckDataProcessor)
@@ -67,10 +67,12 @@ class DataProcessorFactory:
67
67
  from msprobe.core.data_dump.data_processor.mindspore_processor import (
68
68
  StatisticsDataProcessor as MindsporeStatisticsDataProcessor,
69
69
  TensorDataProcessor as MindsporeTensorDataProcessor,
70
- OverflowCheckDataProcessor as MindsporeOverflowCheckDataProcessor
70
+ OverflowCheckDataProcessor as MindsporeOverflowCheckDataProcessor,
71
+ KernelDumpDataProcessor as MindsporeKernelDumpDataProcessor
71
72
  )
72
73
  from msprobe.mindspore.cell_processor import CellProcessor
73
74
  cls.register_processor(Const.MS_FRAMEWORK, Const.STATISTICS, MindsporeStatisticsDataProcessor)
74
75
  cls.register_processor(Const.MS_FRAMEWORK, Const.TENSOR, MindsporeTensorDataProcessor)
75
76
  cls.register_processor(Const.MS_FRAMEWORK, Const.OVERFLOW_CHECK, MindsporeOverflowCheckDataProcessor)
77
+ cls.register_processor(Const.MS_FRAMEWORK, Const.KERNEL_DUMP, MindsporeKernelDumpDataProcessor)
76
78
  cls.register_module_processor(Const.MS_FRAMEWORK, CellProcessor)
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Huawei Technologies Co., Ltd
1
+ # Copyright 2024-2025 Huawei Technologies Co., Ltd
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
16
16
  import zlib
17
17
 
18
18
  import mindspore as ms
19
- from mindspore import mint, ops
19
+ from mindspore import mint, ops, hal
20
20
  from mindspore._c_expression.typing import Number
21
21
  import numpy as np
22
22
 
@@ -28,6 +28,12 @@ from msprobe.mindspore.common.utils import convert_bf16_to_fp32, save_tensor_as_
28
28
  from msprobe.mindspore.common.log import logger
29
29
  from msprobe.mindspore.dump.hook_cell.api_registry import api_register
30
30
 
31
+ has_adump = True
32
+ try:
33
+ from msprobe.lib import _msprobe_c
34
+ except ImportError:
35
+ has_adump = False
36
+
31
37
 
32
38
  class MindsporeDataProcessor(BaseDataProcessor):
33
39
  mindspore_special_type = tuple([ms.Tensor, Number])
@@ -37,11 +43,12 @@ class MindsporeDataProcessor(BaseDataProcessor):
37
43
  self.mindspore_object_key = {
38
44
  "dtype": self.analyze_dtype_in_kwargs
39
45
  }
46
+ self._async_dump_cache = {}
40
47
 
41
48
  @staticmethod
42
49
  def get_md5_for_tensor(x):
43
50
  x = convert_bf16_to_fp32(x)
44
- tensor_bytes = x.contiguous().asnumpy().tobytes()
51
+ tensor_bytes = x.asnumpy().tobytes()
45
52
  crc32_hash = zlib.crc32(tensor_bytes)
46
53
  return f"{crc32_hash:08x}"
47
54
 
@@ -49,22 +56,17 @@ class MindsporeDataProcessor(BaseDataProcessor):
49
56
  def analyze_dtype_in_kwargs(element):
50
57
  return {"type": "mindspore.dtype", "value": str(element)}
51
58
 
52
- @classmethod
53
- def get_special_types(cls):
54
- return super().get_special_types() + cls.mindspore_special_type
55
-
56
- def get_stat_info(self, data):
59
+ @staticmethod
60
+ def get_stat_info_sync(data):
57
61
  tensor_stat = TensorStatInfo()
58
- if data.numel() == 0:
59
- return tensor_stat
60
- elif data.dtype == ms.bool_:
61
- data_np = data.contiguous().asnumpy()
62
+ if data.dtype == ms.bool_:
63
+ data_np = data.asnumpy()
62
64
  tensor_stat.max = np.max(data_np).item()
63
65
  tensor_stat.min = np.min(data_np).item()
64
66
  elif not data.shape:
65
67
  tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item()
66
68
  elif data.dtype == ms.complex64 or data.dtype == ms.complex128:
67
- data_abs = np.abs(data.contiguous().asnumpy())
69
+ data_abs = np.abs(data.asnumpy())
68
70
  tensor_stat.max = np.max(data_abs).item()
69
71
  tensor_stat.min = np.min(data_abs).item()
70
72
  tensor_stat.mean = np.mean(data_abs).item()
@@ -87,6 +89,47 @@ class MindsporeDataProcessor(BaseDataProcessor):
87
89
  api_register.norm_inner_op_set_hook_func()
88
90
  return tensor_stat
89
91
 
92
+ @staticmethod
93
+ def get_stat_info_async(data):
94
+ tensor_stat = TensorStatInfo()
95
+ stack_method = api_register.functional_ori_attr.get("stack", ms.ops.stack)
96
+ if data.dtype == ms.complex64 or data.dtype == ms.complex128:
97
+ logger.warning("Async dump do not support complex data!")
98
+ return tensor_stat
99
+ elif data.dtype == ms.bool_:
100
+ tensor_stat.stack_tensor_stat = (["Max", "Min"], stack_method([data.any(), data.all()]))
101
+ elif not data.shape:
102
+ tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method([data, data, data, data]))
103
+ else:
104
+ if not ops.is_floating_point(data) or data.dtype == ms.float64:
105
+ data = data.to(ms.float32)
106
+ api_register.norm_inner_op_set_ori_func()
107
+ get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max)
108
+ get_min_value = api_register.mint_ops_ori_attr.get("min", mint.min)
109
+ get_mean_value = api_register.mint_ops_ori_attr.get("mean", mint.mean)
110
+ if hasattr(mint, "norm"):
111
+ get_norm_value = api_register.mint_ops_ori_attr.get("norm", mint.norm)
112
+ else:
113
+ get_norm_value = api_register.functional_ori_attr.get("norm", ops.norm)
114
+ tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method(
115
+ [get_max_value(data), get_min_value(data), get_mean_value(data), get_norm_value(data)]))
116
+ api_register.norm_inner_op_set_hook_func()
117
+ return tensor_stat
118
+
119
+ @classmethod
120
+ def get_special_types(cls):
121
+ return super().get_special_types() + cls.mindspore_special_type
122
+
123
+ def get_stat_info(self, data):
124
+ tensor_stat = TensorStatInfo()
125
+ if data.numel() == 0:
126
+ return tensor_stat
127
+ else:
128
+ if self.config.async_dump:
129
+ return MindsporeDataProcessor.get_stat_info_async(data)
130
+ else:
131
+ return MindsporeDataProcessor.get_stat_info_sync(data)
132
+
90
133
  def analyze_single_element(self, element, suffix_stack):
91
134
  if suffix_stack and suffix_stack[-1] in self.mindspore_object_key:
92
135
  return self.mindspore_object_key[suffix_stack[-1]](element)
@@ -107,13 +150,17 @@ class MindsporeDataProcessor(BaseDataProcessor):
107
150
  tensor_json = {
108
151
  'type': 'mindspore.Tensor',
109
152
  'dtype': str(tensor.dtype),
110
- 'shape': tensor.shape,
111
- 'Max': self.transfer_type(tensor_stat.max),
112
- 'Min': self.transfer_type(tensor_stat.min),
113
- 'Mean': self.transfer_type(tensor_stat.mean),
114
- 'Norm': self.transfer_type(tensor_stat.norm),
153
+ 'shape': tensor.shape
115
154
  }
116
- if self.config.summary_mode == Const.MD5:
155
+
156
+ if tensor_stat.stack_tensor_stat is None:
157
+ tensor_json.update({'Max': self.transfer_type(tensor_stat.max)})
158
+ tensor_json.update({'Min': self.transfer_type(tensor_stat.min)})
159
+ tensor_json.update({'Mean': self.transfer_type(tensor_stat.mean)})
160
+ tensor_json.update({'Norm': self.transfer_type(tensor_stat.norm)})
161
+ else:
162
+ tensor_json.update({'tensor_stat': tensor_stat.stack_tensor_stat})
163
+ if self.config.summary_mode == Const.MD5 and not self.config.async_dump:
117
164
  tensor_md5 = self.get_md5_for_tensor(tensor)
118
165
  tensor_json.update({Const.MD5: tensor_md5})
119
166
  return tensor_json
@@ -124,11 +171,19 @@ class StatisticsDataProcessor(MindsporeDataProcessor):
124
171
 
125
172
 
126
173
  class TensorDataProcessor(MindsporeDataProcessor):
174
+ def dump_async_data(self):
175
+ for file_path, tensor in self._async_dump_cache.items():
176
+ save_tensor_as_npy(tensor, file_path)
177
+ self._async_dump_cache.clear()
178
+
127
179
  def _analyze_tensor(self, tensor, suffix):
128
180
  dump_data_name, file_path = self.get_save_file_path(suffix)
129
181
  single_arg = super()._analyze_tensor(tensor, suffix)
130
182
  single_arg.update({"data_name": dump_data_name})
131
- save_tensor_as_npy(tensor, file_path)
183
+ if self.config.async_dump:
184
+ self._async_dump_cache[file_path] = tensor.copy()
185
+ else:
186
+ save_tensor_as_npy(tensor, file_path)
132
187
  return single_arg
133
188
 
134
189
 
@@ -138,6 +193,7 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
138
193
  def __init__(self, config, data_writer):
139
194
  super().__init__(config, data_writer)
140
195
  self.has_overflow = False
196
+ self.cached_api_info = {}
141
197
  self.cached_tensors_and_file_paths = {}
142
198
  self.real_overflow_nums = 0
143
199
  self.overflow_nums = config.overflow_nums
@@ -150,6 +206,20 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
150
206
  return True
151
207
  return False
152
208
 
209
+ def analyze_forward_input(self, name, module, module_input_output: ModuleForwardInputsOutputs):
210
+ self.has_overflow = False
211
+ self.cached_api_info = super().analyze_forward_input(name, module, module_input_output)
212
+ return None
213
+
214
+ def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
215
+ api_info_struct = super().analyze_forward_output(name, module, module_input_output)
216
+ if name in self.cached_api_info and name in api_info_struct:
217
+ self.cached_api_info[name].update(api_info_struct[name])
218
+ elif name in api_info_struct:
219
+ self.cached_api_info = api_info_struct
220
+ self.maybe_save_overflow_data()
221
+ return self.cached_api_info if self.has_overflow else None
222
+
153
223
  def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
154
224
  self.has_overflow = False
155
225
  api_info_struct = super().analyze_forward(name, module, module_input_output)
@@ -161,6 +231,12 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
161
231
  api_info_struct = super().analyze_backward(name, module, module_input_output)
162
232
  self.maybe_save_overflow_data()
163
233
  return api_info_struct if self.has_overflow else None
234
+
235
+ def analyze_params(self, name, param_name, grad):
236
+ self.has_overflow = False
237
+ api_info_struct = super().analyze_params(name, param_name, grad)
238
+ self.maybe_save_overflow_data()
239
+ return api_info_struct if self.has_overflow else None
164
240
 
165
241
  def maybe_save_overflow_data(self):
166
242
  if self.has_overflow:
@@ -190,3 +266,61 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
190
266
  self._analyze_maybe_overflow_tensor(single_arg)
191
267
  single_arg.update({"data_name": dump_data_name})
192
268
  return single_arg
269
+
270
+
271
+ class KernelDumpDataProcessor(MindsporeDataProcessor):
272
+ def __init__(self, config, data_writer):
273
+ super().__init__(config, data_writer)
274
+ self.enable_kernel_dump = True
275
+
276
+ @staticmethod
277
+ def start_kernel_dump(config_path):
278
+ hal.synchronize()
279
+ _msprobe_c.init_dump()
280
+ _msprobe_c.set_dump(config_path)
281
+ hal.synchronize()
282
+
283
+ @staticmethod
284
+ def stop_kernel_dump():
285
+ hal.synchronize()
286
+ _msprobe_c.finalize_dump()
287
+ hal.synchronize()
288
+
289
+ @staticmethod
290
+ def _print_unsupported_log(api_name):
291
+ logger.warning(f"The kernel dump does not support the {api_name} API.")
292
+
293
+ def analyze_forward_input(self, name, module, module_input_output):
294
+ if not self.enable_kernel_dump:
295
+ return
296
+ if not has_adump:
297
+ logger.warning("The current msprobe package does not compile adump, and kernel dump cannot be used.")
298
+ self.enable_kernel_dump = False
299
+ return
300
+ self.start_kernel_dump(self.config.kernel_config_path)
301
+
302
+ def analyze_forward_output(self, name, module, module_input_output):
303
+ if not self.enable_kernel_dump:
304
+ return
305
+ self.enable_kernel_dump = False
306
+ self.stop_kernel_dump()
307
+ logger.info(f"The kernel data of {name} is dumped successfully.")
308
+
309
+ def analyze_backward_input(self, name, module, module_input_output):
310
+ if not self.enable_kernel_dump:
311
+ return
312
+ if not has_adump:
313
+ logger.warning("The current msprobe package does not compile adump, and kernel dump cannot be used.")
314
+ self.enable_kernel_dump = False
315
+ return
316
+ self.start_kernel_dump(self.config.kernel_config_path)
317
+
318
+ def analyze_backward(self, name, module, module_input_output):
319
+ if not self.enable_kernel_dump:
320
+ return
321
+ self.enable_kernel_dump = False
322
+ self.stop_kernel_dump()
323
+ logger.info(f"The kernel data of {name} is dumped successfully.")
324
+
325
+ def reset_status(self):
326
+ self.enable_kernel_dump = True
@@ -54,6 +54,7 @@ class PytorchDataProcessor(BaseDataProcessor):
54
54
  "device": self.analyze_device_in_kwargs,
55
55
  "dtype": self.analyze_dtype_in_kwargs
56
56
  }
57
+ self._async_dump_cache = {}
57
58
 
58
59
  @staticmethod
59
60
  def get_md5_for_tensor(x):
@@ -82,49 +83,80 @@ class PytorchDataProcessor(BaseDataProcessor):
82
83
  return {"type": "torch.dtype", "value": str(element)}
83
84
 
84
85
  @staticmethod
85
- def get_stat_info(data):
86
+ def get_stat_info_async(data):
86
87
  tensor_stat = TensorStatInfo()
87
- if data.is_meta:
88
- return tensor_stat
89
- data_clone = data.detach()
90
- if data_clone.numel() == 0:
88
+ if torch.is_complex(data):
89
+ logger.warning("Async dump do not support complex data!")
91
90
  return tensor_stat
92
- elif data_clone.dtype == torch.bool:
93
- tensor_stat.max = torch._C._VariableFunctionsClass.any(data_clone).item()
94
- tensor_stat.min = torch._C._VariableFunctionsClass.all(data_clone).item()
95
- elif not data_clone.shape:
96
- tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data_clone.item()
97
- elif torch.is_complex(data_clone):
98
- data_np = data_clone.cpu().numpy()
91
+ elif data.dtype == torch.bool:
92
+ tensor_stat.stack_tensor_stat = (["Max", "Min"], torch.stack(
93
+ [torch.any(data), torch.all(data)]))
94
+ elif not data.shape:
95
+ tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], torch.stack([data, data, data, data]))
96
+ else:
97
+ if not data.is_floating_point() or data.dtype == torch.float64:
98
+ data = data.float()
99
+ tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], torch.stack([
100
+ torch.max(data),
101
+ torch.min(data),
102
+ torch.mean(data),
103
+ torch.norm(data)
104
+ ]))
105
+ return tensor_stat
106
+
107
+ @staticmethod
108
+ def get_stat_info_sync(data):
109
+ tensor_stat = TensorStatInfo()
110
+ if torch.is_complex(data):
111
+ data_np = data.cpu().numpy()
99
112
  data_abs = np.abs(data_np)
100
113
  tensor_stat.max = np.max(data_abs).item()
101
114
  tensor_stat.min = np.min(data_abs).item()
102
115
  tensor_stat.mean = np.mean(data_abs).item()
116
+ elif data.dtype == torch.bool:
117
+ tensor_stat.max = torch.any(data).item()
118
+ tensor_stat.min = torch.all(data).item()
119
+ elif not data.shape:
120
+ tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item()
103
121
  else:
104
- if not data_clone.is_floating_point() or data_clone.dtype == torch.float64:
105
- data_clone = data_clone.float()
106
- tensor_stat.max = torch._C._VariableFunctionsClass.max(data_clone).item()
107
- tensor_stat.min = torch._C._VariableFunctionsClass.min(data_clone).item()
108
- tensor_stat.mean = torch._C._VariableFunctionsClass.mean(data_clone).item()
109
- tensor_stat.norm = torch._C._VariableFunctionsClass.norm(data_clone).item()
122
+ if not data.is_floating_point() or data.dtype == torch.float64:
123
+ data = data.float()
124
+ tensor_stat.max = torch.max(data).item()
125
+ tensor_stat.min = torch.min(data).item()
126
+ tensor_stat.mean = torch.mean(data).item()
127
+ tensor_stat.norm = torch.norm(data).item()
110
128
  return tensor_stat
111
129
 
130
+ @staticmethod
131
+ def get_stat_info(data, async_dump=False):
132
+ tensor_stat = TensorStatInfo()
133
+ if data.is_meta:
134
+ return tensor_stat
135
+ data_clone = data.detach()
136
+ if data_clone.numel() == 0:
137
+ return tensor_stat
138
+ else:
139
+ if data_clone.device.type == Const.CPU_LOWERCASE or not async_dump:
140
+ return PytorchDataProcessor.get_stat_info_sync(data_clone)
141
+ else:
142
+ return PytorchDataProcessor.get_stat_info_async(data_clone)
143
+
112
144
  @staticmethod
113
145
  def handle_tensor_extremum_nan_inf(tensor, operator):
114
146
  data_clone = tensor.detach()
115
- data_nan = torch._C._VariableFunctionsClass.isnan(data_clone)
116
- if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel():
147
+ data_nan = torch.isnan(data_clone)
148
+ if int(torch.sum(data_nan)) == data_clone.numel():
117
149
  return float('nan')
118
150
 
119
- finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone)
120
- if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0:
121
- finite_values = getattr(torch._C._TensorBase, "__getitem__")(data_clone, finite_mask)
122
- return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \
123
- torch._C._VariableFunctionsClass.min(finite_values).item()
151
+ finite_mask = torch.isfinite(data_clone)
152
+ if int(torch.sum(finite_mask)) > 0:
153
+ finite_values = data_clone[finite_mask]
154
+ return torch.max(finite_values).item() if operator == 'max' else \
155
+ torch.min(finite_values).item()
124
156
  else:
125
- data_no_nan = getattr(torch._C._TensorBase, "__getitem__")(data_clone, ~data_nan)
126
- return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \
127
- torch._C._VariableFunctionsClass.min(data_no_nan).item()
157
+ data_no_nan = data_clone[~data_nan]
158
+ return torch.max(data_no_nan).item() if operator == 'max' else \
159
+ torch.min(data_no_nan).item()
128
160
 
129
161
  @staticmethod
130
162
  def process_group_hash(arg):
@@ -132,6 +164,10 @@ class PytorchDataProcessor(BaseDataProcessor):
132
164
  group_ranks_hash = hashlib.md5(str(group_ranks).encode('utf-8')).hexdigest()
133
165
  return group_ranks_hash
134
166
 
167
+ @staticmethod
168
+ def is_distributed_op(module):
169
+ return getattr(module, "op_is_distributed", False)
170
+
135
171
  @staticmethod
136
172
  def _analyze_torch_size(arg):
137
173
  return {"type": "torch.Size", "value": list(arg)}
@@ -177,26 +213,35 @@ class PytorchDataProcessor(BaseDataProcessor):
177
213
  return self._analyze_builtin(element)
178
214
  return {}
179
215
 
216
def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
    """Collect forward-output info for *name*, special-casing distributed ops.

    For modules flagged as distributed ops, the args/kwargs are folded into
    the output slot before delegating to the base implementation.
    """
    if self.is_distributed_op(module):
        # NOTE(review): presumably the observable result of a distributed op
        # lives in its (mutated) args/kwargs rather than its return value,
        # so they are mirrored into the output — confirm against the hook layer.
        module_input_output.update_output_with_args_and_kwargs()
    return super().analyze_forward_output(name, module, module_input_output)
220
+
180
221
def _analyze_tensor(self, tensor, suffix):
    """Build the JSON summary dict for *tensor*.

    Two layouts are produced depending on how statistics were computed:
    - synchronous stats (``stack_tensor_stat is None``): scalar Max/Min/Mean/
      Norm fields, plus ``*_except_inf_nan`` fallbacks when the extremum is
      inf/nan;
    - async stats: the packed stats are stored under ``"tensor_stat"`` and
      resolved later (see DataWriter.fill_stack_tensor_data).
    An MD5 digest is appended only in MD5 summary mode and only for
    synchronous dumps.
    """
    # async_dump controls whether stats are gathered eagerly or packed.
    tensor_stat = self.get_stat_info(tensor, self.config.async_dump)
    tensor_json = {}
    tensor_json.update({'type': 'torch.Tensor'})
    tensor_json.update({'dtype': str(tensor.dtype)})
    tensor_json.update({"shape": tensor.shape})
    if tensor_stat.stack_tensor_stat is None:
        tensor_json.update({"Max": tensor_stat.max})
        tensor_json.update({"Min": tensor_stat.min})
        tensor_json.update({"Mean": tensor_stat.mean})
        tensor_json.update({"Norm": tensor_stat.norm})
        tensor_json.update({"requires_grad": tensor.requires_grad})
        # When the recorded extremum is inf/nan, also record the extremum
        # over the remaining (finite, or at least non-nan) values.
        if tensor_stat.max is not None:
            if np.isinf(tensor_stat.max) or np.isnan(tensor_stat.max):
                tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "max")
        if tensor_stat.min is not None:
            if np.isinf(tensor_stat.min) or np.isnan(tensor_stat.min):
                tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "min")

    else:
        tensor_json.update({"requires_grad": tensor.requires_grad})
        tensor_json.update({"tensor_stat": tensor_stat.stack_tensor_stat})

    # MD5 hashing requires reading the tensor now, so it is skipped in
    # async mode.
    if self.config.summary_mode == Const.MD5 and not self.config.async_dump:
        tensor_md5 = self.get_md5_for_tensor(tensor)
        tensor_json.update({Const.MD5: tensor_md5})
    return tensor_json
@@ -207,12 +252,20 @@ class StatisticsDataProcessor(PytorchDataProcessor):
207
252
 
208
253
 
209
254
  class TensorDataProcessor(PytorchDataProcessor):
255
def dump_async_data(self):
    """Flush all tensors deferred by async dumping to disk, then clear the cache."""
    for file_path, tensor in self._async_dump_cache.items():
        # contiguous() here because the cached clone was not made contiguous
        # at capture time (see _analyze_tensor).
        save_pt(tensor.contiguous(), file_path)
    self._async_dump_cache.clear()
+
210
260
def _analyze_tensor(self, tensor, suffix):
    """Summarize *tensor* (via the base class) and persist its data.

    Sync mode saves the tensor immediately with save_pt; async mode only
    caches a detached clone under its target file path for a later
    dump_async_data() flush.
    """
    dump_data_name, file_path = self.get_save_file_path(suffix)
    single_arg = super()._analyze_tensor(tensor, suffix)
    single_arg.update({"data_name": dump_data_name})
    if self.config.async_dump:
        # Deferred write: contiguity is applied at flush time.
        self._async_dump_cache[file_path] = tensor.clone().detach()
    else:
        saved_tensor = tensor.clone().contiguous().detach()
        save_pt(saved_tensor, file_path)
    return single_arg
217
270
 
218
271
 
@@ -223,7 +276,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
223
276
  super().__init__(config, data_writer)
224
277
  self.has_overflow = False
225
278
  self.support_inf_nan = None
226
- self.cached_inplace_api_info = {}
279
+ self.cached_api_info = {}
227
280
  self.cached_tensors_and_file_paths = {}
228
281
  self.bits_for_overflow = 8
229
282
  self.real_overflow_nums = 0
@@ -237,21 +290,21 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
237
290
  return True
238
291
  return False
239
292
 
240
def analyze_forward_input(self, name, module, module_input_output: ModuleForwardInputsOutputs):
    """Capture forward-input info but defer emission until output time.

    Resets the per-call overflow flag, probes inf/nan support once, and
    stashes the base-class result in self.cached_api_info so that
    analyze_forward_output can merge input and output info. Always returns
    None — nothing is reported from the input side alone.
    """
    self.has_overflow = False
    self._is_support_inf_nan()
    self.cached_api_info = super().analyze_forward_input(name, module, module_input_output)
    return None
245
298
 
246
def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
    """Merge output info into the cached input info and report on overflow.

    Returns the merged api-info dict only when an overflow was detected for
    this call; otherwise None.
    """
    self._is_support_inf_nan()
    api_info_struct = super().analyze_forward_output(name, module, module_input_output)
    # NOTE(review): assumes the base class always returns a dict here (never
    # None), otherwise the membership tests below would raise — confirm.
    if name in self.cached_api_info and name in api_info_struct:
        self.cached_api_info[name].update(api_info_struct[name])
    elif name in api_info_struct:
        # No cached input entry for this name: fall back to output-only info.
        self.cached_api_info = api_info_struct
    self.handle_overflow()
    return self.cached_api_info if self.has_overflow else None
255
308
 
256
309
  def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
257
310
  self.has_overflow = False
@@ -266,6 +319,13 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
266
319
  api_info_struct = super().analyze_backward(name, module, module_input_output)
267
320
  self.handle_overflow()
268
321
  return api_info_struct if self.has_overflow else None
322
+
323
def analyze_params(self, name, param_name, grad):
    """Analyze a parameter gradient, reporting it only when it overflows.

    Mirrors analyze_forward/analyze_backward: reset the overflow flag, probe
    inf/nan support, delegate to the base class, then return the api-info
    struct only if an overflow was flagged.
    """
    self.has_overflow = False
    self._is_support_inf_nan()
    api_info_struct = super().analyze_params(name, param_name, grad)
    self.handle_overflow()
    return api_info_struct if self.has_overflow else None
269
329
 
270
330
  def handle_overflow(self):
271
331
  if not self.support_inf_nan:
@@ -340,10 +400,10 @@ class FreeBenchmarkDataProcessor(PytorchDataProcessor):
340
400
  )
341
401
  return
342
402
 
343
def analyze_forward_input(self, name, module, module_input_output: ModuleForwardInputsOutputs):
    """Forward the pre-forward hook data to the free-benchmark checker.

    Pure delegation: passes this processor itself along with the call's
    args/kwargs; returns nothing.
    """
    self.checker.pre_forward(name, module, self, module_input_output.args, module_input_output.kwargs)
345
405
 
346
- def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
406
+ def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
347
407
  new_output, unequal_rows = self.checker.forward(
348
408
  name,
349
409
  module,
@@ -388,7 +448,7 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
388
448
def _print_unsupported_log(api_name):
    """Warn that *api_name* cannot be covered by kernel dump."""
    message = f"The kernel dump does not support the {api_name} API."
    logger.warning(message)
390
450
 
391
- def analyze_pre_forward(self, name, module, module_input_output):
451
+ def analyze_forward_input(self, name, module, module_input_output):
392
452
  if not self.enable_kernel_dump:
393
453
  return
394
454
  if is_gpu:
@@ -413,7 +473,7 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
413
473
  return
414
474
  self.start_kernel_dump(self.config.kernel_config_path)
415
475
 
416
- def analyze_forward(self, name, module, module_input_output):
476
+ def analyze_forward_output(self, name, module, module_input_output):
417
477
  if not self.enable_kernel_dump:
418
478
  return
419
479
  if self.config.is_backward_kernel_dump:
@@ -15,10 +15,12 @@
15
15
 
16
16
  import csv
17
17
  import os
18
+ import numpy as np
18
19
 
19
20
  from msprobe.core.common.const import Const, FileCheckConst
20
- from msprobe.core.common.file_utils import change_mode, FileOpen, save_json
21
+ from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json
21
22
  from msprobe.core.common.log import logger
23
+ from msprobe.core.common.exceptions import MsprobeException
22
24
 
23
25
 
24
26
  class DataWriter:
@@ -115,3 +117,29 @@ class DataWriter:
115
117
  self.write_stack_info_json(self.stack_file_path)
116
118
  if self.cache_construct:
117
119
  self.write_construct_info_json(self.construct_file_path)
120
+
121
def fill_stack_tensor_data(self):
    """Resolve all packed "tensor_stat" entries in the cached dump data
    into scalar statistics (see process_stat_data_recursive)."""
    self.process_stat_data_recursive(self.cache_data)
123
+
124
def process_stat_data_recursive(self, data, depth=0):
    """Recursively walk *data* and expand packed "tensor_stat" entries.

    A "tensor_stat" entry is a two-element pair [stat_names, stat_values]
    produced by async dumping; each name/value pair is promoted to a
    top-level key of the enclosing dict and the packed entry is removed.
    Values living on a non-CPU device are moved to CPU first.

    :param data: dict/list/tuple tree from the dump cache; other types are
        left untouched.
    :param depth: current recursion depth, guarded by Const.MAX_DEPTH.
    :raises MsprobeException: RECURSION_LIMIT_ERROR when the tree is deeper
        than Const.MAX_DEPTH.
    """
    if depth > Const.MAX_DEPTH:
        logger.error(f"The maximum depth of recursive process stat data, {Const.MAX_DEPTH} is reached.")
        raise MsprobeException(MsprobeException.RECURSION_LIMIT_ERROR)
    if isinstance(data, dict):
        if "tensor_stat" in data.keys():
            tensor_stat = data["tensor_stat"]
            # Sanity check: expect exactly [index_list, value_list] of equal length.
            if len(tensor_stat) != Const.TENSOR_STAT_LEN or len(tensor_stat[0]) != len(tensor_stat[1]):
                logger.warning("Some bad data in async dump")
            else:
                tensor_stat_index, tensor_stat_data = tensor_stat[0], tensor_stat[1]
                if hasattr(tensor_stat_data, "device") and tensor_stat_data.device != Const.CPU_LOWERCASE:
                    tensor_stat_data = tensor_stat_data.cpu()
                for index, stat in zip(tensor_stat_index, tensor_stat_data):
                    # Bug fix: the original passed a SET literal
                    # ``{index, stat.item()}`` to dict.update, which raises
                    # TypeError (set elements are not key/value pairs). A dict
                    # literal maps each stat name to its scalar value.
                    data.update({index: stat.item()})
                del data["tensor_stat"]
        else:
            for key in data.keys():
                self.process_stat_data_recursive(data[key], depth + 1)
    elif isinstance(data, (list, tuple)):
        for i in data:
            self.process_stat_data_recursive(i, depth + 1)