PyPI - mindstudio-probe - Versions diffs - 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

mindstudio-probe 1.2.2-py3-none-any.whl → 1.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153)

{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/METADATA +3 -3
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/RECORD +143 -144
msprobe/README.md +25 -20
msprobe/core/common/const.py +110 -66
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +3 -1
msprobe/core/common/file_utils.py +25 -2
msprobe/core/common/utils.py +30 -34
msprobe/core/compare/acc_compare.py +43 -74
msprobe/core/compare/check.py +2 -6
msprobe/core/compare/highlight.py +2 -0
msprobe/core/compare/layer_mapping/layer_mapping.py +2 -1
msprobe/core/compare/merge_result/merge_result.py +8 -2
msprobe/core/compare/multiprocessing_compute.py +19 -12
msprobe/core/compare/npy_compare.py +30 -12
msprobe/core/compare/utils.py +20 -10
msprobe/core/data_dump/api_registry.py +176 -0
msprobe/core/data_dump/data_processor/base.py +2 -2
msprobe/core/data_dump/data_processor/mindspore_processor.py +19 -32
msprobe/core/data_dump/data_processor/pytorch_processor.py +45 -15
msprobe/core/data_dump/json_writer.py +38 -35
msprobe/core/grad_probe/constant.py +1 -0
msprobe/core/grad_probe/grad_compare.py +1 -1
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/docs/01.installation.md +2 -1
msprobe/docs/02.config_introduction.md +17 -15
msprobe/docs/05.data_dump_PyTorch.md +70 -2
msprobe/docs/06.data_dump_MindSpore.md +33 -12
msprobe/docs/07.accuracy_checker_PyTorch.md +11 -1
msprobe/docs/08.accuracy_checker_online_PyTorch.md +3 -1
msprobe/docs/09.accuracy_checker_MindSpore.md +1 -1
msprobe/docs/10.accuracy_compare_PyTorch.md +59 -33
msprobe/docs/11.accuracy_compare_MindSpore.md +40 -16
msprobe/docs/12.overflow_check_PyTorch.md +3 -1
msprobe/docs/13.overflow_check_MindSpore.md +4 -2
msprobe/docs/14.data_parse_PyTorch.md +1 -7
msprobe/docs/18.online_dispatch.md +1 -1
msprobe/docs/19.monitor.md +124 -62
msprobe/docs/21.visualization_PyTorch.md +32 -13
msprobe/docs/22.visualization_MindSpore.md +32 -13
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/27.dump_json_instruction.md +278 -8
msprobe/docs/28.kernel_dump_MindSpore.md +1 -1
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +4 -3
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +6 -1
msprobe/mindspore/api_accuracy_checker/api_runner.py +19 -9
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +602 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -1
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +2 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/common/const.py +61 -0
msprobe/mindspore/common/utils.py +31 -19
msprobe/mindspore/compare/ms_compare.py +27 -19
msprobe/mindspore/compare/ms_graph_compare.py +6 -5
msprobe/mindspore/debugger/debugger_config.py +6 -4
msprobe/mindspore/debugger/precision_debugger.py +22 -10
msprobe/mindspore/dump/dump_tool_factory.py +5 -3
msprobe/mindspore/dump/hook_cell/api_register.py +142 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +9 -10
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +24 -26
msprobe/mindspore/dump/jit_dump.py +14 -9
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +22 -56
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -1
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +10 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +2 -0
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/monitor/distributed/wrap_distributed.py +1 -1
msprobe/mindspore/monitor/module_hook.py +354 -302
msprobe/mindspore/monitor/utils.py +46 -4
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/service.py +23 -17
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +11 -6
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +2 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +4 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +5 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +25 -6
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -19
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +6 -0
msprobe/pytorch/common/utils.py +29 -7
msprobe/pytorch/debugger/precision_debugger.py +10 -1
msprobe/pytorch/dump/module_dump/module_dump.py +4 -3
msprobe/pytorch/dump/module_dump/module_processer.py +12 -6
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +1 -1
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/hook_module/api_register.py +131 -0
msprobe/pytorch/hook_module/hook_module.py +19 -14
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +172 -75
msprobe/pytorch/monitor/csv2tb.py +8 -2
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +131 -105
msprobe/pytorch/monitor/module_metric.py +3 -0
msprobe/pytorch/monitor/optimizer_collect.py +55 -4
msprobe/pytorch/monitor/unittest/test_monitor.py +1 -1
msprobe/pytorch/monitor/utils.py +68 -1
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +9 -0
msprobe/pytorch/online_dispatch/dump_compare.py +3 -0
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +2 -1
msprobe/pytorch/pt_config.py +11 -7
msprobe/pytorch/service.py +11 -8
msprobe/visualization/builder/graph_builder.py +44 -5
msprobe/visualization/builder/msprobe_adapter.py +0 -1
msprobe/visualization/compare/graph_comparator.py +42 -38
msprobe/visualization/compare/mode_adapter.py +0 -19
msprobe/visualization/graph/base_node.py +8 -1
msprobe/visualization/graph/distributed_analyzer.py +1 -10
msprobe/visualization/graph/graph.py +0 -11
msprobe/visualization/graph/node_op.py +1 -2
msprobe/visualization/graph_service.py +1 -1
msprobe/visualization/utils.py +2 -33
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -207
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -79
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
msprobe/pytorch/parse.py +0 -19
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/top_level.txt +0 -0

msprobe/core/data_dump/api_registry.py ADDED

@@ -0,0 +1,176 @@
+# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict, Any, Optional, Callable, Union, List, Tuple
+from msprobe.core.common.const import Const
+from msprobe.core.common.file_utils import load_yaml
+def _get_attr(module, attr_name):
+    if Const.SEP in attr_name:
+        sub_module_name, sub_attr = attr_name.rsplit(Const.SEP, 1)
+        sub_module = getattr(module, sub_module_name, None)
+        attr = getattr(sub_module, sub_attr, None)
+    else:
+        attr = getattr(module, attr_name, None)
+    return attr
+class ApiWrapper:
+    def __init__(
+        self, api_types: Dict[str, Dict[str, Any]],
+        api_list_paths: Union[str, List[str], Tuple[str]]
+    ):
+        self.api_types = api_types
+        if not isinstance(api_list_paths, (list, tuple)):
+            api_list_paths = [api_list_paths] * len(self.api_types)
+        elif len(api_list_paths) != len(self.api_types):
+            raise RuntimeError("The number of api_list_paths must be equal to the number of frameworks in 'api_types', "
+                               "when api_list_paths is a list or tuple.")
+        self.api_list_paths = api_list_paths
+        self.api_names = self._get_api_names()
+        self.wrapped_api_functions = dict()
+    def wrap_api(
+        self, api_templates, hook_build_func: Optional[Callable]
+    ):
+        api_types_num = sum([len(v) for v in self.api_types.values()])
+        if not isinstance(api_templates, (list, tuple)):
+            api_templates = [api_templates] * api_types_num
+        elif len(api_templates) != api_types_num:
+            raise RuntimeError("The number of api_templates must be equal to the number of api_types, "
+                               "when api_templates is a list or tuple.")
+        self.wrapped_api_functions.clear()
+        index = 0
+        for framework, api_types in self.api_types.items():
+            wrapped_functions_in_framework = dict()
+            for api_type, api_modules in api_types.items():
+                wrapped_functions = dict()
+                name_prefix = Const.API_DATA_PREFIX.get(framework, {}).get(api_type, "API")
+                api_template = api_templates[index]
+                index += 1
+                for api_name in self.api_names.get(framework, {}).get(api_type, []):
+                    ori_api = _get_attr(api_modules[0], api_name)
+                    if callable(ori_api):
+                        def wrap_api_func(api_name, api_func, prefix, hook_build_func, api_template):
+                            def api_function(*args, **kwargs):
+                                return api_template(api_name, api_func, prefix, hook_build_func)(*args, **kwargs)
+                            api_function.__name__ = api_name
+                            return api_function
+                        wrapped_functions[api_name] = wrap_api_func(api_name, ori_api, name_prefix,
+                                                                    hook_build_func, api_template)
+                wrapped_functions_in_framework[api_type] = wrapped_functions
+            self.wrapped_api_functions[framework] = wrapped_functions_in_framework
+        return self.wrapped_api_functions
+    def _get_api_names(self):
+        api_names = dict()
+        for index, framework in enumerate(self.api_types.keys()):
+            api_list = load_yaml(self.api_list_paths[index])
+            valid_names = dict()
+            for api_type, api_modules in self.api_types.get(framework, {}).items():
+                api_from_file = api_list.get(Const.SUPPORT_API_DICT_KEY_MAP.get(framework, {}).get(api_type), [])
+                names = set()
+                for api_name in api_from_file:
+                    target_attr = api_name
+                    target_module = api_modules[0]
+                    if Const.SEP in api_name:
+                        sub_module_name, target_attr = api_name.rsplit(Const.SEP, 1)
+                        target_module = getattr(api_modules[0], sub_module_name, None)
+                    if target_module and target_attr in dir(target_module):
+                        names.add(api_name)
+                valid_names[api_type] = names
+            api_names[framework] = valid_names
+        return api_names
+class ApiRegistry:
+    """
+    Base class for api registry.
+    """
+    def __init__(self, api_types, inner_used_api, supported_api_list_path, api_templates):
+        self.ori_api_attr = dict()
+        self.wrapped_api_attr = dict()
+        self.inner_used_ori_attr = dict()
+        self.inner_used_wrapped_attr = dict()
+        self.api_types = api_types
+        self.inner_used_api = inner_used_api
+        self.supported_api_list_path = supported_api_list_path
+        self.api_templates = api_templates
+    @staticmethod
+    def store_ori_attr(ori_api_group, api_list, api_ori_attr):
+        for api in api_list:
+            api_ori_attr[api] = _get_attr(ori_api_group, api)
+    @staticmethod
+    def set_api_attr(api_group, attr_dict):
+        for api, api_attr in attr_dict.items():
+            if Const.SEP in api:
+                sub_module_name, sub_op = api.rsplit(Const.SEP, 1)
+                sub_module = getattr(api_group, sub_module_name, None)
+                if sub_module is not None:
+                    setattr(sub_module, sub_op, api_attr)
+            else:
+                setattr(api_group, api, api_attr)
+    def register_all_api(self):
+        for framework, api_types in self.api_types.items():
+            for api_type, api_modules in api_types.items():
+                api_type_with_framework = framework + Const.SEP + api_type
+                for module in api_modules[1]:
+                    self.set_api_attr(module, self.wrapped_api_attr.get(api_type_with_framework, {}))
+    def register_inner_used_api(self):
+        for api_type in self.inner_used_api.keys():
+            self.set_api_attr(self.inner_used_api.get(api_type)[0], self.inner_used_wrapped_attr.get(api_type, {}))
+    def restore_all_api(self):
+        for framework, api_types in self.api_types.items():
+            for api_type, api_modules in api_types.items():
+                api_type_with_framework = framework + Const.SEP + api_type
+                for module in api_modules[1]:
+                    self.set_api_attr(module, self.ori_api_attr.get(api_type_with_framework, {}))
+    def restore_inner_used_api(self):
+        for api_type in self.inner_used_api.keys():
+            self.set_api_attr(self.inner_used_api.get(api_type)[0], self.inner_used_ori_attr.get(api_type, {}))
+    def initialize_hook(self, hook_build_func):
+        api_wrapper = ApiWrapper(self.api_types, self.supported_api_list_path)
+        wrapped_api_functions = api_wrapper.wrap_api(self.api_templates, hook_build_func)
+        for framework, api_types in self.api_types.items():
+            for api_type, api_modules in api_types.items():
+                ori_attr = dict()
+                self.store_ori_attr(api_modules[0], api_wrapper.api_names.get(framework).get(api_type), ori_attr)
+                api_type_with_framework = framework + Const.SEP + api_type
+                self.ori_api_attr[api_type_with_framework] = ori_attr
+                self.wrapped_api_attr[api_type_with_framework] = wrapped_api_functions.get(framework).get(api_type)
+        for inner_used_api_type, inner_used_api_list in self.inner_used_api.items():
+            ori_attr = dict()
+            wrapped_attr = dict()
+            for api_name in inner_used_api_list[1:]:
+                if self.ori_api_attr.get(inner_used_api_type, {}).get(api_name):
+                    ori_attr[api_name] = self.ori_api_attr.get(inner_used_api_type).get(api_name)
+                    wrapped_attr[api_name] = self.wrapped_api_attr.get(inner_used_api_type).get(api_name)
+            self.inner_used_ori_attr[inner_used_api_type] = ori_attr
+            self.inner_used_wrapped_attr[inner_used_api_type] = wrapped_attr

msprobe/core/data_dump/data_processor/base.py CHANGED

@@ -252,8 +252,8 @@ class BaseDataProcessor:
     @classmethod
     def recursive_apply_transform(cls, args, transform, depth=0) -> Union[dict, list, None]:
-        if depth > Const.MAX_DEPTH:
-            logger.error(f"The maximum depth of recursive transform, {Const.MAX_DEPTH} is reached.")
+        if depth > Const.DUMP_MAX_DEPTH:
+            logger.error(f"The maximum depth of recursive transform, {Const.DUMP_MAX_DEPTH} is reached.")
             raise CompareException(CompareException.RECURSION_LIMIT_ERROR)
         if isinstance(args, cls.get_special_types()):
             arg_transform = transform(args, cls._recursive_key_stack)

msprobe/core/data_dump/data_processor/mindspore_processor.py CHANGED

@@ -26,7 +26,7 @@ from msprobe.core.data_dump.data_processor.base import (BaseDataProcessor, Tenso
 from msprobe.core.common.file_utils import path_len_exceeds_limit, save_npy
 from msprobe.mindspore.common.utils import convert_bf16_to_fp32, save_tensor_as_npy
 from msprobe.mindspore.common.log import logger
-from msprobe.mindspore.dump.hook_cell.api_registry import api_register
+from msprobe.mindspore.dump.hook_cell.api_register import get_api_register
 has_adump = True
 try:
@@ -44,6 +44,7 @@ class MindsporeDataProcessor(BaseDataProcessor):
             "dtype": self.analyze_dtype_in_kwargs
         }
         self._async_dump_cache = {}
+        self.api_register = get_api_register()
     @staticmethod
     def get_md5_for_tensor(x):
@@ -74,46 +75,29 @@ class MindsporeDataProcessor(BaseDataProcessor):
         else:
             if not ops.is_floating_point(data) or data.dtype == ms.float64:
                 data = data.to(ms.float32)
-            api_register.norm_inner_op_set_ori_func()
-            get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max)
-            get_min_value = api_register.mint_ops_ori_attr.get("min", mint.min)
-            get_mean_value = api_register.mint_ops_ori_attr.get("mean", mint.mean)
-            if hasattr(mint, "norm"):
-                get_norm_value = api_register.mint_ops_ori_attr.get("norm", mint.norm)
-            else:
-                get_norm_value = api_register.functional_ori_attr.get("norm", ops.norm)
-            tensor_stat.max = get_max_value(data).item()
-            tensor_stat.min = get_min_value(data).item()
-            tensor_stat.mean = get_mean_value(data).item()
+            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
+            tensor_stat.max = mint.max(data).item()
+            tensor_stat.min = mint.min(data).item()
+            tensor_stat.mean = mint.mean(data).item()
             tensor_stat.norm = get_norm_value(data).item()
-            api_register.norm_inner_op_set_hook_func()
         return tensor_stat
     @staticmethod
     def get_stat_info_async(data):
         tensor_stat = TensorStatInfo()
-        stack_method = api_register.functional_ori_attr.get("stack", ms.ops.stack)
         if data.dtype == ms.complex64 or data.dtype == ms.complex128:
             logger.warning("Async dump do not support complex data!")
             return tensor_stat
         elif data.dtype == ms.bool_:
-            tensor_stat.stack_tensor_stat = (["Max", "Min"], stack_method([data.any(), data.all()]))
+            tensor_stat.stack_tensor_stat = (["Max", "Min"], ops.stack([data.any(), data.all()]))
         elif not data.shape:
-            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method([data, data, data, data]))
+            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], ops.stack([data, data, data, data]))
         else:
             if not ops.is_floating_point(data) or data.dtype == ms.float64:
                 data = data.to(ms.float32)
-            api_register.norm_inner_op_set_ori_func()
-            get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max)
-            get_min_value = api_register.mint_ops_ori_attr.get("min", mint.min)
-            get_mean_value = api_register.mint_ops_ori_attr.get("mean", mint.mean)
-            if hasattr(mint, "norm"):
-                get_norm_value = api_register.mint_ops_ori_attr.get("norm", mint.norm)
-            else:
-                get_norm_value = api_register.functional_ori_attr.get("norm", ops.norm)
-            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method(
-                [get_max_value(data), get_min_value(data), get_mean_value(data), get_norm_value(data)]))
-            api_register.norm_inner_op_set_hook_func()
+            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
+            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], ops.stack(
+                [mint.max(data), mint.min(data), mint.mean(data), get_norm_value(data)]))
         return tensor_stat
     @staticmethod
@@ -125,14 +109,17 @@ class MindsporeDataProcessor(BaseDataProcessor):
         return super().get_special_types() + cls.mindspore_special_type
     def get_stat_info(self, data):
+        self.api_register.restore_inner_used_api()
         tensor_stat = TensorStatInfo()
         if data.numel() == 0:
-            return tensor_stat
+            stat_info = tensor_stat
         else:
             if self.config.async_dump:
-                return MindsporeDataProcessor.get_stat_info_async(data)
+                stat_info = MindsporeDataProcessor.get_stat_info_async(data)
             else:
-                return MindsporeDataProcessor.get_stat_info_sync(data)
+                stat_info = MindsporeDataProcessor.get_stat_info_sync(data)
+        self.api_register.register_inner_used_api()
+        return stat_info
     def analyze_single_element(self, element, suffix_stack):
         if suffix_stack and suffix_stack[-1] in self.mindspore_object_key:
@@ -191,7 +178,7 @@ class TensorDataProcessor(MindsporeDataProcessor):
         else:
             save_tensor_as_npy(tensor, file_path)
         return single_arg
     def _analyze_numpy(self, ndarray, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
         save_npy(ndarray, file_path)
@@ -244,7 +231,7 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor):
         api_info_struct = super().analyze_backward(name, module, module_input_output)
         self.maybe_save_overflow_data()
         return api_info_struct if self.has_overflow else None
     def analyze_params(self, name, param_name, grad):
         self.has_overflow = False
         api_info_struct = super().analyze_params(name, param_name, grad)

msprobe/core/data_dump/data_processor/pytorch_processor.py CHANGED

@@ -24,14 +24,15 @@ from torch import distributed as dist
 from torch.distributed.distributed_c10d import _get_default_group
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import MsprobeException
 from msprobe.core.common.file_utils import path_len_exceeds_limit
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import convert_tuple
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
     ModuleForwardInputsOutputs, TensorStatInfo
-from msprobe.pytorch.common.utils import save_pt, load_pt
+from msprobe.pytorch.common.utils import Const as PtConst, save_pt, is_hifloat8_tensor, is_float8_tensor
 from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
-from msprobe.core.common.utils import recursion_depth_decorator
 is_gpu = False
 try:
@@ -78,14 +79,16 @@ class PytorchDataProcessor(BaseDataProcessor):
     def analyze_device_in_kwargs(element):
         single_arg = {}
         single_arg.update({'type': "torch.device"})
-        if not isinstance(element, str):
+        if isinstance(element, (int, str)):
+            single_arg.update({"value": element})
+        elif isinstance(element, torch.device):
             if hasattr(element, "index"):
                 device_value = element.type + ":" + str(element.index)
             else:
                 device_value = element.type
             single_arg.update({"value": device_value})
         else:
-            single_arg.update({"value": element})
+            logger.debug(f"Device type {type(element)} is not supported.")
         return single_arg
     @staticmethod
@@ -143,7 +146,7 @@ class PytorchDataProcessor(BaseDataProcessor):
         if data.is_meta:
             return tensor_stat
         data_clone = data.detach()
-        if data_clone.numel() == 0:
+        if not data_clone.numel() or not data_clone.data_ptr():
             return tensor_stat
         else:
             if data_clone.device.type == Const.CPU_LOWERCASE or not async_dump:
@@ -214,6 +217,18 @@ class PytorchDataProcessor(BaseDataProcessor):
             logger.warning(f"Failed to get value of torch.distributed.ReduceOp with error info: {e}.")
         return {"type": "torch.distributed.ReduceOp", "value": op_type}
+    @staticmethod
+    def _cast_to_float_if_fp8(tensor):
+        dtype = str(tensor.dtype)
+        if is_float8_tensor(tensor):
+            dtype = PtConst.HIFLOAT8_TYPE if is_hifloat8_tensor(tensor) else dtype
+            logger.debug(
+                f"The {dtype} tensor analyzing/saving is unsupported in dump function."
+                f"Casting to float for processing."
+            )
+            tensor = tensor.float()
+        return tensor, dtype
     @classmethod
     def get_special_types(cls):
         return super().get_special_types() + cls.pytorch_special_type
@@ -228,7 +243,7 @@ class PytorchDataProcessor(BaseDataProcessor):
         if isinstance(element, dist.ProcessGroup):
             return self._analyze_process_group(element)
         if isinstance(element, dist.P2POp):
-            return self._analyze_p2pop(element)
+            return self._analyze_p2pop(element, Const.SEP.join([str(suffix) for suffix in suffix_stack]))
         if isinstance(element, dist.ReduceOp):
             return self._analyze_reduce_op(element)
         converted_numpy, numpy_type = self._convert_numpy_to_builtin(element)
@@ -247,10 +262,10 @@ class PytorchDataProcessor(BaseDataProcessor):
             module_input_output.update_output_with_args_and_kwargs()
         return super().analyze_forward_output(name, module, module_input_output)
-    def _analyze_p2pop(self, arg):
+    def _analyze_p2pop(self, arg, suffix):
         p2pop_info = {"class_type": "torch.distributed.P2POp"}
         try:
-            tensor_info = self._analyze_tensor(arg.tensor, [])
+            tensor_info = self._analyze_tensor(arg.tensor, suffix)
             p2pop_info.update({"tensor": tensor_info})
             p2pop_info.update({"op": arg.op.__name__})
             p2pop_info.update({"peer": arg.peer})
@@ -263,10 +278,11 @@ class PytorchDataProcessor(BaseDataProcessor):
         return p2pop_info
     def _analyze_tensor(self, tensor, suffix):
+        tensor, dtype = self._cast_to_float_if_fp8(tensor)
         tensor_stat = self.get_stat_info(tensor, self.config.async_dump)
         tensor_json = {}
         tensor_json.update({'type': 'torch.Tensor'})
-        tensor_json.update({'dtype': str(tensor.dtype)})
+        tensor_json.update({'dtype': dtype})
         tensor_json.update({"shape": tensor.shape})
         if tensor_stat.stack_tensor_stat is None:
             tensor_json.update({"Max": tensor_stat.max})
@@ -305,13 +321,14 @@ class TensorDataProcessor(PytorchDataProcessor):
         dump_data_name, file_path = self.get_save_file_path(suffix)
         single_arg = super()._analyze_tensor(tensor, suffix)
         single_arg.update({"data_name": dump_data_name})
+        tensor, _ = self._cast_to_float_if_fp8(tensor)
         if self.config.async_dump:
             self._async_dump_cache[file_path] = tensor.clone().detach()
         else:
             saved_tensor = tensor.clone().contiguous().detach()
             save_pt(saved_tensor, file_path)
         return single_arg
     def _analyze_numpy(self, ndarray, suffix):
         dump_data_name, file_path = self.get_save_file_path(suffix)
         save_pt(torch.tensor(ndarray), file_path)
@@ -383,7 +400,8 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
             self._analyze_maybe_overflow_flag()
         if self.has_overflow:
             for file_path, tensor in self.cached_tensors_and_file_paths.items():
-                save_pt(tensor, file_path)
+                tensor, _ = self._cast_to_float_if_fp8(tensor)
+                save_pt(tensor.clone().contiguous().detach(), file_path)
             self.real_overflow_nums += 1
             if self.overflow_nums != -1 and self.real_overflow_nums >= self.overflow_nums:
                 logger.info(f"[{Const.TOOL_NAME}] Reached the preset overflow times, "
@@ -508,11 +526,13 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
             return
         if self.config.is_backward_kernel_dump:
-            self.forward_args = self.clone_and_detach_tensor(module_input_output.args)
-            self.forward_kwargs = self.clone_and_detach_tensor(module_input_output.kwargs)
             try:
+                self.forward_args = self.clone_and_detach_tensor(module_input_output.args)
+                self.forward_kwargs = self.clone_and_detach_tensor(module_input_output.kwargs)
                 output = module.forward(*self.forward_args, **self.forward_kwargs)
-            except Exception:
+            except Exception as e:
+                if isinstance(e, MsprobeException):
+                    logger.warning(str(e))
                 self._print_unsupported_log(name)
                 self.enable_kernel_dump = False
                 return
@@ -554,9 +574,17 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
         self.stop_kernel_dump()
         logger.info(f"The kernel data of {name} is dumped successfully.")
-    @recursion_depth_decorator("KernelDump: KernelDumpDataProcessor.clone_and_detach_tensor")
+    @recursion_depth_decorator(
+        "KernelDump: KernelDumpDataProcessor.clone_and_detach_tensor",
+        max_depth=Const.DUMP_MAX_DEPTH
+    )
     def clone_and_detach_tensor(self, input_params):
         if isinstance(input_params, torch.Tensor):
+            if is_float8_tensor(input_params):
+                raise MsprobeException(
+                    MsprobeException.UNSUPPORTED_TYPE_ERROR,
+                    f"L2 backward dump does not support float8 type."
+                )
             if input_params.requires_grad:
                 return input_params.clone().detach().requires_grad_()
             return input_params.clone()
@@ -571,6 +599,8 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
     def analyze_single_element(self, element, suffix_stack):
         if isinstance(element, torch.Tensor):
+            if is_float8_tensor(element):
+                return {}
             if not self.is_found_output_tensor:
                 if element.requires_grad:
                     self.forward_output_tensor = element

msprobe/core/data_dump/json_writer.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -16,12 +16,14 @@
 import csv
 import os
 import copy
-import numpy as np
+import threading
 from msprobe.core.common.const import Const, FileCheckConst
 from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json
 from msprobe.core.common.log import logger
-from msprobe.core.common.exceptions import MsprobeException
+from msprobe.core.common.decorator import recursion_depth_decorator
+lock = threading.Lock()
 class DataWriter:
@@ -90,28 +92,32 @@ class DataWriter:
             self.write_json()
     def update_data(self, new_data):
-        if not isinstance(new_data, dict) or len(new_data.keys()) != 1:
-            logger.warning(f"The data info({new_data}) should be a dict with only one outer key.")
-            return
-        dump_data = self.cache_data.get(Const.DATA)
-        if not isinstance(dump_data, dict):
-            logger.warning(f"The dump data({dump_data}) should be a dict.")
-            return
-        key = next(iter(new_data.keys()))
-        if key in dump_data:
-            dump_data.get(key).update(new_data.get(key))
-        else:
-            dump_data.update(new_data)
+        with lock:
+            if not isinstance(new_data, dict) or len(new_data.keys()) != 1:
+                logger.warning(f"The data info({new_data}) should be a dict with only one outer key.")
+                return
+            dump_data = self.cache_data.get(Const.DATA)
+            if not isinstance(dump_data, dict):
+                logger.warning(f"The dump data({dump_data}) should be a dict.")
+                return
+            key = next(iter(new_data.keys()))
+            if key in dump_data:
+                dump_data.get(key).update(new_data.get(key))
+            else:
+                dump_data.update(new_data)
     def update_stack(self, new_data):
-        self.cache_stack.update(new_data)
+        with lock:
+            self.cache_stack.update(new_data)
     def update_construct(self, new_data):
-        self.cache_construct.update(new_data)
+        with lock:
+            self.cache_construct.update(new_data)
     def update_debug(self, new_data):
-        self.cache_debug['data'].update(new_data)
+        with lock:
+            self.cache_debug['data'].update(new_data)
     def write_data_json(self, file_path):
         logger.info(f"dump.json is at {os.path.dirname(os.path.dirname(file_path))}. ")
@@ -127,22 +133,21 @@ class DataWriter:
         save_json(file_path, self.cache_debug, indent=1)
     def write_json(self):
-        if self.cache_data:
-            self.write_data_json(self.dump_file_path)
-        if self.cache_stack:
-            self.write_stack_info_json(self.stack_file_path)
-        if self.cache_construct:
-            self.write_construct_info_json(self.construct_file_path)
-        if self.cache_debug:
-            self.write_debug_info_json(self.debug_file_path)
+        with lock:
+            if self.cache_data:
+                self.write_data_json(self.dump_file_path)
+            if self.cache_stack:
+                self.write_stack_info_json(self.stack_file_path)
+            if self.cache_construct:
+                self.write_construct_info_json(self.construct_file_path)
+            if self.cache_debug:
+                self.write_debug_info_json(self.debug_file_path)
     def fill_stack_tensor_data(self):
         self.process_stat_data_recursive(self.cache_data)
-    def process_stat_data_recursive(self, data, depth=0):
-        if depth > Const.MAX_DEPTH:
-            logger.error(f"The maximum depth of recursive process stat data, {Const.MAX_DEPTH} is reached.")
-            raise MsprobeException(MsprobeException.RECURSION_LIMIT_ERROR)
+    @recursion_depth_decorator("AsyncDump: DataWriter.process_stat_data_recursive", max_depth=Const.DUMP_MAX_DEPTH)
+    def process_stat_data_recursive(self, data):
         if isinstance(data, dict):
             if "tensor_stat" in data.keys():
                 tensor_stat = data["tensor_stat"]
@@ -150,14 +155,12 @@ class DataWriter:
                     logger.warning("Some bad data in async dump")
                 else:
                     tensor_stat_index, tensor_stat_data = tensor_stat[0], tensor_stat[1]
-                    if hasattr(tensor_stat_data, "device") and tensor_stat_data.device != Const.CPU_LOWERCASE:
-                        tensor_stat_data = tensor_stat_data.cpu()
                     for index, stat in zip(tensor_stat_index, tensor_stat_data):
                         data.update({index: stat.item()})
                 del data["tensor_stat"]
             else:
                 for key in data.keys():
-                    self.process_stat_data_recursive(data[key], depth + 1)
+                    self.process_stat_data_recursive(data[key])
         elif isinstance(data, (list, tuple)):
             for i in data:
-                self.process_stat_data_recursive(i, depth + 1)
+                self.process_stat_data_recursive(i)

msprobe/core/grad_probe/constant.py CHANGED Viewed

@@ -31,6 +31,7 @@ class GradConst:
     STEP = "step"
     BOUNDS = "bounds"
     OUTPUT_PATH = "output_path"
+    TIME_STAMP = "time_stamp"
     # level const
     LEVEL = "level"

msprobe/core/grad_probe/grad_compare.py CHANGED Viewed

@@ -112,7 +112,7 @@ class GradComparator:
             result.append([key] + value)
         result_csv_path = os.path.join(output_dir, "similarities.csv")
         if os.path.exists(result_csv_path):
-            logger.warning(f"{result_csv_path} will be recoverd")
+            logger.warning(f"{result_csv_path} will be deleted")
             remove_path(result_csv_path)
         write_csv(result, result_csv_path)

msprobe/core/overflow_check/abnormal_scene.py CHANGED Viewed

@@ -20,6 +20,7 @@ import numpy as np
 from msprobe.core.overflow_check.api_info import APIInfo
 from msprobe.core.overflow_check.level import OverflowLevel
 from msprobe.core.overflow_check.utils import has_nan_inf
+from msprobe.core.common.decorator import recursion_depth_decorator
 class AnomalyScene:
@@ -35,6 +36,7 @@ class AnomalyScene:
         raise NotImplementedError
     @staticmethod
+    @recursion_depth_decorator("AbnormalScene: AnomalyScene._has_anomaly")
     def _has_anomaly(data: Union[Dict, Any]) -> bool:
         """检查张量是否包含异常值"""
         if isinstance(data, dict):

msprobe/docs/01.installation.md CHANGED Viewed

@@ -16,6 +16,7 @@ pip install mindstudio-probe
 |版本|发布日期|支持 PyTorch 版本|支持 MindSpore 版本|下载链接|校验码|
 |:--:|:--:|:--:|:--:|:--:|:--:|
+|1.2.2|2025.3.03|1.11/2.0/2.1/2.2|2.4.0|[mindstudio_probe-1.2.2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.2/mindstudio_probe-1.2.2-py3-none-any.whl)|961411bb460d327ea51d6ca4d0c8e8c5565f07c0852d7b8592b781ca35b87212|
 |1.2.1|2025.2.07|1.11/2.0/2.1/2.2|2.4.0|[mindstudio_probe-1.2.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.2/mindstudio_probe-1.2.1-py3-none-any.whl)|b64b342118558e0339b39237f88a49b93fd24551b0cb202c872fbfef4260c86b|
 |1.2.0|2025.1.13|1.11/2.0/2.1/2.2|2.4.0|[mindstudio_probe-1.2.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.2/mindstudio_probe-1.2.0-py3-none-any.whl)|1e3aeea1706112f6ee52fd1165037936bb209138f0b9ec42ea21e2c1c8942cdc|
 |1.1.1|2024.12.09|1.11/2.0/2.1/2.2|2.4.0|[mindstudio_probe-1.1.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.1/mindstudio_probe-1.1.1-py3-none-any.whl)|577b597555dc155b76ba1a62d575c3546004644e140a456c3ba0824d46283735|
@@ -51,7 +52,7 @@ pip install ./mindstudio_probe*.whl
 |参数|说明|是否必选|
 |--|--|:--:|
-|--include-mod|指定可选模块，可取值`adump`，表示在编whl包时加入adump模块。默认未配置该参数，表示编基础包。<br>&#8226; adump模块用于MindSpore静态图场景L2级别的dump。<br>&#8226; 仅MindSpore 2.5.0及以上版本支持adump模块。<br>&#8226; 若使用源码安装，编译环境需支持GCC 7或以上版本，和CMAKE 3.14或以上版本。<br>&#8226; 生成的whl包仅限编译时使用的python版本和处理器架构可用。|否|
+|--include-mod|指定可选模块，可取值`adump`，表示在编whl包时加入adump模块。默认未配置该参数，表示编基础包。<br>&#8226; adump模块用于MindSpore静态图场景L2级别的dump。<br>&#8226; 仅MindSpore 2.5.0及以上版本支持adump模块。<br>&#8226; 若使用源码安装，编译环境需支持GCC 7.5或以上版本，和CMAKE 3.14或以上版本。<br>&#8226; 生成的whl包仅限编译时使用的python版本和处理器架构可用。|否|
 # 特性变更说明

mindstudio-probe 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl

mindstudio-probe 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl