PyPI - mindstudio-probe - Versions diffs - 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

mindstudio-probe 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (177)

{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/METADATA +3 -3
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/RECORD +168 -150
msprobe/README.md +27 -22
msprobe/core/common/const.py +129 -60
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +3 -1
msprobe/core/common/file_utils.py +25 -2
msprobe/core/common/inplace_ops.yaml +1 -0
msprobe/core/common/utils.py +43 -33
msprobe/core/compare/acc_compare.py +43 -74
msprobe/core/compare/check.py +2 -6
msprobe/core/compare/highlight.py +2 -0
msprobe/core/compare/layer_mapping/data_scope_parser.py +1 -1
msprobe/core/compare/layer_mapping/layer_mapping.py +2 -1
msprobe/core/compare/merge_result/merge_result.py +16 -9
msprobe/core/compare/merge_result/utils.py +81 -0
msprobe/core/compare/multiprocessing_compute.py +19 -12
msprobe/core/compare/npy_compare.py +30 -12
msprobe/core/compare/utils.py +30 -10
msprobe/core/data_dump/api_registry.py +176 -0
msprobe/core/data_dump/data_collector.py +58 -13
msprobe/core/data_dump/data_processor/base.py +94 -10
msprobe/core/data_dump/data_processor/factory.py +3 -0
msprobe/core/data_dump/data_processor/mindspore_processor.py +33 -33
msprobe/core/data_dump/data_processor/pytorch_processor.py +99 -18
msprobe/core/data_dump/json_writer.py +61 -40
msprobe/core/grad_probe/constant.py +1 -0
msprobe/core/grad_probe/grad_compare.py +1 -1
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/docs/01.installation.md +27 -1
msprobe/docs/02.config_introduction.md +27 -23
msprobe/docs/03.config_examples.md +24 -0
msprobe/docs/05.data_dump_PyTorch.md +103 -16
msprobe/docs/06.data_dump_MindSpore.md +76 -32
msprobe/docs/07.accuracy_checker_PyTorch.md +11 -1
msprobe/docs/08.accuracy_checker_online_PyTorch.md +3 -1
msprobe/docs/09.accuracy_checker_MindSpore.md +5 -3
msprobe/docs/10.accuracy_compare_PyTorch.md +59 -33
msprobe/docs/11.accuracy_compare_MindSpore.md +40 -16
msprobe/docs/12.overflow_check_PyTorch.md +3 -1
msprobe/docs/13.overflow_check_MindSpore.md +4 -2
msprobe/docs/14.data_parse_PyTorch.md +1 -7
msprobe/docs/18.online_dispatch.md +1 -1
msprobe/docs/19.monitor.md +332 -273
msprobe/docs/21.visualization_PyTorch.md +42 -13
msprobe/docs/22.visualization_MindSpore.md +43 -13
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/27.dump_json_instruction.md +301 -27
msprobe/docs/28.debugger_save_instruction.md +94 -0
msprobe/docs/28.kernel_dump_MindSpore.md +69 -0
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/monitor/step_count_per_record.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +4 -2
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +32 -7
msprobe/mindspore/api_accuracy_checker/api_runner.py +70 -22
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +602 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/compute_element.py +47 -1
msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -1
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +2 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +130 -0
msprobe/mindspore/api_accuracy_checker/type_mapping.py +24 -1
msprobe/mindspore/api_accuracy_checker/utils.py +6 -1
msprobe/mindspore/common/const.py +61 -0
msprobe/mindspore/common/utils.py +48 -18
msprobe/mindspore/compare/ms_compare.py +27 -19
msprobe/mindspore/compare/ms_graph_compare.py +6 -5
msprobe/mindspore/debugger/debugger_config.py +31 -6
msprobe/mindspore/debugger/precision_debugger.py +45 -14
msprobe/mindspore/dump/dump_tool_factory.py +5 -3
msprobe/mindspore/dump/hook_cell/api_register.py +142 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +9 -10
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +24 -26
msprobe/mindspore/dump/jit_dump.py +21 -15
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +22 -56
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -1
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +10 -6
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +2 -0
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/monitor/anomaly_detect.py +404 -0
msprobe/mindspore/monitor/distributed/__init__.py +0 -0
msprobe/mindspore/monitor/distributed/distributed_ops.yaml +15 -0
msprobe/mindspore/monitor/distributed/stack_blacklist.yaml +5 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +300 -0
msprobe/mindspore/monitor/features.py +63 -0
msprobe/mindspore/monitor/module_hook.py +873 -0
msprobe/mindspore/monitor/module_spec_verifier.py +94 -0
msprobe/mindspore/monitor/utils.py +309 -0
msprobe/mindspore/ms_config.py +8 -2
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/service.py +114 -34
msprobe/pytorch/__init__.py +0 -1
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +12 -7
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +2 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +4 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +5 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +25 -6
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -19
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/bench_functions/apply_adam.py +215 -0
msprobe/pytorch/bench_functions/group_norm_silu.py +27 -0
msprobe/pytorch/{parse.py → bench_functions/mish.py} +6 -4
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +50 -0
msprobe/pytorch/bench_functions/sort_v2.py +21 -0
msprobe/pytorch/common/utils.py +97 -4
msprobe/pytorch/debugger/debugger_config.py +19 -9
msprobe/pytorch/debugger/precision_debugger.py +24 -1
msprobe/pytorch/dump/module_dump/module_dump.py +4 -3
msprobe/pytorch/dump/module_dump/module_processer.py +21 -35
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +8 -2
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/hook_module/api_register.py +131 -0
msprobe/pytorch/hook_module/hook_module.py +19 -14
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +173 -75
msprobe/pytorch/monitor/anomaly_detect.py +14 -29
msprobe/pytorch/monitor/csv2tb.py +18 -14
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +238 -193
msprobe/pytorch/monitor/module_metric.py +9 -6
msprobe/pytorch/monitor/optimizer_collect.py +100 -67
msprobe/pytorch/monitor/unittest/test_monitor.py +1 -1
msprobe/pytorch/monitor/utils.py +76 -44
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +9 -0
msprobe/pytorch/online_dispatch/dump_compare.py +3 -0
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +2 -1
msprobe/pytorch/pt_config.py +30 -29
msprobe/pytorch/service.py +114 -32
msprobe/visualization/builder/graph_builder.py +75 -10
msprobe/visualization/builder/msprobe_adapter.py +7 -6
msprobe/visualization/compare/graph_comparator.py +42 -38
msprobe/visualization/compare/mode_adapter.py +0 -19
msprobe/visualization/graph/base_node.py +11 -3
msprobe/visualization/graph/distributed_analyzer.py +71 -3
msprobe/visualization/graph/graph.py +0 -11
msprobe/visualization/graph/node_op.py +4 -3
msprobe/visualization/graph_service.py +4 -5
msprobe/visualization/utils.py +12 -35
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -205
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -75
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/top_level.txt +0 -0

msprobe/core/data_dump/data_processor/mindspore_processor.py CHANGED Viewed

@@ -23,10 +23,10 @@ import numpy as np
 from msprobe.core.common.const import Const
 from msprobe.core.data_dump.data_processor.base import (BaseDataProcessor, TensorStatInfo,
                                                         ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs)
-from msprobe.core.common.file_utils import path_len_exceeds_limit
+from msprobe.core.common.file_utils import path_len_exceeds_limit, save_npy
 from msprobe.mindspore.common.utils import convert_bf16_to_fp32, save_tensor_as_npy
 from msprobe.mindspore.common.log import logger
-from msprobe.mindspore.dump.hook_cell.api_registry import api_register
+from msprobe.mindspore.dump.hook_cell.api_register import get_api_register
 has_adump = True
 try:
@@ -44,6 +44,7 @@ class MindsporeDataProcessor(BaseDataProcessor):
             "dtype": self.analyze_dtype_in_kwargs
         }
         self._async_dump_cache = {}
+        self.api_register = get_api_register()
     @staticmethod
     def get_md5_for_tensor(x):
@@ -74,61 +75,51 @@ class MindsporeDataProcessor(BaseDataProcessor):
         else:
             if not ops.is_floating_point(data) or data.dtype == ms.float64:
                 data = data.to(ms.float32)
-            api_register.norm_inner_op_set_ori_func()
-            get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max)
-            get_min_value = api_register.mint_ops_ori_attr.get("min", mint.min)
-            get_mean_value = api_register.mint_ops_ori_attr.get("mean", mint.mean)
-            if hasattr(mint, "norm"):
-                get_norm_value = api_register.mint_ops_ori_attr.get("norm", mint.norm)
-            else:
-                get_norm_value = api_register.functional_ori_attr.get("norm", ops.norm)
-            tensor_stat.max = get_max_value(data).item()
-            tensor_stat.min = get_min_value(data).item()
-            tensor_stat.mean = get_mean_value(data).item()
+            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
+            tensor_stat.max = mint.max(data).item()
+            tensor_stat.min = mint.min(data).item()
+            tensor_stat.mean = mint.mean(data).item()
             tensor_stat.norm = get_norm_value(data).item()
-            api_register.norm_inner_op_set_hook_func()
         return tensor_stat
     @staticmethod
     def get_stat_info_async(data):
         tensor_stat = TensorStatInfo()
-        stack_method = api_register.functional_ori_attr.get("stack", ms.ops.stack)
         if data.dtype == ms.complex64 or data.dtype == ms.complex128:
             logger.warning("Async dump do not support complex data!")
             return tensor_stat
         elif data.dtype == ms.bool_:
-            tensor_stat.stack_tensor_stat = (["Max", "Min"], stack_method([data.any(), data.all()]))
+            tensor_stat.stack_tensor_stat = (["Max", "Min"], ops.stack([data.any(), data.all()]))
         elif not data.shape:
-            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method([data, data, data, data]))
+            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], ops.stack([data, data, data, data]))
         else:
             if not ops.is_floating_point(data) or data.dtype == ms.float64:
                 data = data.to(ms.float32)
-            api_register.norm_inner_op_set_ori_func()
-            get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max)
-            get_min_value = api_register.mint_ops_ori_attr.get("min", mint.min)
-            get_mean_value = api_register.mint_ops_ori_attr.get("mean", mint.mean)
-            if hasattr(mint, "norm"):
-                get_norm_value = api_register.mint_ops_ori_attr.get("norm", mint.norm)
-            else:
-                get_norm_value = api_register.functional_ori_attr.get("norm", ops.norm)
-            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method(
-                [get_max_value(data), get_min_value(data), get_mean_value(data), get_norm_value(data)]))
-            api_register.norm_inner_op_set_hook_func()
+            get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm
+            tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], ops.stack(
+                [mint.max(data), mint.min(data), mint.mean(data), get_norm_value(data)]))
         return tensor_stat
+    @staticmethod
+    def is_hookable_element(element):
+        return hasattr(element, "register_hook") and callable(element.register_hook)
     @classmethod
     def get_special_types(cls):
         return super().get_special_types() + cls.mindspore_special_type
     def get_stat_info(self, data):
+        self.api_register.restore_inner_used_api()
         tensor_stat = TensorStatInfo()
         if data.numel() == 0:
-            return tensor_stat
+            stat_info = tensor_stat
         else:
             if self.config.async_dump:
-                return MindsporeDataProcessor.get_stat_info_async(data)
+                stat_info = MindsporeDataProcessor.get_stat_info_async(data)
             else:
-                return MindsporeDataProcessor.get_stat_info_sync(data)
+                stat_info = MindsporeDataProcessor.get_stat_info_sync(data)
+        self.api_register.register_inner_used_api()
+        return stat_info
     def analyze_single_element(self, element, suffix_stack):
         if suffix_stack and suffix_stack[-1] in self.mindspore_object_key:
@@ -136,11 +127,13 @@ class MindsporeDataProcessor(BaseDataProcessor):
         converted_numpy, numpy_type = self._convert_numpy_to_builtin(element)
         if converted_numpy is not element:
-            return self._analyze_numpy(converted_numpy, numpy_type)
+            return {"type": numpy_type, "value": converted_numpy}
         if isinstance(element, Number):
             return self.analyze_dtype_in_kwargs(element)
         if isinstance(element, ms.Tensor):
-            return self._analyze_tensor(element, Const.SEP.join(suffix_stack))
+            return self._analyze_tensor(element, Const.SEP.join([str(suffix) for suffix in suffix_stack]))
+        if isinstance(element, np.ndarray):
+            return self._analyze_numpy(element, Const.SEP.join([str(suffix) for suffix in suffix_stack]))
         if isinstance(element, (bool, int, float, str, slice, type(Ellipsis))):
             return self._analyze_builtin(element)
         return {}
@@ -186,6 +179,13 @@ class TensorDataProcessor(MindsporeDataProcessor):
             save_tensor_as_npy(tensor, file_path)
         return single_arg
+    def _analyze_numpy(self, ndarray, suffix):
+        dump_data_name, file_path = self.get_save_file_path(suffix)
+        save_npy(ndarray, file_path)
+        ndarray_json = super()._analyze_numpy(ndarray, suffix)
+        ndarray_json.update({"data_name": dump_data_name})
+        return ndarray_json
 class OverflowCheckDataProcessor(MindsporeDataProcessor):
     __slots__ = ["cached_tensors_and_file_paths"]

msprobe/core/data_dump/data_processor/pytorch_processor.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -21,16 +21,18 @@ from typing import List
 import numpy as np
 import torch
 from torch import distributed as dist
+from torch.distributed.distributed_c10d import _get_default_group
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import MsprobeException
 from msprobe.core.common.file_utils import path_len_exceeds_limit
 from msprobe.core.common.log import logger
 from msprobe.core.common.utils import convert_tuple
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \
     ModuleForwardInputsOutputs, TensorStatInfo
-from msprobe.pytorch.common.utils import save_pt, load_pt
+from msprobe.pytorch.common.utils import Const as PtConst, save_pt, is_hifloat8_tensor, is_float8_tensor
 from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow
-from msprobe.core.common.utils import recursion_depth_decorator
 is_gpu = False
 try:
@@ -40,7 +42,16 @@ except ImportError:
 class PytorchDataProcessor(BaseDataProcessor):
-    pytorch_special_type = (torch.device, torch.dtype, torch.Size, torch.Tensor, torch.memory_format, dist.ProcessGroup)
+    pytorch_special_type = (
+        torch.device,
+        torch.dtype,
+        torch.Size,
+        torch.Tensor,
+        torch.memory_format,
+        dist.ProcessGroup,
+        dist.P2POp,
+        dist.ReduceOp
+    )
     memory_format = {
         torch.contiguous_format: "contiguous_format",
         torch.channels_last: "channels_last",
@@ -68,14 +79,16 @@ class PytorchDataProcessor(BaseDataProcessor):
     def analyze_device_in_kwargs(element):
         single_arg = {}
         single_arg.update({'type': "torch.device"})
-        if not isinstance(element, str):
+        if isinstance(element, (int, str)):
+            single_arg.update({"value": element})
+        elif isinstance(element, torch.device):
             if hasattr(element, "index"):
                 device_value = element.type + ":" + str(element.index)
             else:
                 device_value = element.type
             single_arg.update({"value": device_value})
         else:
-            single_arg.update({"value": element})
+            logger.debug(f"Device type {type(element)} is not supported.")
         return single_arg
     @staticmethod
@@ -133,7 +146,7 @@ class PytorchDataProcessor(BaseDataProcessor):
         if data.is_meta:
             return tensor_stat
         data_clone = data.detach()
-        if data_clone.numel() == 0:
+        if not data_clone.numel() or not data_clone.data_ptr():
             return tensor_stat
         else:
             if data_clone.device.type == Const.CPU_LOWERCASE or not async_dump:
@@ -168,6 +181,11 @@ class PytorchDataProcessor(BaseDataProcessor):
     def is_distributed_op(module):
         return getattr(module, "op_is_distributed", False)
+    @staticmethod
+    def is_hookable_element(element):
+        return (hasattr(element, "register_hook") and callable(element.register_hook)) and \
+            (hasattr(element, "requires_grad") and element.requires_grad)
     @staticmethod
     def _analyze_torch_size(arg):
         return {"type": "torch.Size", "value": list(arg)}
@@ -176,7 +194,6 @@ class PytorchDataProcessor(BaseDataProcessor):
     def _analyze_memory_format(arg):
         # 获取内存格式
         format_type = PytorchDataProcessor.memory_format.get(arg)
         return {"type": "torch.memory_format", "format": format_type}
     @staticmethod
@@ -188,9 +205,30 @@ class PytorchDataProcessor(BaseDataProcessor):
             group_id = PytorchDataProcessor.process_group_hash(arg)
             group_info.update({"group_id": group_id})
         except Exception as e:
-            logger.warning(f"Failed to get process group(id: {group_id}) ranks info with error info: {e}.")
+            logger.warning(f"Failed to get process group ranks info with error info: {e}.")
         return group_info
+    @staticmethod
+    def _analyze_reduce_op(arg):
+        op_type = None
+        try:
+            op_type = str(arg)
+        except Exception as e:
+            logger.warning(f"Failed to get value of torch.distributed.ReduceOp with error info: {e}.")
+        return {"type": "torch.distributed.ReduceOp", "value": op_type}
+    @staticmethod
+    def _cast_to_float_if_fp8(tensor):
+        dtype = str(tensor.dtype)
+        if is_float8_tensor(tensor):
+            dtype = PtConst.HIFLOAT8_TYPE if is_hifloat8_tensor(tensor) else dtype
+            logger.debug(
+                f"The {dtype} tensor analyzing/saving is unsupported in dump function."
+                f"Casting to float for processing."
+            )
+            tensor = tensor.float()
+        return tensor, dtype
     @classmethod
     def get_special_types(cls):
         return super().get_special_types() + cls.pytorch_special_type
@@ -204,11 +242,17 @@ class PytorchDataProcessor(BaseDataProcessor):
             return self._analyze_memory_format(element)
         if isinstance(element, dist.ProcessGroup):
             return self._analyze_process_group(element)
+        if isinstance(element, dist.P2POp):
+            return self._analyze_p2pop(element, Const.SEP.join([str(suffix) for suffix in suffix_stack]))
+        if isinstance(element, dist.ReduceOp):
+            return self._analyze_reduce_op(element)
         converted_numpy, numpy_type = self._convert_numpy_to_builtin(element)
         if converted_numpy is not element:
-            return self._analyze_numpy(converted_numpy, numpy_type)
+            return {"type": numpy_type, "value": converted_numpy}
         if isinstance(element, torch.Tensor):
-            return self._analyze_tensor(element, Const.SEP.join(suffix_stack))
+            return self._analyze_tensor(element, Const.SEP.join([str(suffix) for suffix in suffix_stack]))
+        if isinstance(element, np.ndarray):
+            return self._analyze_numpy(element, Const.SEP.join([str(suffix) for suffix in suffix_stack]))
         if isinstance(element, (bool, int, float, str, slice, type(Ellipsis))):
             return self._analyze_builtin(element)
         return {}
@@ -218,11 +262,27 @@ class PytorchDataProcessor(BaseDataProcessor):
             module_input_output.update_output_with_args_and_kwargs()
         return super().analyze_forward_output(name, module, module_input_output)
+    def _analyze_p2pop(self, arg, suffix):
+        p2pop_info = {"class_type": "torch.distributed.P2POp"}
+        try:
+            tensor_info = self._analyze_tensor(arg.tensor, suffix)
+            p2pop_info.update({"tensor": tensor_info})
+            p2pop_info.update({"op": arg.op.__name__})
+            p2pop_info.update({"peer": arg.peer})
+            p2pop_info.update({"tag": arg.tag})
+            group_id = PytorchDataProcessor.process_group_hash(
+                arg.group) if arg.group else PytorchDataProcessor.process_group_hash(_get_default_group())
+            p2pop_info.update({"group_id": group_id})
+        except Exception as e:
+            logger.warning(f"Failed to parse the P2POp content with error info: {e}.")
+        return p2pop_info
     def _analyze_tensor(self, tensor, suffix):
+        tensor, dtype = self._cast_to_float_if_fp8(tensor)
         tensor_stat = self.get_stat_info(tensor, self.config.async_dump)
         tensor_json = {}
         tensor_json.update({'type': 'torch.Tensor'})
-        tensor_json.update({'dtype': str(tensor.dtype)})
+        tensor_json.update({'dtype': dtype})
         tensor_json.update({"shape": tensor.shape})
         if tensor_stat.stack_tensor_stat is None:
             tensor_json.update({"Max": tensor_stat.max})
@@ -261,6 +321,7 @@ class TensorDataProcessor(PytorchDataProcessor):
         dump_data_name, file_path = self.get_save_file_path(suffix)
         single_arg = super()._analyze_tensor(tensor, suffix)
         single_arg.update({"data_name": dump_data_name})
+        tensor, _ = self._cast_to_float_if_fp8(tensor)
         if self.config.async_dump:
             self._async_dump_cache[file_path] = tensor.clone().detach()
         else:
@@ -268,6 +329,13 @@ class TensorDataProcessor(PytorchDataProcessor):
             save_pt(saved_tensor, file_path)
         return single_arg
+    def _analyze_numpy(self, ndarray, suffix):
+        dump_data_name, file_path = self.get_save_file_path(suffix)
+        save_pt(torch.tensor(ndarray), file_path)
+        ndarray_json = super()._analyze_numpy(ndarray, suffix)
+        ndarray_json.update({"data_name": dump_data_name})
+        return ndarray_json
 class OverflowCheckDataProcessor(PytorchDataProcessor):
     __slots__ = ["cached_tensors_and_file_paths"]
@@ -319,7 +387,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
         api_info_struct = super().analyze_backward(name, module, module_input_output)
         self.handle_overflow()
         return api_info_struct if self.has_overflow else None
     def analyze_params(self, name, param_name, grad):
         self.has_overflow = False
         self._is_support_inf_nan()
@@ -332,7 +400,8 @@ class OverflowCheckDataProcessor(PytorchDataProcessor):
             self._analyze_maybe_overflow_flag()
         if self.has_overflow:
             for file_path, tensor in self.cached_tensors_and_file_paths.items():
-                save_pt(tensor, file_path)
+                tensor, _ = self._cast_to_float_if_fp8(tensor)
+                save_pt(tensor.clone().contiguous().detach(), file_path)
             self.real_overflow_nums += 1
             if self.overflow_nums != -1 and self.real_overflow_nums >= self.overflow_nums:
                 logger.info(f"[{Const.TOOL_NAME}] Reached the preset overflow times, "
@@ -457,11 +526,13 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
             return
         if self.config.is_backward_kernel_dump:
-            self.forward_args = self.clone_and_detach_tensor(module_input_output.args)
-            self.forward_kwargs = self.clone_and_detach_tensor(module_input_output.kwargs)
             try:
+                self.forward_args = self.clone_and_detach_tensor(module_input_output.args)
+                self.forward_kwargs = self.clone_and_detach_tensor(module_input_output.kwargs)
                 output = module.forward(*self.forward_args, **self.forward_kwargs)
-            except Exception:
+            except Exception as e:
+                if isinstance(e, MsprobeException):
+                    logger.warning(str(e))
                 self._print_unsupported_log(name)
                 self.enable_kernel_dump = False
                 return
@@ -503,9 +574,17 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
         self.stop_kernel_dump()
         logger.info(f"The kernel data of {name} is dumped successfully.")
-    @recursion_depth_decorator("KernelDump: KernelDumpDataProcessor.clone_and_detach_tensor")
+    @recursion_depth_decorator(
+        "KernelDump: KernelDumpDataProcessor.clone_and_detach_tensor",
+        max_depth=Const.DUMP_MAX_DEPTH
+    )
     def clone_and_detach_tensor(self, input_params):
         if isinstance(input_params, torch.Tensor):
+            if is_float8_tensor(input_params):
+                raise MsprobeException(
+                    MsprobeException.UNSUPPORTED_TYPE_ERROR,
+                    f"L2 backward dump does not support float8 type."
+                )
             if input_params.requires_grad:
                 return input_params.clone().detach().requires_grad_()
             return input_params.clone()
@@ -520,6 +599,8 @@ class KernelDumpDataProcessor(PytorchDataProcessor):
     def analyze_single_element(self, element, suffix_stack):
         if isinstance(element, torch.Tensor):
+            if is_float8_tensor(element):
+                return {}
             if not self.is_found_output_tensor:
                 if element.requires_grad:
                     self.forward_output_tensor = element

msprobe/core/data_dump/json_writer.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -15,12 +15,15 @@
 import csv
 import os
-import numpy as np
+import copy
+import threading
 from msprobe.core.common.const import Const, FileCheckConst
 from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json
 from msprobe.core.common.log import logger
-from msprobe.core.common.exceptions import MsprobeException
+from msprobe.core.common.decorator import recursion_depth_decorator
+lock = threading.Lock()
 class DataWriter:
@@ -31,10 +34,12 @@ class DataWriter:
         self.construct_file_path = None
         self.free_benchmark_file_path = None
         self.dump_tensor_data_dir = None
+        self.debug_file_path = None
         self.flush_size = 1000
         self.cache_data = {}
         self.cache_stack = {}
         self.cache_construct = {}
+        self.cache_debug = {}
     @staticmethod
     def write_data_to_csv(result: list, result_header: tuple, file_path: str):
@@ -57,6 +62,13 @@ class DataWriter:
         self.cache_construct = {}
     def initialize_json_file(self, **kwargs):
+        if self.debug_file_path and not self.cache_debug:
+            # debug level case only create debug.json
+            debug_dict = copy.deepcopy(kwargs)
+            debug_dict.update({"dump_data_dir": self.dump_tensor_data_dir, Const.DATA: {}})
+            self.cache_debug = debug_dict
+            save_json(self.debug_file_path, self.cache_debug, indent=1)
+            return
         if not self.cache_data:
             kwargs.update({"dump_data_dir": self.dump_tensor_data_dir, Const.DATA: {}})
             self.cache_data = kwargs
@@ -66,13 +78,13 @@ class DataWriter:
         if not self.cache_construct:
             save_json(self.construct_file_path, self.cache_construct, indent=1)
-    def update_dump_paths(self, dump_file_path, stack_file_path, construct_file_path, dump_data_dir,
-                          free_benchmark_file_path):
-        self.dump_file_path = dump_file_path
-        self.stack_file_path = stack_file_path
-        self.construct_file_path = construct_file_path
-        self.dump_tensor_data_dir = dump_data_dir
-        self.free_benchmark_file_path = free_benchmark_file_path
+    def update_dump_paths(self, dump_path_aggregation):
+        self.dump_file_path = dump_path_aggregation.dump_file_path
+        self.stack_file_path = dump_path_aggregation.stack_file_path
+        self.construct_file_path = dump_path_aggregation.construct_file_path
+        self.dump_tensor_data_dir = dump_path_aggregation.dump_tensor_data_dir
+        self.free_benchmark_file_path = dump_path_aggregation.free_benchmark_file_path
+        self.debug_file_path = dump_path_aggregation.debug_file_path
     def flush_data_periodically(self):
         dump_data = self.cache_data.get(Const.DATA)
@@ -80,25 +92,32 @@ class DataWriter:
             self.write_json()
     def update_data(self, new_data):
-        if not isinstance(new_data, dict) or len(new_data.keys()) != 1:
-            logger.warning(f"The data info({new_data}) should be a dict with only one outer key.")
-            return
-        dump_data = self.cache_data.get(Const.DATA)
-        if not isinstance(dump_data, dict):
-            logger.warning(f"The dump data({dump_data}) should be a dict.")
-            return
-        key = next(iter(new_data.keys()))
-        if key in dump_data:
-            dump_data.get(key).update(new_data.get(key))
-        else:
-            dump_data.update(new_data)
+        with lock:
+            if not isinstance(new_data, dict) or len(new_data.keys()) != 1:
+                logger.warning(f"The data info({new_data}) should be a dict with only one outer key.")
+                return
+            dump_data = self.cache_data.get(Const.DATA)
+            if not isinstance(dump_data, dict):
+                logger.warning(f"The dump data({dump_data}) should be a dict.")
+                return
+            key = next(iter(new_data.keys()))
+            if key in dump_data:
+                dump_data.get(key).update(new_data.get(key))
+            else:
+                dump_data.update(new_data)
     def update_stack(self, new_data):
-        self.cache_stack.update(new_data)
+        with lock:
+            self.cache_stack.update(new_data)
     def update_construct(self, new_data):
-        self.cache_construct.update(new_data)
+        with lock:
+            self.cache_construct.update(new_data)
+    def update_debug(self, new_data):
+        with lock:
+            self.cache_debug['data'].update(new_data)
     def write_data_json(self, file_path):
         logger.info(f"dump.json is at {os.path.dirname(os.path.dirname(file_path))}. ")
@@ -110,21 +129,25 @@ class DataWriter:
     def write_construct_info_json(self, file_path):
         save_json(file_path, self.cache_construct, indent=1)
+    def write_debug_info_json(self, file_path):
+        save_json(file_path, self.cache_debug, indent=1)
     def write_json(self):
-        if self.cache_data:
-            self.write_data_json(self.dump_file_path)
-        if self.cache_stack:
-            self.write_stack_info_json(self.stack_file_path)
-        if self.cache_construct:
-            self.write_construct_info_json(self.construct_file_path)
+        with lock:
+            if self.cache_data:
+                self.write_data_json(self.dump_file_path)
+            if self.cache_stack:
+                self.write_stack_info_json(self.stack_file_path)
+            if self.cache_construct:
+                self.write_construct_info_json(self.construct_file_path)
+            if self.cache_debug:
+                self.write_debug_info_json(self.debug_file_path)
     def fill_stack_tensor_data(self):
         self.process_stat_data_recursive(self.cache_data)
-    def process_stat_data_recursive(self, data, depth=0):
-        if depth > Const.MAX_DEPTH:
-            logger.error(f"The maximum depth of recursive process stat data, {Const.MAX_DEPTH} is reached.")
-            raise MsprobeException(MsprobeException.RECURSION_LIMIT_ERROR)
+    @recursion_depth_decorator("AsyncDump: DataWriter.process_stat_data_recursive", max_depth=Const.DUMP_MAX_DEPTH)
+    def process_stat_data_recursive(self, data):
         if isinstance(data, dict):
             if "tensor_stat" in data.keys():
                 tensor_stat = data["tensor_stat"]
@@ -132,14 +155,12 @@ class DataWriter:
                     logger.warning("Some bad data in async dump")
                 else:
                     tensor_stat_index, tensor_stat_data = tensor_stat[0], tensor_stat[1]
-                    if hasattr(tensor_stat_data, "device") and tensor_stat_data.device != Const.CPU_LOWERCASE:
-                        tensor_stat_data = tensor_stat_data.cpu()
                     for index, stat in zip(tensor_stat_index, tensor_stat_data):
-                        data.update({index, stat.item()})
+                        data.update({index: stat.item()})
                 del data["tensor_stat"]
             else:
                 for key in data.keys():
-                    self.process_stat_data_recursive(data[key], depth + 1)
+                    self.process_stat_data_recursive(data[key])
         elif isinstance(data, (list, tuple)):
             for i in data:
-                self.process_stat_data_recursive(i, depth + 1)
+                self.process_stat_data_recursive(i)

msprobe/core/grad_probe/constant.py CHANGED Viewed

@@ -31,6 +31,7 @@ class GradConst:
     STEP = "step"
     BOUNDS = "bounds"
     OUTPUT_PATH = "output_path"
+    TIME_STAMP = "time_stamp"
     # level const
     LEVEL = "level"

msprobe/core/grad_probe/grad_compare.py CHANGED Viewed

@@ -112,7 +112,7 @@ class GradComparator:
             result.append([key] + value)
         result_csv_path = os.path.join(output_dir, "similarities.csv")
         if os.path.exists(result_csv_path):
-            logger.warning(f"{result_csv_path} will be recoverd")
+            logger.warning(f"{result_csv_path} will be deleted")
             remove_path(result_csv_path)
         write_csv(result, result_csv_path)

msprobe/core/overflow_check/abnormal_scene.py CHANGED Viewed

@@ -20,6 +20,7 @@ import numpy as np
 from msprobe.core.overflow_check.api_info import APIInfo
 from msprobe.core.overflow_check.level import OverflowLevel
 from msprobe.core.overflow_check.utils import has_nan_inf
+from msprobe.core.common.decorator import recursion_depth_decorator
 class AnomalyScene:
@@ -35,6 +36,7 @@ class AnomalyScene:
         raise NotImplementedError
     @staticmethod
+    @recursion_depth_decorator("AbnormalScene: AnomalyScene._has_anomaly")
     def _has_anomaly(data: Union[Dict, Any]) -> bool:
         """检查张量是否包含异常值"""
         if isinstance(data, dict):

msprobe/docs/01.installation.md CHANGED Viewed

@@ -16,6 +16,8 @@ pip install mindstudio-probe
 |版本|发布日期|支持 PyTorch 版本|支持 MindSpore 版本|下载链接|校验码|
 |:--:|:--:|:--:|:--:|:--:|:--:|
+|1.2.2|2025.3.03|1.11/2.0/2.1/2.2|2.4.0|[mindstudio_probe-1.2.2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.2/mindstudio_probe-1.2.2-py3-none-any.whl)|961411bb460d327ea51d6ca4d0c8e8c5565f07c0852d7b8592b781ca35b87212|
+|1.2.1|2025.2.07|1.11/2.0/2.1/2.2|2.4.0|[mindstudio_probe-1.2.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.2/mindstudio_probe-1.2.1-py3-none-any.whl)|b64b342118558e0339b39237f88a49b93fd24551b0cb202c872fbfef4260c86b|
 |1.2.0|2025.1.13|1.11/2.0/2.1/2.2|2.4.0|[mindstudio_probe-1.2.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.2/mindstudio_probe-1.2.0-py3-none-any.whl)|1e3aeea1706112f6ee52fd1165037936bb209138f0b9ec42ea21e2c1c8942cdc|
 |1.1.1|2024.12.09|1.11/2.0/2.1/2.2|2.4.0|[mindstudio_probe-1.1.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.1/mindstudio_probe-1.1.1-py3-none-any.whl)|577b597555dc155b76ba1a62d575c3546004644e140a456c3ba0824d46283735|
 |1.1.0|2024.10.14|1.11/2.0/2.1/2.2|2.4.0|[mindstudio_probe-1.1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.1/mindstudio_probe-1.1.0-py3-none-any.whl)|83a5a9b7c65a357639f8c9636d88c693b4cf0eb590d4f8f5cb56395ba69b1f6d|
@@ -50,10 +52,34 @@ pip install ./mindstudio_probe*.whl
 |参数|说明|是否必选|
 |--|--|:--:|
-|--include-mod|指定可选模块，可取值`adump`，表示在编whl包时加入adump模块。默认未配置该参数，表示编基础包。<br>&#8226; adump模块用于MindSpore静态图场景L2级别的dump。<br>&#8226; 仅MindSpore 2.5.0及以上版本支持adump模块。<br>&#8226; 若使用源码安装，编译环境需支持GCC 7或以上版本，和CMAKE 3.14或以上版本。<br>&#8226; 生成的whl包仅限编译时使用的python版本和处理器架构可用。|否|
+|--include-mod|指定可选模块，可取值`adump`，表示在编whl包时加入adump模块。默认未配置该参数，表示编基础包。<br>&#8226; adump模块用于MindSpore静态图场景L2级别的dump。<br>&#8226; 仅MindSpore 2.5.0及以上版本支持adump模块。<br>&#8226; 若使用源码安装，编译环境需支持GCC 7.5或以上版本，和CMAKE 3.14或以上版本。<br>&#8226; 生成的whl包仅限编译时使用的python版本和处理器架构可用。|否|
 # 特性变更说明
+## 1.2.0
+【数据采集】
+- 模块级dump支持采集权重及权重梯度
+- 修复原地覆盖类API前向输入数据采集不正确的问题
+- seed_all接口支持控制dropout失效功能
+【精度预检】
+- MindSpore场景新增支持Tensor类的mint API的预检
+【训练状态监控】
+- 支持FSDP和ZeRO-0
+- 异常排序支持前向激活值和反向梯度
+【分级可视化构图比对】
+- 支持graph结构分页展示，支持graph批量构建和比对
+- 支持溢出检测模式
+## 1.1.1
 ## 1.1.1
 【数据采集】

mindstudio-probe 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

mindstudio-probe 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl