mindstudio-probe 1.2.2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/METADATA +3 -3
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/RECORD +143 -144
- msprobe/README.md +25 -20
- msprobe/core/common/const.py +110 -66
- msprobe/core/common/decorator.py +50 -0
- msprobe/core/common/exceptions.py +3 -1
- msprobe/core/common/file_utils.py +25 -2
- msprobe/core/common/utils.py +30 -34
- msprobe/core/compare/acc_compare.py +43 -74
- msprobe/core/compare/check.py +2 -6
- msprobe/core/compare/highlight.py +2 -0
- msprobe/core/compare/layer_mapping/layer_mapping.py +2 -1
- msprobe/core/compare/merge_result/merge_result.py +8 -2
- msprobe/core/compare/multiprocessing_compute.py +19 -12
- msprobe/core/compare/npy_compare.py +30 -12
- msprobe/core/compare/utils.py +20 -10
- msprobe/core/data_dump/api_registry.py +176 -0
- msprobe/core/data_dump/data_processor/base.py +2 -2
- msprobe/core/data_dump/data_processor/mindspore_processor.py +19 -32
- msprobe/core/data_dump/data_processor/pytorch_processor.py +45 -15
- msprobe/core/data_dump/json_writer.py +38 -35
- msprobe/core/grad_probe/constant.py +1 -0
- msprobe/core/grad_probe/grad_compare.py +1 -1
- msprobe/core/overflow_check/abnormal_scene.py +2 -0
- msprobe/docs/01.installation.md +2 -1
- msprobe/docs/02.config_introduction.md +17 -15
- msprobe/docs/05.data_dump_PyTorch.md +70 -2
- msprobe/docs/06.data_dump_MindSpore.md +33 -12
- msprobe/docs/07.accuracy_checker_PyTorch.md +11 -1
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +3 -1
- msprobe/docs/09.accuracy_checker_MindSpore.md +1 -1
- msprobe/docs/10.accuracy_compare_PyTorch.md +59 -33
- msprobe/docs/11.accuracy_compare_MindSpore.md +40 -16
- msprobe/docs/12.overflow_check_PyTorch.md +3 -1
- msprobe/docs/13.overflow_check_MindSpore.md +4 -2
- msprobe/docs/14.data_parse_PyTorch.md +1 -7
- msprobe/docs/18.online_dispatch.md +1 -1
- msprobe/docs/19.monitor.md +124 -62
- msprobe/docs/21.visualization_PyTorch.md +32 -13
- msprobe/docs/22.visualization_MindSpore.md +32 -13
- msprobe/docs/23.generate_operator_PyTorch.md +9 -9
- msprobe/docs/27.dump_json_instruction.md +278 -8
- msprobe/docs/28.kernel_dump_MindSpore.md +1 -1
- msprobe/docs/29.data_dump_MSAdapter.md +229 -0
- msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
- msprobe/docs/FAQ.md +3 -11
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/merge_result.png +0 -0
- msprobe/docs/img/visualization/vis_browser_1.png +0 -0
- msprobe/docs/img/visualization/vis_match_info.png +0 -0
- msprobe/docs/img/visualization/vis_precision_info.png +0 -0
- msprobe/docs/img/visualization/vis_search_info.png +0 -0
- msprobe/docs/img/visualization/vis_show_info.png +0 -0
- msprobe/docs/img/visualization/vis_showcase.png +0 -0
- msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
- msprobe/mindspore/__init__.py +4 -3
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +6 -1
- msprobe/mindspore/api_accuracy_checker/api_runner.py +19 -9
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
- msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +602 -0
- msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
- msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -1
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +2 -1
- msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
- msprobe/mindspore/common/const.py +61 -0
- msprobe/mindspore/common/utils.py +31 -19
- msprobe/mindspore/compare/ms_compare.py +27 -19
- msprobe/mindspore/compare/ms_graph_compare.py +6 -5
- msprobe/mindspore/debugger/debugger_config.py +6 -4
- msprobe/mindspore/debugger/precision_debugger.py +22 -10
- msprobe/mindspore/dump/dump_tool_factory.py +5 -3
- msprobe/mindspore/dump/hook_cell/api_register.py +142 -0
- msprobe/mindspore/dump/hook_cell/hook_cell.py +9 -10
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +24 -26
- msprobe/mindspore/dump/jit_dump.py +14 -9
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +22 -56
- msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -1
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +10 -6
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
- msprobe/mindspore/grad_probe/global_context.py +2 -0
- msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
- msprobe/mindspore/grad_probe/hook.py +2 -4
- msprobe/mindspore/monitor/distributed/wrap_distributed.py +1 -1
- msprobe/mindspore/monitor/module_hook.py +354 -302
- msprobe/mindspore/monitor/utils.py +46 -4
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
- msprobe/mindspore/service.py +23 -17
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +11 -6
- msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +2 -2
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +4 -5
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +5 -5
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +25 -6
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -19
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
- msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +6 -0
- msprobe/pytorch/common/utils.py +29 -7
- msprobe/pytorch/debugger/precision_debugger.py +10 -1
- msprobe/pytorch/dump/module_dump/module_dump.py +4 -3
- msprobe/pytorch/dump/module_dump/module_processer.py +12 -6
- msprobe/pytorch/free_benchmark/common/utils.py +1 -1
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
- msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
- msprobe/pytorch/function_factory.py +1 -1
- msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
- msprobe/pytorch/hook_module/api_register.py +131 -0
- msprobe/pytorch/hook_module/hook_module.py +19 -14
- msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +172 -75
- msprobe/pytorch/monitor/csv2tb.py +8 -2
- msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
- msprobe/pytorch/monitor/module_hook.py +131 -105
- msprobe/pytorch/monitor/module_metric.py +3 -0
- msprobe/pytorch/monitor/optimizer_collect.py +55 -4
- msprobe/pytorch/monitor/unittest/test_monitor.py +1 -1
- msprobe/pytorch/monitor/utils.py +68 -1
- msprobe/pytorch/online_dispatch/compare.py +0 -2
- msprobe/pytorch/online_dispatch/dispatch.py +9 -0
- msprobe/pytorch/online_dispatch/dump_compare.py +3 -0
- msprobe/pytorch/online_dispatch/utils.py +3 -0
- msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
- msprobe/pytorch/parse_tool/lib/utils.py +2 -1
- msprobe/pytorch/pt_config.py +11 -7
- msprobe/pytorch/service.py +11 -8
- msprobe/visualization/builder/graph_builder.py +44 -5
- msprobe/visualization/builder/msprobe_adapter.py +0 -1
- msprobe/visualization/compare/graph_comparator.py +42 -38
- msprobe/visualization/compare/mode_adapter.py +0 -19
- msprobe/visualization/graph/base_node.py +8 -1
- msprobe/visualization/graph/distributed_analyzer.py +1 -10
- msprobe/visualization/graph/graph.py +0 -11
- msprobe/visualization/graph/node_op.py +1 -2
- msprobe/visualization/graph_service.py +1 -1
- msprobe/visualization/utils.py +2 -33
- msprobe/mindspore/dump/hook_cell/api_registry.py +0 -207
- msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
- msprobe/pytorch/hook_module/api_registry.py +0 -166
- msprobe/pytorch/hook_module/wrap_distributed.py +0 -79
- msprobe/pytorch/hook_module/wrap_functional.py +0 -66
- msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
- msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
- msprobe/pytorch/hook_module/wrap_torch.py +0 -84
- msprobe/pytorch/hook_module/wrap_vf.py +0 -60
- msprobe/pytorch/parse.py +0 -19
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/WHEEL +0 -0
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-1.3.0.dist-info}/top_level.txt +0 -0
msprobe/pytorch/common/utils.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,7 +28,7 @@ from msprobe.core.common.exceptions import DistributedNotInitializedError
 from msprobe.core.common.file_utils import (FileCheckConst, change_mode,
                                             check_file_or_directory_path, check_path_before_create, FileOpen)
 from msprobe.core.common.log import logger
-from msprobe.core.common.utils import check_seed_all
+from msprobe.core.common.utils import check_seed_all, is_save_variable_valid
 from packaging import version
 
 try:
@@ -57,7 +57,7 @@ def parameter_adapter(func):
 
     @wraps(func)
     def inner(self, *args, **kwargs):
-        if self.
+        if self.api_name == "__getitem__" and len(args) > 1 and isinstance(args[1], torch.Tensor):
             input_tensor = args[0]
             indices = args[1]
             if indices.dtype == torch.uint8:
@@ -77,7 +77,7 @@ def parameter_adapter(func):
         else:
             res = [input_tensor[tensor_index] for tensor_index in indices]
             return getattr(torch._C._VariableFunctionsClass, "stack")(res, 0)
-        if self.
+        if self.api_name == "__eq__" and len(args) > 1 and args[1] is None:
             return False
         return func(self, *args, **kwargs)
 
@@ -261,6 +261,10 @@ class Const:
     NPU = 'NPU'
     DISTRIBUTED = 'Distributed'
 
+    HIFLOAT8_TYPE = "torch_npu.HiFloat8Tensor"
+    FLOAT8_E5M2_TYPE = "torch.float8_e5m2"
+    FLOAT8_E4M3FN_TYPE = "torch.float8_e4m3fn"
+
     RAISE_PRECISION = {
         torch.float16: torch.float32,
         torch.bfloat16: torch.float32,
@@ -419,7 +423,11 @@ def is_recomputation():
         bool: True if in the re-computation phase, False otherwise.
     """
     backward_function_indices = []
-
+    try:
+        call_stack = inspect.stack()
+    except Exception as e:
+        logger.warning(f"Failed to capture stack trace, recomputation validation may be incorrect, error info: {e}.")
+        return False
 
     # Identify the function 'backward' is being executed within the 'torch/_tensor.py' file.
     for frame_info in call_stack:
@@ -449,9 +457,11 @@ def is_recomputation():
 
 def check_save_param(variable, name, save_backward):
     # try catch this api to skip invalid call
-
+    valid_data_types = tuple([torch.Tensor, int, float, str])
+    if not is_save_variable_valid(variable, valid_data_types):
+        valid_data_types_with_nested_types = valid_data_types + (dict, tuple, list)
         logger.warning("PrecisionDebugger.save variable type not valid, "
-                       "should be one of
+                       f"should be one of {valid_data_types_with_nested_types}"
                        "Skip current save process.")
         raise ValueError
     if not isinstance(name, str):
@@ -473,3 +483,15 @@ def replace_last_occurrence(text, old, new):
     if index != -1:
         return text[:index] + text[index:].replace(old, new, 1)
     return text
+
+
+def is_hifloat8_tensor(tensor):
+    if not is_gpu and hasattr(torch_npu, "HiFloat8Tensor") and isinstance(tensor, torch_npu.HiFloat8Tensor):
+        return True
+    return False
+
+
+def is_float8_tensor(tensor):
+    if str(tensor.dtype) in [Const.FLOAT8_E5M2_TYPE, Const.FLOAT8_E4M3FN_TYPE]:
+        return True
+    return is_hifloat8_tensor(tensor)
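The new float8 helpers are used elsewhere in this release to skip tensors that cannot safely be cloned or perturbed. A minimal standalone sketch of that guard pattern, with the dtype strings inlined instead of read from `Const` (an assumption made for illustration only):

```python
import torch

# Values of Const.FLOAT8_E5M2_TYPE / Const.FLOAT8_E4M3FN_TYPE, inlined for this sketch.
FLOAT8_DTYPES = ("torch.float8_e5m2", "torch.float8_e4m3fn")

def is_float8_tensor(tensor):
    # Compare the dtype's string form so the check also works on builds without float8 support.
    return str(tensor.dtype) in FLOAT8_DTYPES

def clone_if_supported(result):
    # Clone ordinary tensors; leave float8 tensors untouched, mirroring how clone_if_tensor is guarded.
    if isinstance(result, torch.Tensor) and not is_float8_tensor(result):
        return result.clone()
    return result
```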
msprobe/pytorch/debugger/precision_debugger.py
CHANGED
@@ -19,7 +19,7 @@ import torch
 from msprobe.core.common.const import Const, FileCheckConst, MsgConst
 from msprobe.core.common.exceptions import MsprobeException
 from msprobe.core.common.file_utils import FileChecker
-from msprobe.core.common.utils import get_real_step_or_rank
+from msprobe.core.common.utils import get_real_step_or_rank, check_init_step
 from msprobe.pytorch.common.log import logger
 from msprobe.pytorch.common.utils import check_save_param
 from msprobe.pytorch.debugger.debugger_config import DebuggerConfig
@@ -172,6 +172,15 @@ class PrecisionDebugger:
             return
         instance.service.save(variable, name, save_backward)
 
+    @classmethod
+    def set_init_step(cls, step):
+        instance = cls._instance
+        if not instance:
+            raise Exception(MsgConst.NOT_CREATED_INSTANCE)
+        check_init_step(step)
+        instance.service.init_step = step
+        instance.service.loop = 0
+
 
 def module_dump(module, dump_name):
     if not isinstance(module, torch.nn.Module):
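A hedged usage sketch of the new `set_init_step` classmethod; the config path and step value below are placeholders rather than values taken from the msprobe documentation:

```python
from msprobe.pytorch import PrecisionDebugger

# Create the debugger first; set_init_step raises if no instance exists yet.
debugger = PrecisionDebugger(config_path="./config.json")  # placeholder path
# Align msprobe's step counter with a run resumed from a checkpoint (illustrative value).
PrecisionDebugger.set_init_step(100)
```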
msprobe/pytorch/dump/module_dump/module_dump.py
CHANGED
@@ -17,7 +17,7 @@ import torch
 from msprobe.core.common.const import Const
 from msprobe.core.data_dump.scope import BaseScope
 from msprobe.pytorch.common.log import logger
-from msprobe.pytorch.hook_module.
+from msprobe.pytorch.hook_module.api_register import get_api_register
 
 torch_version_above_or_equal_2 = torch.__version__.split('+')[0] >= '2.0'
 
@@ -26,13 +26,14 @@ class ModuleDumper:
     def __init__(self, service):
         self.service = service
         self.hook_handle_list = []
+        self.api_register = get_api_register()
 
     def start_module_dump(self, module, dump_name):
-        api_register.
+        self.api_register.restore_all_api()
         self.register_hook(module, dump_name)
 
     def stop_module_dump(self):
-        api_register.
+        self.api_register.register_all_api()
         for hook_handle in self.hook_handle_list:
             if isinstance(hook_handle, torch.utils.hooks.RemovableHandle):
                 hook_handle.remove()
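The change above pairs `restore_all_api()` with `register_all_api()` around a single module dump. A minimal standalone sketch of that pattern; only the two method names come from this diff, everything else is a stand-in:

```python
class DummyApiRegister:
    """Stand-in for the shared registry object used by ModuleDumper."""

    def restore_all_api(self):
        print("API wrappers removed before the module dump")

    def register_all_api(self):
        print("API wrappers re-installed after the module dump")

def dump_one_module(register, run_module):
    # Disable API-level hooks so only the module-level hooks fire during the dump.
    register.restore_all_api()
    try:
        run_module()
    finally:
        register.register_all_api()

dump_one_module(DummyApiRegister(), lambda: print("dumping target module"))
```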
msprobe/pytorch/dump/module_dump/module_processer.py
CHANGED
@@ -16,15 +16,17 @@
 from functools import wraps
 
 import torch
+from torch.utils.hooks import BackwardHook
+
 from msprobe.core.common.const import Const
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.core.data_dump.scope import BaseScope, ModuleRangeScope, MixRangeScope
 from msprobe.pytorch.common.log import logger
-from msprobe.pytorch.common.utils import replace_last_occurrence
-from torch.utils.checkpoint import checkpoint as origin_checkpoint
-from torch.utils.checkpoint import set_checkpoint_early_stop
-from torch.utils.hooks import BackwardHook
+from msprobe.pytorch.common.utils import replace_last_occurrence, is_float8_tensor
 
 torch_version_above_or_equal_2 = torch.__version__.split('+')[0] >= '2.0'
+if torch_version_above_or_equal_2:
+    from torch.utils.checkpoint import checkpoint as origin_checkpoint, set_checkpoint_early_stop
 
 
 def checkpoint_without_early_stop(*args, **kwargs):
@@ -33,7 +35,8 @@ def checkpoint_without_early_stop(*args, **kwargs):
 
 
 def replace_checkpoint():
-
+    if torch_version_above_or_equal_2:
+        torch.utils.checkpoint.checkpoint = checkpoint_without_early_stop
 
 
 class ModuleProcesser:
@@ -58,8 +61,9 @@ class ModuleProcesser:
         return clone_return_value_func
 
     @staticmethod
+    @recursion_depth_decorator("ModuleDump: ModuleProcesser.clone_if_tensor", max_depth=Const.DUMP_MAX_DEPTH)
     def clone_if_tensor(result):
-        if isinstance(result, torch.Tensor):
+        if isinstance(result, torch.Tensor) and not is_float8_tensor(result):
             return result.clone()
         elif type(result) is tuple:
             return tuple(ModuleProcesser.clone_if_tensor(x) for x in result)
@@ -109,6 +113,8 @@ class ModuleProcesser:
         for name, module in modules_and_names:
             if module == model:
                 continue
+            if module.__class__.__name__ == "FullyShardedDataParallel":
+                continue
             module_index = (index + Const.SEP) if index != "-1" else ""
             prefix_name = (BaseScope.Module_Type_Module + Const.SEP + module_index +
                            name + Const.SEP + module.__class__.__name__ + Const.SEP)
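`clone_if_tensor` is now guarded by `recursion_depth_decorator` from the new `msprobe/core/common/decorator.py`. The actual implementation is not shown in this diff; a hedged, self-contained sketch of what such a depth guard typically looks like:

```python
import functools

def recursion_depth_decorator(label, max_depth=50):
    """Abort with a clear error (instead of RecursionError) once a wrapped call nests too deeply."""
    def wrapper(func):
        depth = 0

        @functools.wraps(func)
        def inner(*args, **kwargs):
            nonlocal depth
            depth += 1
            try:
                if depth > max_depth:
                    raise RuntimeError(f"{label}: recursion depth exceeded {max_depth}")
                return func(*args, **kwargs)
            finally:
                depth -= 1
        return inner
    return wrapper

@recursion_depth_decorator("demo.flatten", max_depth=10)
def flatten(obj):
    # Recursively flatten nested tuples/lists into a flat list of leaves.
    if isinstance(obj, (tuple, list)):
        return [leaf for item in obj for leaf in flatten(item)]
    return [obj]

print(flatten([1, (2, [3, 4]), 5]))  # [1, 2, 3, 4, 5]
```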
msprobe/pytorch/free_benchmark/common/utils.py
CHANGED
@@ -16,7 +16,7 @@
 
 import torch
 from msprobe.core.common.exceptions import FreeBenchmarkException
-from msprobe.core.common.
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark.common.enums import DeviceType
 
 
msprobe/pytorch/free_benchmark/compare/single_benchmark.py
CHANGED
@@ -16,7 +16,7 @@
 import math
 
 import torch
-from msprobe.core.common.
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.utils import TorchC
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py
CHANGED
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 import torch
-from msprobe.core.common.
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
@@ -95,13 +95,13 @@ class AddNoiseLayer(NpuBaseLayer):
         except Exception:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"when
+                f"when calculating the maximum value, the tensor is changed to float32."
             )
             max_val = TorchC.max(TorchC.abs(tensor_obj.to(torch.float32))).item()
         if max_val < abs_tol:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"
+                f"maximum value is less than the minimum threshold. Cancel adding noise."
             )
             return False
         return True
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py
CHANGED
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 import torch
-from msprobe.core.common.
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
@@ -100,13 +100,13 @@ class BitNoiseLayer(NpuBaseLayer):
         except Exception:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"when calculate
+                f"when calculate the maximum value, the tensor is changed to float32."
             )
             max_val = TorchC.max(TorchC.abs(tensor_obj.to(torch.float32))).item()
         if max_val < abs_tol:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"
+                f"maximum value is less than the minimum threshold. Cancel adding noise."
             )
             return False
         return True
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py
CHANGED
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 import torch
-from msprobe.core.common.
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
 from msprobe.pytorch.free_benchmark.common.params import DataParams
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py
CHANGED
@@ -15,7 +15,7 @@
 
 import torch
 from msprobe.core.common.const import Const
-from msprobe.core.common.
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import CommonField
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py
CHANGED
@@ -49,6 +49,6 @@ class CheckerHandler(FuzzHandler):
         except Exception as e:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.params.api_name}, "
-                f"when
+                f"when comparing the results, an exception is raised: {e}"
             )
             return data_params.original_result
msprobe/pytorch/function_factory.py
CHANGED
@@ -70,7 +70,7 @@ class Register(dict):
 
         def add_register_item(key, value):
             if key in self._dict:
-                logger.warning(f"{value.__name__} has been registered before, so we will
+                logger.warning(f"{value.__name__} has been registered before, so we will override it.")
             self[key] = value
             return value
 
msprobe/pytorch/grad_probe/grad_monitor.py
CHANGED
@@ -46,7 +46,7 @@ class GradientMonitor:
         if not os.path.exists(self._output_path):
             create_directory(self._output_path)
         else:
-            logger.warning(f"the file in {self._output_path} will be
+            logger.warning(f"the file in {self._output_path} will be deleted")
         self._step = -1
         self._param2name = defaultdict(str)
 
@@ -97,7 +97,7 @@ class GradientMonitor:
         create_directory(output_dirpath)
         output_path = os.path.join(output_dirpath, f"grad_summary_{self._step}.csv")
         if os.path.exists(output_path):
-            logger.warning(f"{output_path} will be
+            logger.warning(f"{output_path} will be deleted")
             remove_path(output_path)
         header_result = GradStatCsv.generate_csv_header(self._level_adp, self._bounds)
         output_lines.insert(0, header_result)
msprobe/pytorch/hook_module/api_register.py
ADDED
@@ -0,0 +1,131 @@
+# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+import os
+
+import torch
+import torch.distributed as dist
+
+from msprobe.core.common.const import Const
+from msprobe.core.data_dump.api_registry import ApiRegistry
+from msprobe.pytorch.common.utils import (
+    torch_without_guard_version, is_gpu, torch_device_guard, parameter_adapter
+)
+from msprobe.pytorch.function_factory import npu_custom_functions
+from msprobe.pytorch.hook_module.hook_module import HOOKModule
+
+
+torch_version_above_2 = torch.__version__.split('+')[0] > '2.0'
+
+_api_types = {
+    Const.PT_FRAMEWORK: {
+        Const.PT_API_TYPE_FUNCTIONAL: (torch.nn.functional, (torch.nn.functional,)),
+        Const.PT_API_TYPE_TENSOR: (torch.Tensor, (torch.Tensor,)),
+        Const.PT_API_TYPE_TORCH: (torch, (torch,)),
+        Const.PT_API_TYPE_VF: (torch._C._VariableFunctionsClass, (torch._VF,)),
+        Const.PT_API_TYPE_DIST: (dist, (dist, dist.distributed_c10d))
+    }
+}
+if not is_gpu:
+    import torch_npu
+    if torch_without_guard_version:
+        _api_types.get(Const.PT_FRAMEWORK).update(
+            {
+                Const.PT_API_TYPE_NPU: (torch.ops.npu, (torch_npu, torch.ops.npu))
+            }
+        )
+    else:
+        _api_types.get(Const.PT_FRAMEWORK).update(
+            {Const.PT_API_TYPE_NPU: (torch_npu._C._VariableFunctionsClass, (torch_npu,))}
+        )
+    _api_types.get(Const.PT_FRAMEWORK).update(
+        {
+            Const.PT_API_TYPE_NPU_DIST: (torch_npu.distributed, (torch_npu.distributed,
+                                                                 torch_npu.distributed.distributed_c10d))
+        }
+    )
+
+_inner_used_api = {}
+_supported_api_list_path = (os.path.join(os.path.dirname(os.path.realpath(__file__)), Const.SUPPORT_API_FILE_NAME),)
+_cuda_func_mapping = {"npu_fusion_attention": "gpu_fusion_attention"}
+
+
+@parameter_adapter
+def tensor_module_forward(module, *args, **kwargs):
+    return module.api_func(*args, **kwargs)
+
+
+def dist_module_forward(module, *args, **kwargs):
+    handle = module.api_func(*args, **kwargs)
+    if kwargs.get("async_op") or module.api_name in ["isend", "irecv"]:
+        if handle and hasattr(handle, 'wait'):
+            handle.wait()
+    if module.api_name == "batch_isend_irecv":
+        if isinstance(handle, list):
+            for req in handle:
+                req.wait()
+    return handle
+
+
+def npu_module_forward(module, *args, **kwargs):
+    if not module.need_hook:
+        if module.api_name not in npu_custom_functions:
+            raise Exception(f'There is not bench function {module.api_name}')
+        if module.device == Const.CUDA_LOWERCASE:
+            module.api_name = _cuda_func_mapping.get(module.api_name, module.api_name)
+        if module.device in [Const.CUDA_LOWERCASE, Const.CPU_LOWERCASE]:
+            return npu_custom_functions[module.api_name](*args, **kwargs)
+    return module.api_func(*args, **kwargs)
+
+
+forward_methods = {
+    "Tensor": tensor_module_forward,
+    "Distributed": dist_module_forward,
+    "NPU": npu_module_forward
+}
+
+
+class ApiTemplate(HOOKModule):
+    def __init__(self, api_name, api_func, prefix, hook_build_func, need_hook=True, device=Const.CPU_LOWERCASE):
+        self.api_name = api_name
+        self.api_func = api_func
+        self.prefix = prefix
+        self.prefix_api_name = prefix + Const.SEP + str(api_name.split(Const.SEP)[-1]) + Const.SEP
+        self.need_hook = need_hook
+        self.device = device
+        if self.need_hook:
+            super().__init__(hook_build_func)
+        if prefix == Const.DIST_API_TYPE_PREFIX:
+            self.op_is_distributed = True
+
+    @torch_device_guard
+    def forward(self, *args, **kwargs):
+        exec_func = forward_methods.get(self.prefix)
+        exec_func = functools.partial(exec_func, self) if exec_func else self.api_func
+        return exec_func(*args, **kwargs)
+
+
+api_register = None
+
+
+def get_api_register(return_new=False):
+    if return_new:
+        return ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate)
+
+    global api_register
+    if api_register is None:
+        api_register = ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate)
+    return api_register
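`get_api_register()` caches one `ApiRegistry` per process unless `return_new=True` is passed. A generic, standalone sketch of that lazy-singleton accessor (names below are illustrative, not the msprobe implementation):

```python
_registry = None

class Registry:
    def __init__(self):
        self.wrapped_apis = {}

def get_registry(return_new=False):
    # return_new=True hands back an isolated instance, e.g. for one-off checks;
    # otherwise every caller shares (and mutates) the same registry.
    if return_new:
        return Registry()
    global _registry
    if _registry is None:
        _registry = Registry()
    return _registry

assert get_registry() is get_registry()
assert get_registry(return_new=True) is not get_registry()
```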
msprobe/pytorch/hook_module/hook_module.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,6 +21,8 @@ import torch
 import torch.nn as nn
 import torch.utils.hooks as full_hooks
 
+from msprobe.pytorch.common.utils import is_float8_tensor
+
 torch_version_above_or_equal_2 = torch.__version__.split('+')[0] >= '2.0'
 
 
@@ -28,28 +30,27 @@ class HOOKModule(nn.Module):
     module_count = defaultdict(int)
     inner_stop_hook = {}
 
-    def __init__(self,
+    def __init__(self, hook_build_func) -> None:
         super(HOOKModule, self).__init__()
         self.has_overflow = False
-        self.prefix = ""
         self.current_thread = threading.current_thread().ident
         if self.current_thread not in HOOKModule.inner_stop_hook:
             HOOKModule.inner_stop_hook[self.current_thread] = False
         self.stop_hook = HOOKModule.inner_stop_hook.get(self.current_thread, False)
 
         if not self.stop_hook:
-            if hasattr(self, "prefix_op_name_"):
-                self.prefix = self.prefix_op_name_
-
             self.forward_data_collected = False
-
-            if
-
-
-
-
-
-
+
+            prefix = self.prefix_api_name if hasattr(self, "prefix_api_name") else ""
+            if callable(hook_build_func):
+                forward_pre_hook, forward_hook, backward_hook, _ = hook_build_func(prefix)
+                if torch_version_above_or_equal_2:
+                    self.register_forward_pre_hook(forward_pre_hook, with_kwargs=True)
+                    self.register_forward_hook(forward_hook, with_kwargs=True)
+                else:
+                    self.register_forward_pre_hook(forward_pre_hook)
+                    self.register_forward_hook(forward_hook)
+                self.register_backward_hook(backward_hook)
 
     def __call__(self, *args, **kwargs):
         changed = False
@@ -111,6 +112,10 @@
             return result
         else:
             return result
+
+        if is_float8_tensor(var) or not (var.requires_grad and torch.is_grad_enabled()):
+            return result
+
         grad_fn = var.grad_fn
         if grad_fn is not None:
             for hook in non_full_backward_hooks:
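The reworked `HOOKModule.__init__` now takes a `hook_build_func` that returns a 4-tuple of hooks and registers them itself. A hedged standalone sketch of that contract on a plain `nn.Module` (hook bodies and names are illustrative; the diff itself registers the backward hook via `register_backward_hook`):

```python
import torch
import torch.nn as nn

def build_hooks(prefix):
    # Mirrors the 4-tuple shape (forward_pre_hook, forward_hook, backward_hook, extra)
    # that hook_build_func is expected to return in this release.
    def forward_pre_hook(module, args, kwargs):
        print(f"{prefix}forward pre: {len(args)} positional args")

    def forward_hook(module, args, kwargs, output):
        print(f"{prefix}forward done")

    def backward_hook(module, grad_input, grad_output):
        print(f"{prefix}backward done")

    return forward_pre_hook, forward_hook, backward_hook, None

layer = nn.Linear(4, 4)
pre, fwd, bwd, _ = build_hooks("Demo.Linear.")
layer.register_forward_pre_hook(pre, with_kwargs=True)   # with_kwargs needs torch >= 2.0
layer.register_forward_hook(fwd, with_kwargs=True)
layer.register_full_backward_hook(bwd)
layer(torch.randn(2, 4)).sum().backward()
```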
msprobe/pytorch/hook_module/register_optimizer_hook.py
CHANGED
@@ -32,8 +32,9 @@ def register_optimizer_hook(data_collector):
     def patch_clip_grad(func):
         def wrapper(*args, **kwargs):
             data_collector.optimizer_status = Const.CLIP_GRAD
-            func(*args, **kwargs)
+            result = func(*args, **kwargs)
             data_collector.optimizer_status = Const.END_PREFIX + Const.CLIP_GRAD
+            return result
 
         return wrapper
 
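The wrapper now passes the wrapped call's return value through, which matters because utilities such as `torch.nn.utils.clip_grad_norm_` return a value (the total gradient norm) that callers may use. A minimal sketch of a return-value-transparent patch (generic names, not the msprobe code):

```python
import functools

def transparent_patch(func, before, after):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        before()
        result = func(*args, **kwargs)  # keep the original return value
        after()
        return result
    return wrapper
```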