PyPI - mindstudio-probe - Versions diffs - 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl - Mend

mindstudio-probe 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (261)

{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/METADATA +4 -3
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/RECORD +243 -191
msprobe/README.md +57 -21
msprobe/core/__init__.py +17 -0
msprobe/core/common/const.py +224 -82
msprobe/core/common/decorator.py +50 -0
msprobe/core/common/exceptions.py +5 -3
msprobe/core/common/file_utils.py +274 -40
msprobe/core/common/framework_adapter.py +169 -0
msprobe/core/common/global_lock.py +86 -0
msprobe/core/common/runtime.py +25 -0
msprobe/core/common/utils.py +148 -72
msprobe/core/common_config.py +7 -0
msprobe/core/compare/acc_compare.py +640 -462
msprobe/core/compare/check.py +36 -107
msprobe/core/compare/compare_cli.py +4 -0
msprobe/core/compare/config.py +72 -0
msprobe/core/compare/highlight.py +217 -215
msprobe/core/compare/layer_mapping/layer_mapping.py +4 -1
msprobe/core/compare/merge_result/merge_result.py +12 -6
msprobe/core/compare/multiprocessing_compute.py +227 -107
msprobe/core/compare/npy_compare.py +32 -16
msprobe/core/compare/utils.py +218 -244
msprobe/{mindspore/runtime.py → core/config_check/__init__.py} +2 -4
msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
msprobe/core/config_check/checkers/base_checker.py +60 -0
msprobe/core/config_check/checkers/dataset_checker.py +138 -0
msprobe/core/config_check/checkers/env_args_checker.py +96 -0
msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
msprobe/core/config_check/checkers/pip_checker.py +90 -0
msprobe/core/config_check/checkers/random_checker.py +367 -0
msprobe/core/config_check/checkers/weights_checker.py +147 -0
msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
msprobe/core/config_check/config_check_cli.py +51 -0
msprobe/core/config_check/config_checker.py +100 -0
msprobe/{pytorch/parse.py → core/config_check/resource/dependency.yaml} +7 -4
msprobe/core/config_check/resource/env.yaml +57 -0
msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
msprobe/core/config_check/utils/utils.py +107 -0
msprobe/core/data_dump/api_registry.py +239 -0
msprobe/core/data_dump/data_collector.py +36 -9
msprobe/core/data_dump/data_processor/base.py +74 -53
msprobe/core/data_dump/data_processor/mindspore_processor.py +119 -78
msprobe/core/data_dump/data_processor/pytorch_processor.py +134 -96
msprobe/core/data_dump/json_writer.py +146 -57
msprobe/core/debugger/precision_debugger.py +143 -0
msprobe/core/grad_probe/constant.py +2 -1
msprobe/core/grad_probe/grad_compare.py +2 -2
msprobe/core/grad_probe/utils.py +1 -1
msprobe/core/hook_manager.py +242 -0
msprobe/core/monitor/anomaly_processor.py +384 -0
msprobe/core/overflow_check/abnormal_scene.py +2 -0
msprobe/core/service.py +356 -0
msprobe/core/single_save/__init__.py +0 -0
msprobe/core/single_save/single_comparator.py +243 -0
msprobe/core/single_save/single_saver.py +157 -0
msprobe/docs/01.installation.md +6 -5
msprobe/docs/02.config_introduction.md +89 -30
msprobe/docs/03.config_examples.md +1 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +184 -50
msprobe/docs/06.data_dump_MindSpore.md +193 -28
msprobe/docs/07.accuracy_checker_PyTorch.md +13 -3
msprobe/docs/08.accuracy_checker_online_PyTorch.md +72 -10
msprobe/docs/09.accuracy_checker_MindSpore.md +19 -7
msprobe/docs/10.accuracy_compare_PyTorch.md +266 -102
msprobe/docs/11.accuracy_compare_MindSpore.md +117 -43
msprobe/docs/12.overflow_check_PyTorch.md +5 -3
msprobe/docs/13.overflow_check_MindSpore.md +6 -4
msprobe/docs/14.data_parse_PyTorch.md +4 -10
msprobe/docs/17.grad_probe.md +2 -1
msprobe/docs/18.online_dispatch.md +3 -3
msprobe/docs/19.monitor.md +211 -103
msprobe/docs/21.visualization_PyTorch.md +100 -28
msprobe/docs/22.visualization_MindSpore.md +103 -31
msprobe/docs/23.generate_operator_PyTorch.md +9 -9
msprobe/docs/25.tool_function_introduction.md +23 -22
msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
msprobe/docs/27.dump_json_instruction.md +278 -8
msprobe/docs/28.debugger_save_instruction.md +111 -20
msprobe/docs/28.kernel_dump_MindSpore.md +1 -1
msprobe/docs/29.data_dump_MSAdapter.md +229 -0
msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
msprobe/docs/31.config_check.md +95 -0
msprobe/docs/32.ckpt_compare.md +69 -0
msprobe/docs/33.generate_operator_MindSpore.md +190 -0
msprobe/docs/34.RL_collect.md +92 -0
msprobe/docs/35.nan_analyze.md +72 -0
msprobe/docs/FAQ.md +3 -11
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/save_compare_result_sample.png +0 -0
msprobe/docs/img/visualization/proxy.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_match_info.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/mindspore/__init__.py +3 -3
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -55
msprobe/mindspore/api_accuracy_checker/api_runner.py +25 -11
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +580 -0
msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
msprobe/mindspore/api_accuracy_checker/data_manager.py +4 -3
msprobe/mindspore/api_accuracy_checker/generate_op_script/config_op.json +9 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +451 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +11 -1
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/cell_processor.py +204 -33
msprobe/mindspore/code_mapping/graph_parser.py +4 -21
msprobe/mindspore/common/const.py +73 -2
msprobe/mindspore/common/utils.py +157 -29
msprobe/mindspore/compare/common_dir_compare.py +382 -0
msprobe/mindspore/compare/distributed_compare.py +2 -26
msprobe/mindspore/compare/ms_compare.py +18 -398
msprobe/mindspore/compare/ms_graph_compare.py +20 -10
msprobe/mindspore/compare/utils.py +37 -0
msprobe/mindspore/debugger/debugger_config.py +59 -7
msprobe/mindspore/debugger/precision_debugger.py +83 -90
msprobe/mindspore/dump/cell_dump_process.py +902 -0
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +889 -0
msprobe/mindspore/dump/dump_tool_factory.py +18 -8
msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
msprobe/mindspore/dump/hook_cell/api_register.py +176 -0
msprobe/mindspore/dump/hook_cell/hook_cell.py +22 -12
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +42 -26
msprobe/mindspore/dump/jit_dump.py +35 -27
msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -16
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +22 -12
msprobe/mindspore/free_benchmark/common/utils.py +1 -1
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
msprobe/mindspore/grad_probe/global_context.py +9 -2
msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
msprobe/mindspore/grad_probe/hook.py +2 -4
msprobe/mindspore/mindspore_service.py +111 -0
msprobe/mindspore/monitor/common_func.py +52 -0
msprobe/mindspore/monitor/data_writers.py +237 -0
msprobe/mindspore/monitor/distributed/wrap_distributed.py +1 -1
msprobe/mindspore/monitor/features.py +13 -1
msprobe/mindspore/monitor/module_hook.py +568 -444
msprobe/mindspore/monitor/optimizer_collect.py +331 -0
msprobe/mindspore/monitor/utils.py +71 -9
msprobe/mindspore/ms_config.py +16 -15
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
msprobe/mindspore/task_handler_factory.py +5 -2
msprobe/msprobe.py +19 -0
msprobe/nan_analyze/__init__.py +14 -0
msprobe/nan_analyze/analyzer.py +255 -0
msprobe/nan_analyze/graph.py +189 -0
msprobe/nan_analyze/utils.py +211 -0
msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +15 -13
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +206 -4
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +9 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +6 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +31 -9
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -20
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +154 -0
msprobe/pytorch/attl_manager.py +65 -0
msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +6 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
msprobe/pytorch/common/utils.py +53 -19
msprobe/pytorch/compare/distributed_compare.py +4 -36
msprobe/pytorch/compare/pt_compare.py +13 -84
msprobe/pytorch/compare/utils.py +47 -0
msprobe/pytorch/debugger/debugger_config.py +34 -17
msprobe/pytorch/debugger/precision_debugger.py +50 -96
msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
msprobe/pytorch/dump/module_dump/module_dump.py +15 -61
msprobe/pytorch/dump/module_dump/module_processer.py +150 -114
msprobe/pytorch/free_benchmark/common/utils.py +1 -1
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
msprobe/pytorch/function_factory.py +1 -1
msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
msprobe/pytorch/hook_module/api_register.py +155 -0
msprobe/pytorch/hook_module/hook_module.py +18 -22
msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
msprobe/pytorch/hook_module/support_wrap_ops.yaml +193 -75
msprobe/pytorch/hook_module/utils.py +28 -2
msprobe/pytorch/monitor/csv2tb.py +14 -4
msprobe/pytorch/monitor/data_writers.py +259 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
msprobe/pytorch/monitor/module_hook.py +336 -241
msprobe/pytorch/monitor/module_metric.py +17 -0
msprobe/pytorch/monitor/optimizer_collect.py +244 -224
msprobe/pytorch/monitor/utils.py +84 -4
msprobe/pytorch/online_dispatch/compare.py +0 -2
msprobe/pytorch/online_dispatch/dispatch.py +13 -2
msprobe/pytorch/online_dispatch/dump_compare.py +8 -2
msprobe/pytorch/online_dispatch/utils.py +3 -0
msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
msprobe/pytorch/parse_tool/lib/utils.py +5 -4
msprobe/pytorch/pt_config.py +16 -11
msprobe/pytorch/pytorch_service.py +70 -0
msprobe/visualization/builder/graph_builder.py +69 -10
msprobe/visualization/builder/msprobe_adapter.py +24 -12
msprobe/visualization/compare/graph_comparator.py +63 -51
msprobe/visualization/compare/mode_adapter.py +22 -20
msprobe/visualization/graph/base_node.py +11 -4
msprobe/visualization/graph/distributed_analyzer.py +1 -10
msprobe/visualization/graph/graph.py +2 -13
msprobe/visualization/graph/node_op.py +1 -2
msprobe/visualization/graph_service.py +251 -104
msprobe/visualization/utils.py +26 -44
msprobe/mindspore/dump/hook_cell/api_registry.py +0 -207
msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -140
msprobe/mindspore/monitor/anomaly_detect.py +0 -404
msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
msprobe/mindspore/service.py +0 -543
msprobe/pytorch/hook_module/api_registry.py +0 -166
msprobe/pytorch/hook_module/wrap_distributed.py +0 -79
msprobe/pytorch/hook_module/wrap_functional.py +0 -66
msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
msprobe/pytorch/hook_module/wrap_torch.py +0 -84
msprobe/pytorch/hook_module/wrap_vf.py +0 -60
msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
msprobe/pytorch/monitor/anomaly_detect.py +0 -410
msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
msprobe/pytorch/service.py +0 -470
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
/msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
/msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0

msprobe/pytorch/dump/module_dump/module_processer.py CHANGED Viewed

@@ -13,18 +13,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from functools import wraps
+from collections import OrderedDict
 import torch
+from torch.utils.hooks import BackwardHook, RemovableHandle
 from msprobe.core.common.const import Const
 from msprobe.core.data_dump.scope import BaseScope, ModuleRangeScope, MixRangeScope
 from msprobe.pytorch.common.log import logger
-from msprobe.pytorch.common.utils import replace_last_occurrence
-from torch.utils.checkpoint import checkpoint as origin_checkpoint
-from torch.utils.checkpoint import set_checkpoint_early_stop
-from torch.utils.hooks import BackwardHook
+from msprobe.pytorch.common.utils import is_torch_nn_module, register_forward_pre_hook
+from msprobe.pytorch.dump.module_dump.hook_wrapper import wrap_setup_input_output_hook
 torch_version_above_or_equal_2 = torch.__version__.split('+')[0] >= '2.0'
+if torch_version_above_or_equal_2:
+    from torch.utils.checkpoint import checkpoint as origin_checkpoint, set_checkpoint_early_stop
 def checkpoint_without_early_stop(*args, **kwargs):
@@ -33,7 +35,18 @@ def checkpoint_without_early_stop(*args, **kwargs):
 def replace_checkpoint():
-    torch.utils.checkpoint.checkpoint = checkpoint_without_early_stop
+    if torch_version_above_or_equal_2:
+        torch.utils.checkpoint.checkpoint = checkpoint_without_early_stop
+def wrap_megatron_deallocate(func):
+    def wrapper_func(out, deallocate_pipeline_outputs=False):
+        if deallocate_pipeline_outputs and isinstance(out, torch.Tensor) and getattr(out, "_base") is not None:
+            out_clone = out.clone()
+            out.data = torch.empty((1,), device=out.device, dtype=out.dtype, )
+            return func(out_clone, deallocate_pipeline_outputs)
+        return func(out, deallocate_pipeline_outputs)
+    return wrapper_func
 class ModuleProcesser:
@@ -41,37 +54,25 @@ class ModuleProcesser:
     module_stack = []
     api_parent_node = ""
     module_node = {}
+    module_bw_hook_kernels = {}
+    module_with_backward_hook = {}
+    enable_module_dump = False
     def __init__(self, scope):
         self.scope = scope if isinstance(scope, (ModuleRangeScope, MixRangeScope)) else None
-        BackwardHook.setup_input_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_input_hook)
-        BackwardHook.setup_output_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_output_hook)
+        wrap_setup_input_output_hook()
         replace_checkpoint()
+        try:
+            from megatron.core.pipeline_parallel import schedules
+            schedules.deallocate_output_tensor = wrap_megatron_deallocate(schedules.deallocate_output_tensor)
+            logger.info_on_rank_0("Patch megatron method success.")
+        except ImportError:
+            logger.info_on_rank_0("No megatron find.")
+        except Exception as e:
+            logger.info_on_rank_0(f"Patch megatron method failed, detail:{str(e)}")
     @staticmethod
-    def clone_return_value(func):
-        @wraps(func)
-        def clone_return_value_func(*args, **kwargs):
-            result = func(*args, **kwargs)
-            return ModuleProcesser.clone_if_tensor(result)
-        return clone_return_value_func
-    @staticmethod
-    def clone_if_tensor(result):
-        if isinstance(result, torch.Tensor):
-            return result.clone()
-        elif type(result) is tuple:
-            return tuple(ModuleProcesser.clone_if_tensor(x) for x in result)
-        elif type(result) is list:
-            return list(ModuleProcesser.clone_if_tensor(x) for x in result)
-        elif type(result) is dict:
-            return {k: ModuleProcesser.clone_if_tensor(v) for k, v in result.items()}
-        else:
-            return result
-    @staticmethod
-    def module_count_func(module_name):
+    def set_and_get_calls_number(module_name):
         if module_name not in ModuleProcesser.module_count:
             ModuleProcesser.module_count[module_name] = 0
         else:
@@ -85,13 +86,19 @@ class ModuleProcesser:
             module._is_full_backward_hook is False
     @staticmethod
-    def get_modules_and_names(models):
+    def get_modules_and_names(models, recursive, module_names):
         modules_and_names_with_index = {}
         if isinstance(models, (list, tuple)):
+            if not recursive and len(module_names) != len(models):
+                return modules_and_names_with_index
             for index, model in enumerate(models):
-                modules_and_names_with_index[str(index)] = model.named_modules()
+                modules_and_names_with_index[str(index)] = model.named_modules() if recursive else \
+                    [(module_names[index], model)]
         else:
-            modules_and_names_with_index["-1"] = models.named_modules()
+            if not recursive and len(module_names) != 1:
+                return modules_and_names_with_index
+            modules_and_names_with_index["-1"] = models.named_modules() if recursive else \
+                [(module_names[0], models)]
         return modules_and_names_with_index
     @classmethod
@@ -100,105 +107,134 @@ class ModuleProcesser:
         cls.module_stack = []
         cls.api_parent_node = ""
         cls.module_node = {}
+        cls.module_bw_hook_kernels = {}
+        cls.enable_module_dump = False
+    def register_module_hook(self, models, build_hook, recursive=True, module_names=None):
+        if module_names is None:
+            module_names = []
-    def register_module_hook(self, models, build_hook):
-        logger.info_on_rank_0("The init dump is enabled, and the module dump function will not be available.")
-        modules_and_names_with_index = self.get_modules_and_names(models)
+        modules_and_names_with_index = self.get_modules_and_names(models, recursive, module_names)
         for index, modules_and_names in modules_and_names_with_index.items():
             model = models if index == "-1" else models[int(index)]
             for name, module in modules_and_names:
-                if module == model:
+                if recursive and module == model:
                     continue
+                if not is_torch_nn_module(module):
+                    logger.warning(
+                        f"The module dump does not support {type(module)} type. "
+                        f"The data dump for this module will be skipped."
+                    )
+                    continue
+                if module.__class__.__name__ == "FullyShardedDataParallel":
+                    continue
+                setattr(module, 'msprobe_hook', True)
                 module_index = (index + Const.SEP) if index != "-1" else ""
-                prefix_name = (BaseScope.Module_Type_Module + Const.SEP + module_index +
-                               name + Const.SEP + module.__class__.__name__ + Const.SEP)
-                pre_forward_hook, forward_hook, backward_hook, forward_hook_torch_version_below_2 = build_hook(
-                    BaseScope.Module_Type_Module,
-                    prefix_name
-                )
+                prefix_name = f'{BaseScope.Module_Type_Module}{Const.SEP}{module_index}{name}{Const.SEP}' + \
+                              f'{module.__class__.__name__}{Const.SEP}'
+                forward_pre_hook = self.build_module_hook(prefix_name, build_hook)
                 if self.has_register_backward_hook(module):
                     logger.warning(
                         f"The {prefix_name[:-1]} has registered deprecated register_backward_hook,"
                         f"which may cause abnormal data dump. The backward data dump for this module will be skipped."
                     )
+                    ModuleProcesser.module_with_backward_hook[prefix_name] = True
+                register_forward_pre_hook(module, forward_pre_hook)
+    def build_module_hook(self, module_name, build_data_hook):
+        def forward_pre_hook(module, args, kwargs=None):
+            if kwargs is None:
+                kwargs = {}
+            if hasattr(module, 'msprobe_module_dump') and not self.enable_module_dump:
+                return (args, kwargs) if torch_version_above_or_equal_2 else args
+            index = ModuleProcesser.set_and_get_calls_number(module_name)
+            full_forward_name = f'{module_name}{Const.FORWARD}{Const.SEP}{index}'
+            full_backward_name = f'{module_name}{Const.BACKWARD}{Const.SEP}{index}'
+            self.set_construct_info_in_pre_hook(full_forward_name)
+            if not hasattr(module, 'msprobe_forward_hook'):
+                forward_hooks_dict = getattr(module, '_forward_hooks', OrderedDict())
+                handle = RemovableHandle(forward_hooks_dict)
+                forward_hooks_dict[handle.id] = forward_hook
+                forward_hooks_dict.move_to_end(handle.id, last=False)
+                if torch_version_above_or_equal_2:
+                    forward_hooks_with_kwargs_dict = getattr(module, '_forward_hooks_with_kwargs', OrderedDict())
+                    forward_hooks_with_kwargs_dict[handle.id] = True
+                setattr(module, 'msprobe_forward_hook', True)
+            hook_set = build_data_hook(BaseScope.Module_Type_Module, full_forward_name)
+            def get_backward_pre_hook(full_backward_name):
+                def backward_pre_hook_fn(module, grad_output):
+                    self.set_construct_info_in_pre_hook(full_backward_name)
+                return backward_pre_hook_fn
+            def get_backward_hook(backward_data_hook, full_backward_name):
+                def backward_hook_fn(module, grad_input, grad_output):
+                    new_output = backward_data_hook(module, grad_input, grad_output)
+                    self.set_construct_info_in_hook(full_backward_name, is_forward=False)
+                    return new_output
+                return backward_hook_fn
+            if not ModuleProcesser.module_with_backward_hook.get(module_name):
+                backward_pre_hook = get_backward_pre_hook(full_backward_name)
+                backward_hook = get_backward_hook(hook_set.backward_hook, full_backward_name)
                 if torch_version_above_or_equal_2:
-                    module.register_forward_hook(forward_hook, with_kwargs=True)
+                    bw_hook = BackwardHook(module, [backward_hook], [backward_pre_hook])
                 else:
-                    if not self.has_register_backward_hook(module):
-                        module.register_full_backward_hook(self.node_hook(prefix_name + Const.BACKWARD, Const.STOP))
-                    module.register_forward_hook(forward_hook_torch_version_below_2)
-                if not self.has_register_backward_hook(module):
-                    module.register_full_backward_hook(backward_hook)
-                module.register_forward_pre_hook(self.node_hook(prefix_name + Const.FORWARD, Const.START))
-                module.register_forward_hook(self.node_hook(prefix_name + Const.FORWARD, Const.STOP))
-                if torch_version_above_or_equal_2 and not self.has_register_backward_hook(module):
-                    module.register_full_backward_pre_hook(self.node_hook(prefix_name + Const.BACKWARD, Const.START))
-                    module.register_full_backward_hook(self.node_hook(prefix_name + Const.BACKWARD, Const.STOP))
-    def node_hook(self, name_prefix, start_or_stop, **kwargs):
-        def pre_hook(module, input, output=None):
-            try:
-                index = ModuleProcesser.module_count_func(name_prefix)
-            except IndexError as e:
-                index = None
-                pass
-            full_name = name_prefix + Const.SEP + str(index)
-            if not hasattr(module, "mindstudio_reserved_name") or not module.mindstudio_reserved_name:
-                module.mindstudio_reserved_name = []
-            module.mindstudio_reserved_name.append(full_name)
-            if self.module_stack:
-                ModuleProcesser.module_node[full_name] = self.module_stack[-1]
+                    bw_hook = BackwardHook(module, [backward_hook])
+                ModuleProcesser.module_bw_hook_kernels[full_forward_name] = bw_hook
+                args = bw_hook.setup_input_hook(args)
+            return (args, kwargs) if torch_version_above_or_equal_2 else args
+        def forward_hook(module, args, kwargs_or_output, output_or_kwargs=None):
+            if hasattr(module, 'msprobe_module_dump') and not self.enable_module_dump:
+                return output_or_kwargs if torch_version_above_or_equal_2 else kwargs_or_output
+            index = ModuleProcesser.module_count.get(module_name)
+            full_name = f'{module_name}{Const.FORWARD}{Const.SEP}{index}'
+            hook_set = build_data_hook(BaseScope.Module_Type_Module, full_name)
+            hook_result = hook_set.forward_hook(module, args, kwargs_or_output, output_or_kwargs)
+            self.set_construct_info_in_hook(full_name)
+            if hook_result is not None:
+                result = hook_result
             else:
-                ModuleProcesser.module_node[full_name] = None
+                result = output_or_kwargs if torch_version_above_or_equal_2 else kwargs_or_output
-            ModuleProcesser.module_stack.append(full_name)
-            if self.module_stack:
-                ModuleProcesser.api_parent_node = self.module_stack[-1]
-            if self.scope:
-                self.scope.begin_module(full_name)
+            bw_hook = ModuleProcesser.module_bw_hook_kernels.get(full_name)
+            if bw_hook:
+                result = bw_hook.setup_output_hook(result)
-        def end_hook(module, input, output=None):
+            return result
+        return forward_pre_hook
+    def set_construct_info_in_pre_hook(self, full_name):
+        if self.module_stack:
+            ModuleProcesser.module_node[full_name] = self.module_stack[-1]
+        else:
+            ModuleProcesser.module_node[full_name] = None
+        ModuleProcesser.module_stack.append(full_name)
+        ModuleProcesser.api_parent_node = full_name
+        if self.scope:
+            self.scope.begin_module(full_name)
+    def set_construct_info_in_hook(self, full_name, is_forward=True):
+        if torch_version_above_or_equal_2 or is_forward:
             if self.module_stack:
                 ModuleProcesser.module_stack.pop()
-            if self.module_stack:
-                ModuleProcesser.api_parent_node = self.module_stack[-1]
-            else:
-                ModuleProcesser.api_parent_node = None
-            if not hasattr(module, "mindstudio_reserved_name") or not module.mindstudio_reserved_name:
-                raise RuntimeError(f"module reserve name is None when pop")
-            current_name = module.mindstudio_reserved_name.pop()
+            ModuleProcesser.api_parent_node = ModuleProcesser.module_stack[-1] if self.module_stack else None
             if self.scope:
-                self.scope.end_module(current_name)
-        def backward_hook(module, input, output=None):
-            try:
-                index = ModuleProcesser.module_count_func(name_prefix)
-            except IndexError as e:
-                index = None
-                pass
-            full_name = name_prefix + Const.SEP + str(index)
-            if not hasattr(module, "mindstudio_reserved_name") or not module.mindstudio_reserved_name:
-                module.mindstudio_reserved_name = []
-            module.mindstudio_reserved_name.append(full_name)
-            forward_full_name = replace_last_occurrence(full_name, Const.BACKWARD, Const.FORWARD)
-            ModuleProcesser.module_node[full_name] = replace_last_occurrence(
-                ModuleProcesser.module_node.get(forward_full_name), Const.FORWARD, Const.BACKWARD)
-            ModuleProcesser.api_parent_node = None
+                self.scope.end_module(full_name)
+        else:
             if self.scope:
                 self.scope.begin_module(full_name)
-        if torch_version_above_or_equal_2:
-            if Const.START in start_or_stop:
-                return pre_hook
-            else:
-                return end_hook
-        else:
-            if Const.FORWARD in name_prefix and Const.START in start_or_stop:
-                return pre_hook
-            elif Const.BACKWARD in name_prefix:
-                return backward_hook
-            else:
-                return end_hook
+            ModuleProcesser.api_parent_node = full_name

msprobe/pytorch/free_benchmark/common/utils.py CHANGED Viewed

@@ -16,7 +16,7 @@
 import torch
 from msprobe.core.common.exceptions import FreeBenchmarkException
-from msprobe.core.common.utils import recursion_depth_decorator
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark.common.enums import DeviceType

msprobe/pytorch/free_benchmark/compare/single_benchmark.py CHANGED Viewed

@@ -16,7 +16,7 @@
 import math
 import torch
-from msprobe.core.common.utils import recursion_depth_decorator
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.utils import TorchC

msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # limitations under the License.
 import torch
-from msprobe.core.common.utils import recursion_depth_decorator
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
@@ -95,13 +95,13 @@ class AddNoiseLayer(NpuBaseLayer):
         except Exception:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"when calculate maximun value, tensor is changed to float32."
+                f"when calculating the maximum value, the tensor is changed to float32."
             )
             max_val = TorchC.max(TorchC.abs(tensor_obj.to(torch.float32))).item()
         if max_val < abs_tol:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"Maximun value is less than the minimun threshold. Cancel add noise."
+                f"maximum value is less than the minimum threshold. Cancel adding noise."
             )
             return False
         return True

msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # limitations under the License.
 import torch
-from msprobe.core.common.utils import recursion_depth_decorator
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import ThresholdConfig
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
@@ -100,13 +100,13 @@ class BitNoiseLayer(NpuBaseLayer):
         except Exception:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"when calculate maximun value, tensor is changed to float32."
+                f"when calculate the maximum value, the tensor is changed to float32."
             )
             max_val = TorchC.max(TorchC.abs(tensor_obj.to(torch.float32))).item()
         if max_val < abs_tol:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.api_name}, "
-                f"Maximun value is less than the minimun threshold. Cancel add noise."
+                f"maximum value is less than the minimum threshold. Cancel adding noise."
             )
             return False
         return True

msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # limitations under the License.
 import torch
-from msprobe.core.common.utils import recursion_depth_decorator
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode
 from msprobe.pytorch.free_benchmark.common.params import DataParams

msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py CHANGED Viewed

@@ -15,7 +15,7 @@
 import torch
 from msprobe.core.common.const import Const
-from msprobe.core.common.utils import recursion_depth_decorator
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.pytorch.free_benchmark import logger
 from msprobe.pytorch.free_benchmark.common.constant import CommonField
 from msprobe.pytorch.free_benchmark.common.enums import PerturbationMode

msprobe/pytorch/free_benchmark/result_handlers/check_handler.py CHANGED Viewed

@@ -49,6 +49,6 @@ class CheckerHandler(FuzzHandler):
         except Exception as e:
             logger.warning_on_rank_0(
                 f"[msprobe] Free Benchmark: For {self.params.api_name}, "
-                f"when campare the result exception raise {e}"
+                f"when comparing the results, an exception is raised: {e}"
             )
         return data_params.original_result

msprobe/pytorch/function_factory.py CHANGED Viewed

@@ -70,7 +70,7 @@ class Register(dict):
         def add_register_item(key, value):
             if key in self._dict:
-                logger.warning(f"{value.__name__} has been registered before, so we will overriden it.")
+                logger.warning(f"{value.__name__} has been registered before, so we will override it.")
             self[key] = value
             return value

msprobe/pytorch/grad_probe/grad_monitor.py CHANGED Viewed

@@ -46,7 +46,7 @@ class GradientMonitor:
         if not os.path.exists(self._output_path):
             create_directory(self._output_path)
         else:
-            logger.warning(f"the file in {self._output_path} will be recoverd")
+            logger.warning(f"the file in {self._output_path} will be deleted")
         self._step = -1
         self._param2name = defaultdict(str)
@@ -97,7 +97,7 @@ class GradientMonitor:
                 create_directory(output_dirpath)
             output_path = os.path.join(output_dirpath, f"grad_summary_{self._step}.csv")
             if os.path.exists(output_path):
-                logger.warning(f"{output_path} will be recoverd")
+                logger.warning(f"{output_path} will be deleted")
                 remove_path(output_path)
             header_result = GradStatCsv.generate_csv_header(self._level_adp, self._bounds)
             output_lines.insert(0, header_result)

msprobe/pytorch/grad_probe/grad_stat_csv.py CHANGED Viewed

@@ -17,6 +17,7 @@ from abc import ABC, abstractmethod
 from collections import namedtuple
 import hashlib
 from functools import wraps
+import zlib
 import torch
 from msprobe.core.grad_probe.constant import GradConst
@@ -74,8 +75,8 @@ class CsvMd5(CsvItem):
     def generate_csv_content(csv_content_input):
         grad = csv_content_input.grad
         tensor_bytes = grad.cpu().detach().float().numpy().tobytes()
-        md5_hash = hashlib.md5(tensor_bytes)
-        return [md5_hash.hexdigest()]
+        md5_hash = f"{zlib.crc32(tensor_bytes):08x}"
+        return [md5_hash]
 @register_csv_item(GradConst.DISTRIBUTION)

msprobe/pytorch/hook_module/api_register.py ADDED Viewed

@@ -0,0 +1,155 @@
+# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+import os
+import inspect
+import torch
+import torch.distributed as dist
+from msprobe.core.common.const import Const
+from msprobe.core.data_dump.api_registry import ApiRegistry
+from msprobe.pytorch.common.log import logger
+from msprobe.pytorch.common.utils import (
+    torch_without_guard_version, is_gpu, torch_device_guard, parameter_adapter
+)
+from msprobe.pytorch.function_factory import npu_custom_functions
+from msprobe.pytorch.hook_module.hook_module import HOOKModule
+from msprobe.pytorch.hook_module.utils import dynamic_import_op
+from msprobe.core.common.file_utils import load_yaml
+# Probe for the optional mindspeed package; the flag records whether its ops could be imported.
+try:
+    import mindspeed.ops
+    mindspeed_enable = True
+except ImportError:
+    mindspeed_enable = False
+# Compare the numeric major version. A plain string comparison is lexicographic, so a
+# release such as "10.0.0" would sort below "2.0" and be reported as older than 2.x.
+_torch_release_fields = torch.__version__.split('+')[0].split('.')
+torch_version_above_2 = _torch_release_fields[0].isdigit() and int(_torch_release_fields[0]) >= 2
+_inner_used_api = {}
+# One-element tuple holding the path of the supported-API file located next to this module.
+_supported_api_list_path = (os.path.join(os.path.dirname(os.path.realpath(__file__)), Const.SUPPORT_API_FILE_NAME),)
+# API name substituted by npu_module_forward when the module's device is CUDA.
+_cuda_func_mapping = {"npu_fusion_attention": "gpu_fusion_attention"}
+# framework -> API type -> (object, tuple of objects).
+# NOTE(review): the role of each tuple element is defined by ApiRegistry - confirm there.
+_api_types = {
+    Const.PT_FRAMEWORK: {
+        Const.PT_API_TYPE_FUNCTIONAL: (torch.nn.functional, (torch.nn.functional,)),
+        Const.PT_API_TYPE_TENSOR: (torch.Tensor, (torch.Tensor,)),
+        Const.PT_API_TYPE_TORCH: (torch, (torch,)),
+        Const.PT_API_TYPE_VF: (torch._C._VariableFunctionsClass, (torch._VF,)),
+        Const.PT_API_TYPE_DIST: (dist, (dist, dist.distributed_c10d))
+    }
+}
+if not is_gpu:
+    import torch_npu
+    # Every registration below extends the PyTorch entry of _api_types in place.
+    if torch_without_guard_version:
+        _api_types[Const.PT_FRAMEWORK][Const.PT_API_TYPE_NPU] = (torch.ops.npu, (torch_npu, torch.ops.npu))
+    else:
+        _api_types[Const.PT_FRAMEWORK][Const.PT_API_TYPE_NPU] = (
+            torch_npu._C._VariableFunctionsClass,
+            (torch_npu,),
+        )
+        _api_types[Const.PT_FRAMEWORK][Const.PT_API_TYPE_NPU_DIST] = (
+            torch_npu.distributed,
+            (torch_npu.distributed, torch_npu.distributed.distributed_c10d),
+        )
+    if mindspeed_enable:
+        _api_types[Const.PT_FRAMEWORK][Const.PT_API_TYPE_MINDSPEED] = (mindspeed.ops, (mindspeed.ops,))
+        # Op entries are reduced to the part before the first separator, then given the
+        # Python file suffix, to form the list handed to dynamic_import_op.
+        mindspeed_op_list = load_yaml(_supported_api_list_path[0]).get(Const.PT_API_TYPE_MINDSPEED)
+        mindspeed_op_file_list = [op.split(Const.SEP)[0] + Const.PY_SUFFIX for op in mindspeed_op_list]
+        dynamic_import_op(mindspeed.ops, mindspeed_op_file_list)
+@parameter_adapter
+def tensor_module_forward(module, *args, **kwargs):
+    """Call the API stored on ``module`` with the given positional and keyword arguments."""
+    wrapped_api = module.api_func
+    return wrapped_api(*args, **kwargs)
+def dist_module_forward(module, *args, **kwargs):
+    """Run a distributed API and wait on the asynchronous work it started.
+
+    Args:
+        module: object exposing ``api_func`` (the wrapped callable) and ``api_name``.
+        *args, **kwargs: forwarded unchanged to ``module.api_func``.
+
+    Returns:
+        The value returned by ``module.api_func``.
+    """
+    handle = module.api_func(*args, **kwargs)
+    try:
+        bound = inspect.signature(module.api_func).bind(*args, **kwargs)
+        bound.apply_defaults()
+        # torch.distributed spells the keyword "async_op"; looking up any other key
+        # always yields the False default, so the returned handle is never waited on.
+        use_async_op_flag = bound.arguments.get("async_op", False)
+    except Exception as e:
+        # Best effort: if the signature cannot be inspected, fall back to not waiting.
+        use_async_op_flag = False
+        logger.warning(f"fail to get dist api's func signature because {e}, no wait")
+    if use_async_op_flag or module.api_name in ["isend", "irecv"]:
+        if handle and hasattr(handle, 'wait'):
+            handle.wait()
+    # batch_isend_irecv returns a list of requests; each one is waited on in turn.
+    if module.api_name == "batch_isend_irecv":
+        if isinstance(handle, list):
+            for req in handle:
+                req.wait()
+    return handle
+def npu_module_forward(module, *args, **kwargs):
+    """Call an NPU API, or its registered bench function when hooks are off.
+
+    With ``module.need_hook`` set, the stored ``api_func`` is called directly. Otherwise
+    the API name must be present in ``npu_custom_functions``; on a CUDA or CPU device
+    the registered custom function is called instead of ``api_func``.
+
+    Raises:
+        Exception: if hooks are off and no bench function is registered for the API name.
+    """
+    if module.need_hook:
+        return module.api_func(*args, **kwargs)
+    if module.api_name not in npu_custom_functions:
+        raise Exception(f'There is not bench function {module.api_name}')
+    on_cuda = module.device == Const.CUDA_LOWERCASE
+    if on_cuda:
+        # The stored name is rewritten in place to its CUDA counterpart when one is mapped.
+        module.api_name = _cuda_func_mapping.get(module.api_name, module.api_name)
+    if on_cuda or module.device == Const.CPU_LOWERCASE:
+        bench_func = npu_custom_functions[module.api_name]
+        return bench_func(*args, **kwargs)
+    return module.api_func(*args, **kwargs)
+# Dispatch table looked up by ApiTemplate.forward: API-type prefix -> forward helper.
+forward_methods = dict(
+    Tensor=tensor_module_forward,
+    Distributed=dist_module_forward,
+    NPU=npu_module_forward,
+)
+class ApiTemplate(HOOKModule):
+    """HOOKModule subclass that wraps a single API callable.
+
+    The instance stores the API name, the callable and its type prefix. ``forward``
+    routes the call through ``forward_methods`` when the prefix has an entry there,
+    otherwise it calls the stored callable directly.
+    """
+    def __init__(self, api_name, api_func, prefix, hook_build_func, need_hook=True, device=Const.CPU_LOWERCASE):
+        self.api_name = api_name
+        self.api_func = api_func
+        self.prefix = prefix
+        # Only the last separator-delimited segment of api_name goes into the prefixed name.
+        short_name = str(api_name.split(Const.SEP)[-1])
+        self.prefix_api_name = prefix + Const.SEP + short_name + Const.SEP
+        self.need_hook = need_hook
+        self.device = device
+        # The base-class initializer (which receives hook_build_func) runs only when hooks are wanted.
+        if self.need_hook:
+            super().__init__(hook_build_func)
+        if prefix == Const.DIST_API_TYPE_PREFIX:
+            self.op_is_distributed = True
+    @torch_device_guard
+    def forward(self, *args, **kwargs):
+        """Call the wrapped API, through its prefix-specific helper when one is registered."""
+        typed_forward = forward_methods.get(self.prefix)
+        if typed_forward:
+            return typed_forward(self, *args, **kwargs)
+        return self.api_func(*args, **kwargs)
+api_register = None
+def get_api_register(return_new=False):
+    """Return an ApiRegistry built from this module's API tables.
+
+    Args:
+        return_new: when true, build and return a fresh registry without touching the
+            module-level ``api_register``; otherwise return the shared instance,
+            creating it on first use.
+    """
+    global api_register
+    if not return_new and api_register is not None:
+        return api_register
+    registry = ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate)
+    if return_new:
+        return registry
+    api_register = registry
+    return registry

mindstudio-probe 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl

mindstudio-probe 1.2.2py3-none-any.whl → 8.1.0py3-none-any.whl