PyPI - mindstudio-probe - Versions diffs - 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl - Mend

mindstudio-probe: 1.3.0 (py3-none-any.whl) → 8.1.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (213)

{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/METADATA +4 -2
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/RECORD +204 -152
msprobe/README.md +32 -1
msprobe/core/__init__.py +17 -0
msprobe/core/common/const.py +120 -21
msprobe/core/common/exceptions.py +2 -2
msprobe/core/common/file_utils.py +279 -50
msprobe/core/common/framework_adapter.py +169 -0
msprobe/core/common/global_lock.py +86 -0
msprobe/core/common/runtime.py +25 -0
msprobe/core/common/utils.py +136 -45
msprobe/core/common_config.py +7 -0
msprobe/core/compare/acc_compare.py +646 -428
msprobe/core/compare/check.py +36 -103
msprobe/core/compare/compare_cli.py +4 -0
msprobe/core/compare/config.py +72 -0
msprobe/core/compare/highlight.py +215 -215
msprobe/core/compare/layer_mapping/layer_mapping.py +2 -0
msprobe/core/compare/merge_result/merge_result.py +4 -4
msprobe/core/compare/multiprocessing_compute.py +223 -110
msprobe/core/compare/npy_compare.py +2 -4
msprobe/core/compare/utils.py +214 -244
msprobe/core/config_check/__init__.py +17 -0
msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
msprobe/core/config_check/checkers/base_checker.py +60 -0
msprobe/core/config_check/checkers/dataset_checker.py +138 -0
msprobe/core/config_check/checkers/env_args_checker.py +96 -0
msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
msprobe/core/config_check/checkers/pip_checker.py +90 -0
msprobe/core/config_check/checkers/random_checker.py +367 -0
msprobe/core/config_check/checkers/weights_checker.py +147 -0
msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
msprobe/core/config_check/config_check_cli.py +51 -0
msprobe/core/config_check/config_checker.py +100 -0
msprobe/{mindspore/runtime.py → core/config_check/resource/dependency.yaml} +7 -4
msprobe/core/config_check/resource/env.yaml +57 -0
msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
msprobe/core/config_check/utils/utils.py +107 -0
msprobe/core/data_dump/api_registry.py +67 -4
msprobe/core/data_dump/data_collector.py +170 -89
msprobe/core/data_dump/data_processor/base.py +72 -51
msprobe/core/data_dump/data_processor/mindspore_processor.py +109 -55
msprobe/core/data_dump/data_processor/pytorch_processor.py +90 -82
msprobe/core/data_dump/json_writer.py +143 -27
msprobe/core/debugger/precision_debugger.py +144 -0
msprobe/core/grad_probe/constant.py +1 -1
msprobe/core/grad_probe/grad_compare.py +1 -1
msprobe/core/grad_probe/utils.py +1 -1
msprobe/core/hook_manager.py +242 -0
msprobe/core/monitor/anomaly_processor.py +384 -0
msprobe/core/service.py +357 -0
msprobe/core/single_save/__init__.py +0 -0
msprobe/core/single_save/single_comparator.py +243 -0
msprobe/core/single_save/single_saver.py +146 -0
msprobe/docs/01.installation.md +6 -5
msprobe/docs/02.config_introduction.md +79 -22
msprobe/docs/03.config_examples.md +1 -0
msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
msprobe/docs/05.data_dump_PyTorch.md +118 -49
msprobe/docs/06.data_dump_MindSpore.md +167 -20
msprobe/docs/07.accuracy_checker_PyTorch.md +2 -2
msprobe/docs/08.accuracy_checker_online_PyTorch.md +69 -9
msprobe/docs/09.accuracy_checker_MindSpore.md +18 -6
msprobe/docs/10.accuracy_compare_PyTorch.md +212 -74
msprobe/docs/11.accuracy_compare_MindSpore.md +87 -37
msprobe/docs/12.overflow_check_PyTorch.md +2 -2
msprobe/docs/13.overflow_check_MindSpore.md +2 -2
msprobe/docs/14.data_parse_PyTorch.md +3 -3
msprobe/docs/17.grad_probe.md +2 -1
msprobe/docs/18.online_dispatch.md +2 -2
msprobe/docs/19.monitor.md +90 -44
msprobe/docs/21.visualization_PyTorch.md +68 -15
msprobe/docs/22.visualization_MindSpore.md +71 -18
msprobe/docs/25.tool_function_introduction.md +23 -22
msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
msprobe/docs/27.dump_json_instruction.md +1 -1
msprobe/docs/28.debugger_save_instruction.md +111 -20
msprobe/docs/29.data_dump_MSAdapter.md +2 -2
msprobe/docs/30.overflow_check_MSAdapter.md +2 -2
msprobe/docs/31.config_check.md +95 -0
msprobe/docs/32.ckpt_compare.md +69 -0
msprobe/docs/33.generate_operator_MindSpore.md +181 -0
msprobe/docs/34.RL_collect.md +92 -0
msprobe/docs/35.nan_analyze.md +72 -0
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
msprobe/docs/img/compare_result.png +0 -0
msprobe/docs/img/save_compare_result_sample.png +0 -0
msprobe/docs/img/visualization/proxy.png +0 -0
msprobe/mindspore/__init__.py +1 -2
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +150 -58
msprobe/mindspore/api_accuracy_checker/api_runner.py +7 -3
msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +47 -69
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
msprobe/mindspore/api_accuracy_checker/compute_element.py +0 -1
msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -2
msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +460 -0
msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +9 -0
msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
msprobe/mindspore/cell_processor.py +204 -33
msprobe/mindspore/code_mapping/graph_parser.py +4 -21
msprobe/mindspore/common/const.py +17 -7
msprobe/mindspore/common/utils.py +128 -11
msprobe/mindspore/compare/common_dir_compare.py +382 -0
msprobe/mindspore/compare/distributed_compare.py +2 -26
msprobe/mindspore/compare/ms_compare.py +17 -405
msprobe/mindspore/compare/ms_graph_compare.py +14 -5
msprobe/mindspore/compare/utils.py +37 -0
msprobe/mindspore/debugger/debugger_config.py +53 -3
msprobe/mindspore/debugger/precision_debugger.py +72 -91
msprobe/mindspore/dump/cell_dump_process.py +877 -0
msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +864 -0
msprobe/mindspore/dump/dump_tool_factory.py +13 -5
msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
msprobe/mindspore/dump/hook_cell/api_register.py +40 -6
msprobe/mindspore/dump/hook_cell/hook_cell.py +18 -7
msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +18 -0
msprobe/mindspore/dump/jit_dump.py +21 -18
msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -15
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +12 -6
msprobe/mindspore/free_benchmark/common/utils.py +1 -1
msprobe/mindspore/grad_probe/global_context.py +7 -2
msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
msprobe/mindspore/mindspore_service.py +114 -0
msprobe/mindspore/monitor/common_func.py +52 -0
msprobe/mindspore/monitor/data_writers.py +237 -0
msprobe/mindspore/monitor/features.py +20 -7
msprobe/mindspore/monitor/module_hook.py +281 -209
msprobe/mindspore/monitor/optimizer_collect.py +334 -0
msprobe/mindspore/monitor/utils.py +25 -5
msprobe/mindspore/ms_config.py +16 -15
msprobe/mindspore/task_handler_factory.py +5 -2
msprobe/msprobe.py +19 -0
msprobe/nan_analyze/__init__.py +14 -0
msprobe/nan_analyze/analyzer.py +255 -0
msprobe/nan_analyze/graph.py +189 -0
msprobe/nan_analyze/utils.py +211 -0
msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +20 -20
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +4 -7
msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +204 -2
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +12 -11
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +1 -0
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +8 -5
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +2 -3
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +156 -0
msprobe/pytorch/attl_manager.py +65 -0
msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
msprobe/pytorch/common/utils.py +26 -14
msprobe/pytorch/compare/distributed_compare.py +4 -36
msprobe/pytorch/compare/pt_compare.py +13 -84
msprobe/pytorch/compare/utils.py +47 -0
msprobe/pytorch/debugger/debugger_config.py +34 -17
msprobe/pytorch/debugger/precision_debugger.py +66 -118
msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
msprobe/pytorch/dump/module_dump/module_dump.py +11 -58
msprobe/pytorch/dump/module_dump/module_processer.py +143 -113
msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
msprobe/pytorch/hook_module/api_register.py +29 -5
msprobe/pytorch/hook_module/hook_module.py +9 -18
msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +22 -1
msprobe/pytorch/hook_module/utils.py +28 -2
msprobe/pytorch/monitor/csv2tb.py +6 -2
msprobe/pytorch/monitor/data_writers.py +259 -0
msprobe/pytorch/monitor/module_hook.py +227 -158
msprobe/pytorch/monitor/module_metric.py +14 -0
msprobe/pytorch/monitor/optimizer_collect.py +242 -270
msprobe/pytorch/monitor/utils.py +16 -3
msprobe/pytorch/online_dispatch/dispatch.py +4 -2
msprobe/pytorch/online_dispatch/dump_compare.py +5 -2
msprobe/pytorch/parse_tool/lib/utils.py +3 -3
msprobe/pytorch/pt_config.py +8 -7
msprobe/pytorch/pytorch_service.py +73 -0
msprobe/visualization/builder/graph_builder.py +33 -13
msprobe/visualization/builder/msprobe_adapter.py +24 -11
msprobe/visualization/compare/graph_comparator.py +53 -45
msprobe/visualization/compare/mode_adapter.py +31 -1
msprobe/visualization/graph/base_node.py +3 -3
msprobe/visualization/graph/graph.py +2 -2
msprobe/visualization/graph_service.py +250 -103
msprobe/visualization/utils.py +27 -11
msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -106
msprobe/mindspore/monitor/anomaly_detect.py +0 -404
msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
msprobe/mindspore/service.py +0 -549
msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
msprobe/pytorch/monitor/anomaly_detect.py +0 -410
msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
msprobe/pytorch/service.py +0 -473
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/top_level.txt +0 -0
/msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
/msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
/msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0

msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py CHANGED Viewed

@@ -33,6 +33,9 @@ from msprobe.mindspore.api_accuracy_checker.multi_data_manager import MultiDataM
 from msprobe.mindspore.common.log import logger
 from msprobe.mindspore.common.const import MsCompareConst
+from msprobe.core.data_dump.data_collector import build_data_collector
+from msprobe.core.common.utils import Const, print_tools_ends_info, DumpPathAggregation
 class MultiApiAccuracyChecker(ApiAccuracyChecker):
     def __init__(self, args):
@@ -51,6 +54,12 @@ class MultiApiAccuracyChecker(ApiAccuracyChecker):
         # 初始化一个属性来存储当前的设备ID（用于日志中显示）
         self.current_device_id = None
+        self.save_error_data = args.save_error_data
+        if self.save_error_data:
+            config, dump_path_aggregation = self.init_save_error_data(args)
+            self.data_collector = build_data_collector(config)
+            self.data_collector.update_dump_paths(dump_path_aggregation)
     def process_on_device(self, device_id, api_infos, progress_queue):
         """
         在特定设备上处理一部分API。

msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py CHANGED Viewed

@@ -108,7 +108,8 @@ def delete_torch_paths():
         if count_delete_env_path >= MsCompareConst.MAX_RECURSION_DEPTH - 1:
             raise Exception(f"Please check if you have a valid PyTorch and MindTorch environment, and ensure "
-                            f"the PYTHONPATH environment variable depth does not exceed {Const.MAX_RECURSION_DEPTH}.")
+                            f"the PYTHONPATH environment variable depth does not "
+                            f"exceed {MsCompareConst.MAX_RECURSION_DEPTH}.")
 if not is_mindtorch():

msprobe/mindspore/cell_processor.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -13,21 +13,50 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from msprobe.core.data_dump.scope import ModuleRangeScope, MixRangeScope
+from collections import OrderedDict
+from mindspore import Tensor
+from mindspore.common.hook_handle import HookHandle
+from mindspore.ops.operations import _inner_ops as inner
 from msprobe.core.common.const import Const
+from msprobe.core.common.exceptions import MsprobeException
+from msprobe.core.data_dump.scope import ModuleRangeScope, MixRangeScope, BaseScope
+from msprobe.mindspore.common.const import Const as MsConst
+from msprobe.mindspore.common.log import logger
+from msprobe.mindspore.common.utils import (
+    is_mindtorch,
+    get_cells_and_names_with_index,
+    has_kwargs_in_forward_hook,
+    is_graph_mode_cell_dump_allowed
+)
+from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
+from msprobe.mindspore.dump.graph_mode_cell_dump import GraphModeCellDump
+from msprobe.core.common.runtime import Runtime
+def get_cell_construct(construct):
+    def _construct(self, *args, **kwargs):
+        if hasattr(self, 'msprobe_hook'):
+            setattr(self, 'msprobe_input_kwargs', kwargs)
+        return construct(self, *args, **kwargs)
+    return _construct
 class CellProcessor:
     cell_count = {}
     cell_stack = []
-    api_parent_node = ""
+    api_parent_node = None
     module_node = {}
+    cell_bw_hook_kernels = {}
+    cell_backward_pre_hook = []
+    cell_backward_hook = []
     def __init__(self, scope):
         self.scope = scope if isinstance(scope, (ModuleRangeScope, MixRangeScope)) else None
     @staticmethod
-    def set_cell_count(cell_name):
+    def set_and_get_calls_number(cell_name):
         if cell_name not in CellProcessor.cell_count:
             CellProcessor.cell_count[cell_name] = 0
         else:
@@ -38,42 +67,184 @@ class CellProcessor:
     def reset_cell_stats(cls):
         cls.cell_count = {}
         cls.cell_stack = []
-        cls.api_parent_node = ""
+        cls.api_parent_node = None
         cls.module_node = {}
+        cls.cell_bw_hook_kernels = {}
+        cls.cell_backward_pre_hook = []
+        cls.cell_backward_hook = []
-    def node_hook(self, name_prefix, start_or_stop, **kwargs):
-        def begin_hook(cell, input_data):
-            full_name = self.set_and_get_reserved_name(cell, name_prefix, is_called_by_pre_hook=True)
-            if CellProcessor.cell_stack:
-                CellProcessor.module_node[full_name] = CellProcessor.cell_stack[-1]
-            else:
-                CellProcessor.module_node[full_name] = None
+    def register_cell_hook(self, models, build_hook, config: DebuggerConfig):
+        if not models:
+            raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR,
+                                   'The model cannot be None, when level is "L0" or "mix"')
+        is_registered = False
+        model_type = Const.MODULE if is_mindtorch() else Const.CELL
+        cells_with_index_in_pynative_mode, cells_with_index_in_graph_mode = get_cells_and_names_with_index(models)
+        construct_name = '_call_impl' if is_mindtorch() else '_run_construct'
+        for index, cells_and_names in cells_with_index_in_pynative_mode.items():
+            model = models if index == "-1" else models[int(index)]
+            for name, cell in cells_and_names:
+                if cell == model:
+                    continue
+                if not has_kwargs_in_forward_hook():
+                    if not hasattr(cell.__class__, 'msprobe_construct'):
+                        setattr(cell.__class__, 'msprobe_construct', True)
+                        if hasattr(cell.__class__, construct_name):
+                            setattr(cell.__class__, construct_name,
+                                    get_cell_construct(getattr(cell.__class__, construct_name)))
+                setattr(cell, 'msprobe_hook', True)
+                cell_index = (index + Const.SEP) if index != "-1" else ""
+                prefix = f'{model_type}{Const.SEP}{cell_index}{name}{Const.SEP}{cell.__class__.__name__}{Const.SEP}'
+                forward_pre_hook = self.build_cell_hook(prefix, build_hook)
+                cell.register_forward_pre_hook(forward_pre_hook)
+                if not is_registered:
+                    logger.info("The cell hook function is successfully mounted to the model.")
+                is_registered = True
+        if is_graph_mode_cell_dump_allowed(config):
+            cells_and_names_in_graph_mode = []
+            for index, cells_and_names in cells_with_index_in_graph_mode.items():
+                model = models if index == "-1" else models[int(index)]
+                for name, cell in cells_and_names:
+                    if cell == model:
+                        continue
+                    cell_index = (index + Const.SEP) if index != "-1" else ""
+                    cells_and_names_in_graph_mode.append((f'{cell_index}{name}', cell))
+            if cells_and_names_in_graph_mode:
+                Runtime.run_mode = MsConst.PYNATIVE_GRAPH_MODE
+                GraphModeCellDump(config, cells_and_names_in_graph_mode, strict=False).handle()
-            CellProcessor.cell_stack.append(full_name)
-            CellProcessor.api_parent_node = full_name
+    def build_cell_hook(self, cell_name, build_data_hook):
+        def forward_pre_hook(cell, args):
+            index = CellProcessor.set_and_get_calls_number(cell_name)
+            full_forward_name = f'{cell_name}{Const.FORWARD}{Const.SEP}{index}'
+            full_backward_name = f'{cell_name}{Const.BACKWARD}{Const.SEP}{index}'
-            if self.scope:
-                self.scope.begin_module(full_name)
+            self.set_construct_info_in_pre_hook(full_forward_name)
-        def end_hook(cell, input_data, output_data):
-            if CellProcessor.cell_stack:
-                CellProcessor.cell_stack.pop()
-            if CellProcessor.cell_stack:
-                CellProcessor.api_parent_node = CellProcessor.cell_stack[-1]
+            if not hasattr(cell, 'msprobe_forward_hook'):
+                if is_mindtorch():
+                    cell.register_forward_hook(forward_hook, prepend=True, with_kwargs=True)
+                else:
+                    forward_hook_dict = getattr(cell, '_forward_hook', OrderedDict())
+                    if has_kwargs_in_forward_hook():
+                        forward_hook_with_kwargs_dict = getattr(cell, '_forward_hook_with_kwargs', OrderedDict())
+                        handle = HookHandle(forward_hook_dict, extra_dict=forward_hook_with_kwargs_dict)
+                        forward_hook_with_kwargs_dict[handle.handle_id] = True
+                    else:
+                        handle = HookHandle(forward_hook_dict)
+                    forward_hook_dict[handle.handle_id] = forward_hook
+                    forward_hook_dict.move_to_end(handle.handle_id, last=False)
+                setattr(cell, 'msprobe_forward_hook', True)
+            def get_backward_hook(backward_data_hook, full_backward_name):
+                def backward_hook_fn(cell, grad_input, grad_output):
+                    new_output = backward_data_hook(cell, grad_input, grad_output)
+                    self.set_construct_info_in_hook(full_backward_name)
+                    cell.has_pre_hook_called = False
+                    return new_output
+                return backward_hook_fn
+            enable_hooked = sum(
+                [isinstance(ele, Tensor) and ele.dtype not in MsConst.NonDifferentiableType for ele in args]
+            )
+            if enable_hooked:
+                backward_hook = OrderedDict()
+                hook_set = build_data_hook(BaseScope.Module_Type_Module, full_forward_name)
+                backward_hook[full_backward_name] = get_backward_hook(hook_set.backward_hook, full_backward_name)
+                CellProcessor.cell_backward_hook.append(backward_hook)
+                bw_hook = inner.CellBackwardHook(full_backward_name, cell,
+                                                 self.cell_backward_hook[-1])
+                bw_hook.register_backward_hook()
+                CellProcessor.cell_bw_hook_kernels[full_forward_name] = bw_hook
+                args = bw_hook(*args)
+            return args
+        def forward_hook(cell, args, kwargs_or_output, output_or_kwargs=None):
+            index = CellProcessor.cell_count.get(cell_name, 0)
+            full_forward_name = f'{cell_name}{Const.FORWARD}{Const.SEP}{index}'
+            full_backward_name = f'{cell_name}{Const.BACKWARD}{Const.SEP}{index}'
+            self.set_construct_info_in_hook(full_forward_name)
+            hook_set = build_data_hook(BaseScope.Module_Type_Module, full_forward_name)
+            hook_result = hook_set.forward_hook(cell, args, kwargs_or_output, output_or_kwargs)
+            if hook_result is not None:
+                outputs = hook_result
             else:
-                CellProcessor.api_parent_node = None
+                outputs = output_or_kwargs if has_kwargs_in_forward_hook() else kwargs_or_output
+            bw_hook = CellProcessor.cell_bw_hook_kernels.get(full_forward_name)
+            if bw_hook:
+                if not isinstance(outputs, (Tensor, tuple)):
+                    logger.warning("For backward hooks to be called,"
+                                   " cell output should be a Tensor or a tuple of Tensors"
+                                   f" but received {type(outputs)}")
+                if isinstance(outputs, tuple):
+                    new_outputs = bw_hook(*outputs)
+                else:
+                    new_outputs = bw_hook(outputs)
+                if isinstance(outputs, tuple) and len(outputs) == 1:
+                    new_outputs = (new_outputs,)
+                outputs = new_outputs
+            def get_backward_pre_hook(full_backward_name, backward_data_hook):
+                def backward_pre_hook_fn(cell, grad_output):
+                    cell.has_pre_hook_called = True
+                    self.set_construct_info_in_pre_hook(full_backward_name)
+                    if backward_data_hook:
+                        backward_data_hook(cell, (), grad_output)
+                        self.set_construct_info_in_hook(full_backward_name)
+                        cell.has_pre_hook_called = False
+                return backward_pre_hook_fn
-            if self.scope:
-                self.scope.end_module(cell.mindstudio_reserved_name)
+            backward_pre_hook = OrderedDict()
+            backward_data_hook = None if bw_hook else hook_set.backward_hook
+            backward_pre_hook[full_backward_name] = get_backward_pre_hook(full_backward_name, backward_data_hook)
+            CellProcessor.cell_backward_pre_hook.append(backward_pre_hook)
+            bw_pre_hook = inner.CellBackwardHook(full_backward_name, cell,
+                                                 self.cell_backward_pre_hook[-1])
+            bw_pre_hook.register_backward_pre_hook()
-        return begin_hook if Const.START == start_or_stop else end_hook
+            if isinstance(outputs, tuple):
+                result = bw_pre_hook(*outputs)
+            else:
+                result = bw_pre_hook(outputs)
+            if isinstance(outputs, tuple):
+                if len(outputs) == 1:
+                    result = (result,)
+                if len(result) != len(outputs):
+                    raise TypeError(
+                        f"The backward pre hook return value size is {len(result)} "
+                        f"not equal to output size {len(outputs)}"
+                    )
+            return result
+        return forward_pre_hook
-    def set_and_get_reserved_name(self, cell, cell_name, is_called_by_pre_hook=False):
-        if not is_called_by_pre_hook and hasattr(cell, 'has_pre_hook_called') and cell.has_pre_hook_called:
-            cell.has_pre_hook_called = False
+    def set_construct_info_in_pre_hook(self, full_name):
+        if self.cell_stack:
+            CellProcessor.module_node[full_name] = self.cell_stack[-1]
         else:
-            if is_called_by_pre_hook:
-                cell.has_pre_hook_called = True
-            index = self.set_cell_count(cell_name)
-            cell.mindstudio_reserved_name = cell_name + Const.SEP + str(index)
-        return cell.mindstudio_reserved_name
+            CellProcessor.module_node[full_name] = None
+        CellProcessor.cell_stack.append(full_name)
+        CellProcessor.api_parent_node = full_name
+        if self.scope:
+            self.scope.begin_module(full_name)
+    def set_construct_info_in_hook(self, full_name):
+        if self.cell_stack:
+            CellProcessor.cell_stack.pop()
+        CellProcessor.api_parent_node = CellProcessor.cell_stack[-1] if self.cell_stack else None
+        if self.scope:
+            self.scope.end_module(full_name)

msprobe/mindspore/code_mapping/graph_parser.py CHANGED Viewed

@@ -34,19 +34,6 @@ class Parser:
             if isinstance(subgraph_node.attrs, list):
                 subgraph_node.attrs.extend(attrs)
-    @staticmethod
-    def parse_graph_attributes(text: str, graph_node: GraphNode) -> None:
-        attr_pattern = re.compile(r'# Attrs:\s*(.*)', re.DOTALL)
-        match = attr_pattern.search(text, graph_node.pos)
-        if match:
-            attrs = match.group(1).strip().split('\n')
-            for attr in attrs:
-                if not attr:
-                    break
-                key, value = attr.split(':')
-                if isinstance(graph_node.attrs, dict):
-                    graph_node.attrs[key.strip()] = value.strip()
     @staticmethod
     def parse_code_info(text: str, start_pos: int, end_pos: int) -> List[str]:
         code_info = []
@@ -124,8 +111,9 @@ class Parser:
             scope_match = scope_pattern.search(text, end_pos)
             scope = scope_match.group(1) if scope_match else ""
-            id_pattern = re.compile(r'.*cnode_primal_attrs:'
-                                    r'\s*\{.*\b(?:forward_unique_id|unique_id):\s*\"(\d+)\".*', re.IGNORECASE)
+            id_pattern = re.compile(
+                r'cnode_primal_attrs:'r'\s*\{[\w+]{1, 10000}\b(?:forward_unique_id|unique_id):\s*\"(\d+)\"',
+                re.IGNORECASE)
             unique_id_match = id_pattern.search(text, end_pos, scope_match.start())
             unique_id = unique_id_match.group(1) if unique_id_match else None
@@ -186,7 +174,7 @@ class Parser:
                     node_info.var_inputs.append(callee_name)
     def parse_subgraphs(self, text: str) -> None:
-        subgraph_pattern = re.compile(r'subgraph\s+@(\S+)(\([^\)]*\))?\s+.*\{')
+        subgraph_pattern = re.compile(r'/subgraph\s+@([\w+]{1,1000)(\([^\)]{1,100}\))?\s+\S[^\{]\{/+')
         matches = list(subgraph_pattern.finditer(text))
         end_pos = 0
         for match in matches:
@@ -203,11 +191,6 @@ class Parser:
             subgraph_info.end = end_pos
             logging.info('Parsed subgraph: %s', subgraph_name)
-    def count_nodes(self) -> Tuple[int, int]:
-        total_nodes = len(self.nodes)
-        total_cnodes = sum(1 for node in self.nodes.values() if node.name.startswith('CNode'))
-        return total_nodes, total_cnodes
     def create_backward_map(self):
         for node in self.nodes.values():
             if node.scope and node.scope.startswith("Gradients"):

msprobe/mindspore/common/const.py CHANGED Viewed

@@ -15,6 +15,7 @@
 import numpy as np
 import mindspore as ms
+from mindspore import dtype as mstype
 from msprobe.core.common.const import Const as CoreConst
@@ -23,14 +24,20 @@ class Const:
     CELL = "cell"
     API = "api"
     KERNEL = "kernel"
+    CELL_AND_API = 'cell_and_api'
     TOOL_LEVEL_DICT = {
         CoreConst.LEVEL_L0: CELL,
         CoreConst.LEVEL_L1: API,
-        CoreConst.LEVEL_L2: KERNEL
+        CoreConst.LEVEL_L2: KERNEL,
+        CoreConst.LEVEL_MIX: CELL_AND_API
     }
-    PYNATIVE_MODE = "pynative"
+    PYNATIVE_MODE = CoreConst.PYNATIVE_MODE
+    GRAPH_MODE = "graph"
     GRAPH_GE_MODE = "graph_ge"
     GRAPH_KBYK_MODE = "graph_kbyk"
+    PYNATIVE_GRAPH_MODE = CoreConst.PYNATIVE_GRAPH_MODE
     JIT_LEVEL = "jit_level"
     JIT_LEVEL_O0 = "O0"
     JIT_LEVEL_O1 = "O1"
@@ -61,6 +68,7 @@ class Const:
     DROPOUT_API_NAME_PREFIX = "dropout"
     GRAPH_DATA_MODE_LIST = [CoreConst.ALL, CoreConst.INPUT, CoreConst.OUTPUT]
+    GRAPH_CELL_DUMP_DATA_MODE_LIST = [CoreConst.ALL, CoreConst.FORWARD, CoreConst.BACKWARD]
     HOOK_MS_PREFIX_DICT = {
         OPS_DATA_PREFIX: OPS_PREFIX,
@@ -69,6 +77,13 @@ class Const:
         MINT_NN_FUNC_DATA_PREFIX: MINT_NN_FUNC_PREFIX
     }
+    NonDifferentiableType = (
+        mstype.bool_, mstype.int8, mstype.byte, mstype.uint8, mstype.ubyte,
+        mstype.int16, mstype.short, mstype.uint16, mstype.ushort,
+        mstype.int32, mstype.intc, mstype.uint32, mstype.uintc,
+        mstype.int64, mstype.intp, mstype.uint64, mstype.uintp
+    )
 class MsCompareConst:
     # api_info field
@@ -88,14 +103,11 @@ class MsCompareConst:
     MINDTORCH_NPU = "NPU"
     MINDTORCH_DIST = "Distributed"
     MT_VALID_API_TYPES = [
         MINDTORCH, MINDTORCH_FUNC, MINDTORCH_TENSOR
     ]
     SUPPORTED_FUSION_LIST = ["flash_attention_score"]
     TASK_FIELD = "task"
     STATISTICS_TASK = "statistics"
     FRAMEWORK = "framework"
@@ -129,8 +141,6 @@ class MsCompareConst:
         EXCEPTION_SKIP = "exception_skip"
 class FreeBenchmarkConst:
     ADD_NOISE = "add_noise"
     BIT_NOISE = "bit_noise"

msprobe/mindspore/common/utils.py CHANGED Viewed

@@ -13,19 +13,34 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import inspect
 import os
 import random
+import types
 import mindspore as ms
 from mindspore import ops
+from mindspore.common.jit_config import JitConfig
 from mindspore.mint import nn
+from msprobe.core.common.const import Const
+from msprobe.core.common.decorator import recursion_depth_decorator
 from msprobe.core.common.exceptions import DistributedNotInitializedError
 from msprobe.core.common.file_utils import path_len_exceeds_limit, check_path_exists, save_npy
 from msprobe.core.common.log import logger
-from msprobe.core.common.const import Const
 from msprobe.core.common.utils import CompareException, check_seed_all, is_save_variable_valid
+from msprobe.mindspore.common.const import Const as MsConst
+try:
+    from mindspore._c_expression import _set_init_iter
+except ImportError:
+    enable_dynamic_kbyk_dump = False
+else:
+    enable_dynamic_kbyk_dump = True
+mindtorch_check_result = None
+register_backward_hook_functions = {}
+kwargs_exist_in_forward_hook = None
 class MsprobeStep(ms.train.Callback):
@@ -33,6 +48,11 @@ class MsprobeStep(ms.train.Callback):
         super(MsprobeStep, self).__init__()
         self.debugger = debugger
+    def on_train_begin(self, run_context):
+        self.debugger.start()
+        if enable_dynamic_kbyk_dump:
+            _set_init_iter(0)
     def on_train_step_begin(self, run_context):
         self.debugger.start()
@@ -82,8 +102,8 @@ def convert_to_int(value):
 def clean_input_kwargs(cell):
-    if hasattr(cell, 'input_kwargs'):
-        del cell.input_kwargs
+    if hasattr(cell, 'msprobe_input_kwargs'):
+        del cell.msprobe_input_kwargs
 def list_lowest_level_directories(root_dir):
@@ -152,9 +172,6 @@ def remove_dropout():
     nn.functional.dropout = dropout_ext
-mindtorch_check_result = None
 def is_mindtorch():
     global mindtorch_check_result
     if mindtorch_check_result is None:
@@ -169,11 +186,11 @@ def is_mindtorch():
     return mindtorch_check_result
-register_backward_hook_functions = {}
 def set_register_backward_hook_functions():
     global register_backward_hook_functions
+    if register_backward_hook_functions:
+        return
     if is_mindtorch():
         import torch
         from msprobe.mindspore.mindtorch import (_call_impl,
@@ -192,7 +209,7 @@ def set_register_backward_hook_functions():
 def check_save_param(variable, name, save_backward):
     # try catch this api to skip invalid call
-    valid_data_types = tuple([ms.Tensor, int, float, str])
+    valid_data_types = (ms.Tensor, int, float, str)
     if not is_save_variable_valid(variable, valid_data_types):
         valid_data_types_with_nested_types = valid_data_types + (dict, tuple, list)
         logger.warning("PrecisionDebugger.save variable type not valid, "
@@ -209,3 +226,103 @@ def check_save_param(variable, name, save_backward):
                        "should be bool. "
                        "Skip current save process.")
         raise ValueError
+def is_graph_mode_cell_dump_allowed(config):
+    if config.task not in [Const.TENSOR, Const.STATISTICS] or is_mindtorch() or not hasattr(ops, 'DumpGradient'):
+        return False
+    valid_mix_level = [MsConst.CELL_AND_API, Const.LEVEL_MIX]
+    if config.level in valid_mix_level and config.execution_mode == MsConst.PYNATIVE_MODE:
+        return True
+    return config.level == MsConst.CELL or config.level == Const.LEVEL_L0
+@recursion_depth_decorator('msprobe.mindspore.common.utils.is_decorated_by_jit')
+def is_decorated_by_jit(func):
+    closure = getattr(func, '__closure__', [])
+    if closure:
+        for obj in closure:
+            if isinstance(obj.cell_contents, JitConfig):
+                return True
+            elif isinstance(obj.cell_contents, types.FunctionType) and hasattr(obj.cell_contents, '__closure__'):
+                if is_decorated_by_jit(obj.cell_contents):
+                    return True
+    return False
+@recursion_depth_decorator('msprobe.mindspore.common.utils.get_cells_and_names')
+def get_cells_and_names(model, cells_set=None, name_prefix=''):
+    cells_set = cells_set if cells_set else set()
+    if model in cells_set:
+        return
+    cells_set.add(model)
+    jit_decorated = is_decorated_by_jit(model.construct)
+    yield name_prefix, model, jit_decorated
+    if jit_decorated:
+        return
+    children_cells = getattr(model, '_cells')
+    for name, cell in children_cells.items():
+        if cell:
+            cells_name_prefix = f'{name_prefix}{Const.SEP}{name}' if name_prefix else name
+            jit_decorated = is_decorated_by_jit(model.construct)
+            if jit_decorated:
+                yield cells_name_prefix, cell, jit_decorated
+            else:
+                for ele in get_cells_and_names(cell, cells_set, cells_name_prefix):
+                    yield ele
+def get_cells_and_names_with_index(models):
+    cells_with_index_in_pynative_mode = {}
+    cells_with_index_in_graph_mode = {}
+    def distinguish_cells(cells):
+        cells_in_pynative_mode = []
+        cells_in_graph_mode = []
+        for name, cell, jit_decorated in cells:
+            if jit_decorated:
+                cells_in_graph_mode.append((name, cell))
+            else:
+                cells_in_pynative_mode.append((name, cell))
+        return cells_in_pynative_mode, cells_in_graph_mode
+    if is_mindtorch():
+        if isinstance(models, (list, tuple)):
+            for index, model in enumerate(models):
+                cells_with_index_in_pynative_mode[str(index)] = model.named_modules()
+        else:
+            cells_with_index_in_pynative_mode["-1"] = models.named_modules()
+    else:
+        if isinstance(models, (list, tuple)):
+            for index, model in enumerate(models):
+                cells = get_cells_and_names(model)
+                cells_in_pynative_mode, cells_in_graph_mode = distinguish_cells(cells)
+                cells_with_index_in_pynative_mode[str(index)] = cells_in_pynative_mode
+                cells_with_index_in_graph_mode[str(index)] = cells_in_graph_mode
+        else:
+            cells = get_cells_and_names(models)
+            cells_in_pynative_mode, cells_in_graph_mode = distinguish_cells(cells)
+            cells_with_index_in_pynative_mode["-1"] = cells_in_pynative_mode
+            cells_with_index_in_graph_mode["-1"] = cells_in_graph_mode
+    return cells_with_index_in_pynative_mode, cells_with_index_in_graph_mode
+def has_kwargs_in_forward_hook():
+    global kwargs_exist_in_forward_hook
+    if kwargs_exist_in_forward_hook is None:
+        if is_mindtorch():
+            kwargs_exist_in_forward_hook = True
+            return kwargs_exist_in_forward_hook
+        try:
+            func_params = inspect.signature(nn.Cell.register_forward_hook).parameters
+            kwargs_exist_in_forward_hook = 'with_kwargs' in func_params
+        except Exception:
+            kwargs_exist_in_forward_hook = False
+        return kwargs_exist_in_forward_hook
+    return kwargs_exist_in_forward_hook

mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl

mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl