mindstudio-probe 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/METADATA +3 -3
  2. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/RECORD +168 -150
  3. msprobe/README.md +27 -22
  4. msprobe/core/common/const.py +129 -60
  5. msprobe/core/common/decorator.py +50 -0
  6. msprobe/core/common/exceptions.py +3 -1
  7. msprobe/core/common/file_utils.py +25 -2
  8. msprobe/core/common/inplace_ops.yaml +1 -0
  9. msprobe/core/common/utils.py +43 -33
  10. msprobe/core/compare/acc_compare.py +43 -74
  11. msprobe/core/compare/check.py +2 -6
  12. msprobe/core/compare/highlight.py +2 -0
  13. msprobe/core/compare/layer_mapping/data_scope_parser.py +1 -1
  14. msprobe/core/compare/layer_mapping/layer_mapping.py +2 -1
  15. msprobe/core/compare/merge_result/merge_result.py +16 -9
  16. msprobe/core/compare/merge_result/utils.py +81 -0
  17. msprobe/core/compare/multiprocessing_compute.py +19 -12
  18. msprobe/core/compare/npy_compare.py +30 -12
  19. msprobe/core/compare/utils.py +30 -10
  20. msprobe/core/data_dump/api_registry.py +176 -0
  21. msprobe/core/data_dump/data_collector.py +58 -13
  22. msprobe/core/data_dump/data_processor/base.py +94 -10
  23. msprobe/core/data_dump/data_processor/factory.py +3 -0
  24. msprobe/core/data_dump/data_processor/mindspore_processor.py +33 -33
  25. msprobe/core/data_dump/data_processor/pytorch_processor.py +99 -18
  26. msprobe/core/data_dump/json_writer.py +61 -40
  27. msprobe/core/grad_probe/constant.py +1 -0
  28. msprobe/core/grad_probe/grad_compare.py +1 -1
  29. msprobe/core/overflow_check/abnormal_scene.py +2 -0
  30. msprobe/docs/01.installation.md +27 -1
  31. msprobe/docs/02.config_introduction.md +27 -23
  32. msprobe/docs/03.config_examples.md +24 -0
  33. msprobe/docs/05.data_dump_PyTorch.md +103 -16
  34. msprobe/docs/06.data_dump_MindSpore.md +76 -32
  35. msprobe/docs/07.accuracy_checker_PyTorch.md +11 -1
  36. msprobe/docs/08.accuracy_checker_online_PyTorch.md +3 -1
  37. msprobe/docs/09.accuracy_checker_MindSpore.md +5 -3
  38. msprobe/docs/10.accuracy_compare_PyTorch.md +59 -33
  39. msprobe/docs/11.accuracy_compare_MindSpore.md +40 -16
  40. msprobe/docs/12.overflow_check_PyTorch.md +3 -1
  41. msprobe/docs/13.overflow_check_MindSpore.md +4 -2
  42. msprobe/docs/14.data_parse_PyTorch.md +1 -7
  43. msprobe/docs/18.online_dispatch.md +1 -1
  44. msprobe/docs/19.monitor.md +332 -273
  45. msprobe/docs/21.visualization_PyTorch.md +42 -13
  46. msprobe/docs/22.visualization_MindSpore.md +43 -13
  47. msprobe/docs/23.generate_operator_PyTorch.md +9 -9
  48. msprobe/docs/27.dump_json_instruction.md +301 -27
  49. msprobe/docs/28.debugger_save_instruction.md +94 -0
  50. msprobe/docs/28.kernel_dump_MindSpore.md +69 -0
  51. msprobe/docs/29.data_dump_MSAdapter.md +229 -0
  52. msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
  53. msprobe/docs/FAQ.md +3 -11
  54. msprobe/docs/img/compare_result.png +0 -0
  55. msprobe/docs/img/merge_result.png +0 -0
  56. msprobe/docs/img/monitor/step_count_per_record.png +0 -0
  57. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  58. msprobe/docs/img/visualization/vis_match_info.png +0 -0
  59. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  60. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  61. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  62. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  63. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  64. msprobe/mindspore/__init__.py +4 -2
  65. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +32 -7
  66. msprobe/mindspore/api_accuracy_checker/api_runner.py +70 -22
  67. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
  68. msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +602 -0
  69. msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
  70. msprobe/mindspore/api_accuracy_checker/compute_element.py +47 -1
  71. msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -1
  72. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +2 -1
  73. msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +130 -0
  74. msprobe/mindspore/api_accuracy_checker/type_mapping.py +24 -1
  75. msprobe/mindspore/api_accuracy_checker/utils.py +6 -1
  76. msprobe/mindspore/common/const.py +61 -0
  77. msprobe/mindspore/common/utils.py +48 -18
  78. msprobe/mindspore/compare/ms_compare.py +27 -19
  79. msprobe/mindspore/compare/ms_graph_compare.py +6 -5
  80. msprobe/mindspore/debugger/debugger_config.py +31 -6
  81. msprobe/mindspore/debugger/precision_debugger.py +45 -14
  82. msprobe/mindspore/dump/dump_tool_factory.py +5 -3
  83. msprobe/mindspore/dump/hook_cell/api_register.py +142 -0
  84. msprobe/mindspore/dump/hook_cell/hook_cell.py +9 -10
  85. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +24 -26
  86. msprobe/mindspore/dump/jit_dump.py +21 -15
  87. msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +22 -56
  88. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +0 -1
  89. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +10 -6
  90. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
  91. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
  92. msprobe/mindspore/grad_probe/global_context.py +2 -0
  93. msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
  94. msprobe/mindspore/grad_probe/hook.py +2 -4
  95. msprobe/mindspore/monitor/anomaly_detect.py +404 -0
  96. msprobe/mindspore/monitor/distributed/__init__.py +0 -0
  97. msprobe/mindspore/monitor/distributed/distributed_ops.yaml +15 -0
  98. msprobe/mindspore/monitor/distributed/stack_blacklist.yaml +5 -0
  99. msprobe/mindspore/monitor/distributed/wrap_distributed.py +300 -0
  100. msprobe/mindspore/monitor/features.py +63 -0
  101. msprobe/mindspore/monitor/module_hook.py +873 -0
  102. msprobe/mindspore/monitor/module_spec_verifier.py +94 -0
  103. msprobe/mindspore/monitor/utils.py +309 -0
  104. msprobe/mindspore/ms_config.py +8 -2
  105. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
  106. msprobe/mindspore/service.py +114 -34
  107. msprobe/pytorch/__init__.py +0 -1
  108. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
  109. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +12 -7
  110. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +2 -2
  111. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +4 -5
  112. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +5 -5
  113. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +25 -6
  114. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -19
  115. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
  116. msprobe/pytorch/bench_functions/apply_adam.py +215 -0
  117. msprobe/pytorch/bench_functions/group_norm_silu.py +27 -0
  118. msprobe/pytorch/{parse.py → bench_functions/mish.py} +6 -4
  119. msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +50 -0
  120. msprobe/pytorch/bench_functions/sort_v2.py +21 -0
  121. msprobe/pytorch/common/utils.py +97 -4
  122. msprobe/pytorch/debugger/debugger_config.py +19 -9
  123. msprobe/pytorch/debugger/precision_debugger.py +24 -1
  124. msprobe/pytorch/dump/module_dump/module_dump.py +4 -3
  125. msprobe/pytorch/dump/module_dump/module_processer.py +21 -35
  126. msprobe/pytorch/free_benchmark/common/utils.py +1 -1
  127. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
  128. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
  129. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
  130. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
  131. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
  132. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
  133. msprobe/pytorch/function_factory.py +8 -2
  134. msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
  135. msprobe/pytorch/hook_module/api_register.py +131 -0
  136. msprobe/pytorch/hook_module/hook_module.py +19 -14
  137. msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
  138. msprobe/pytorch/hook_module/support_wrap_ops.yaml +173 -75
  139. msprobe/pytorch/monitor/anomaly_detect.py +14 -29
  140. msprobe/pytorch/monitor/csv2tb.py +18 -14
  141. msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
  142. msprobe/pytorch/monitor/module_hook.py +238 -193
  143. msprobe/pytorch/monitor/module_metric.py +9 -6
  144. msprobe/pytorch/monitor/optimizer_collect.py +100 -67
  145. msprobe/pytorch/monitor/unittest/test_monitor.py +1 -1
  146. msprobe/pytorch/monitor/utils.py +76 -44
  147. msprobe/pytorch/online_dispatch/compare.py +0 -2
  148. msprobe/pytorch/online_dispatch/dispatch.py +9 -0
  149. msprobe/pytorch/online_dispatch/dump_compare.py +3 -0
  150. msprobe/pytorch/online_dispatch/utils.py +3 -0
  151. msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
  152. msprobe/pytorch/parse_tool/lib/utils.py +2 -1
  153. msprobe/pytorch/pt_config.py +30 -29
  154. msprobe/pytorch/service.py +114 -32
  155. msprobe/visualization/builder/graph_builder.py +75 -10
  156. msprobe/visualization/builder/msprobe_adapter.py +7 -6
  157. msprobe/visualization/compare/graph_comparator.py +42 -38
  158. msprobe/visualization/compare/mode_adapter.py +0 -19
  159. msprobe/visualization/graph/base_node.py +11 -3
  160. msprobe/visualization/graph/distributed_analyzer.py +71 -3
  161. msprobe/visualization/graph/graph.py +0 -11
  162. msprobe/visualization/graph/node_op.py +4 -3
  163. msprobe/visualization/graph_service.py +4 -5
  164. msprobe/visualization/utils.py +12 -35
  165. msprobe/mindspore/dump/hook_cell/api_registry.py +0 -205
  166. msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
  167. msprobe/pytorch/hook_module/api_registry.py +0 -166
  168. msprobe/pytorch/hook_module/wrap_distributed.py +0 -75
  169. msprobe/pytorch/hook_module/wrap_functional.py +0 -66
  170. msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
  171. msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
  172. msprobe/pytorch/hook_module/wrap_torch.py +0 -84
  173. msprobe/pytorch/hook_module/wrap_vf.py +0 -60
  174. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/LICENSE +0 -0
  175. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/WHEEL +0 -0
  176. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/entry_points.txt +0 -0
  177. {mindstudio_probe-1.2.1.dist-info → mindstudio_probe-1.3.0.dist-info}/top_level.txt +0 -0
@@ -170,6 +170,16 @@ def gen_op_item(op_data, op_name):
170
170
  elif op_item.get('type') == 'slice':
171
171
  op_item['dtype'] = op_data.get('type')
172
172
  op_item['shape'] = str(np.shape(np.array(op_data.get('value'))))
173
+ elif op_item.get('type') == 'ellipsis':
174
+ op_item['dtype'] = op_data.get('type')
175
+ op_item['shape'] = '[]'
176
+ for i in params:
177
+ op_item[i] = op_data.get('value')
178
+ elif op_item.get('type') == 'torch.ProcessGroup':
179
+ op_item['dtype'] = op_data.get('type')
180
+ op_item['shape'] = '[]'
181
+ for i in params:
182
+ op_item[i] = str(op_data.get('group_ranks'))
173
183
  else:
174
184
  op_item['dtype'] = str(type(op_data.get('value')))
175
185
  op_item['shape'] = '[]'
@@ -275,9 +285,9 @@ def result_item_init(n_info, b_info, dump_mode):
275
285
  md5_compare_result = CompareConst.PASS if n_info.struct[2] == b_info.struct[2] else CompareConst.DIFF
276
286
  result_item.extend([n_info.struct[2], b_info.struct[2], md5_compare_result])
277
287
  elif dump_mode == Const.SUMMARY:
278
- result_item.extend([" "] * 8)
288
+ result_item.extend([" "] * 8) # 8个统计量数据情况的比对指标
279
289
  else:
280
- result_item.extend([" "] * 5)
290
+ result_item.extend([" "] * 6) # 6个真实数据情况的比对指标
281
291
  else:
282
292
  err_msg = "index out of bounds error will occur in result_item_init, please check!\n" \
283
293
  f"npu_info_struct is {n_info.struct}\n" \
@@ -311,8 +321,8 @@ def get_accuracy(result, n_dict, b_dict, dump_mode):
311
321
  has_stack = npu_stack_info and bench_stack_info
312
322
 
313
323
  if dump_mode == Const.ALL:
314
- npu_data_name = n_dict.get("data_name", None)
315
- bench_data_name = b_dict.get("data_name", None)
324
+ npu_data_name_list = n_dict.get("data_name", None)
325
+ bench_data_name_list = b_dict.get("data_name", None)
316
326
 
317
327
  for index in range(min_len):
318
328
  n_name = safe_get_value(n_dict, n_start + index, "n_dict", key="op_name")
@@ -343,7 +353,9 @@ def get_accuracy(result, n_dict, b_dict, dump_mode):
343
353
  result_item.append(err_msg)
344
354
  result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info)
345
355
  if dump_mode == Const.ALL:
346
- result_item.append(safe_get_value(npu_data_name, n_start + index, "npu_data_name"))
356
+ npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list")
357
+ bench_data_name = safe_get_value(bench_data_name_list, b_start + index, "bench_data_name_list")
358
+ result_item.append([npu_data_name, bench_data_name])
347
359
 
348
360
  result.append(result_item)
349
361
 
@@ -361,7 +373,7 @@ def get_accuracy(result, n_dict, b_dict, dump_mode):
361
373
  continue
362
374
  result_item = [
363
375
  n_name, CompareConst.NAN, n_struct[0], CompareConst.NAN, n_struct[1], CompareConst.NAN,
364
- " ", " ", " ", " ", " "
376
+ " ", " ", " ", " ", " ", " "
365
377
  ]
366
378
  summary_data = n_dict.get(CompareConst.SUMMARY)[n_start + index]
367
379
  result_item.extend(summary_data)
@@ -378,7 +390,8 @@ def get_accuracy(result, n_dict, b_dict, dump_mode):
378
390
  result_item.append(err_msg)
379
391
  result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info)
380
392
  if dump_mode == Const.ALL:
381
- result_item.append(safe_get_value(npu_data_name, n_start + index, "npu_data_name"))
393
+ npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list")
394
+ result_item.append([npu_data_name, "-1"])
382
395
 
383
396
  result.append(result_item)
384
397
 
@@ -443,9 +456,9 @@ def get_un_match_accuracy(result, n_dict, dump_mode):
443
456
  result.append(result_item)
444
457
  continue
445
458
  if dump_mode == Const.SUMMARY:
446
- result_item.extend([CompareConst.N_A] * 8)
459
+ result_item.extend([CompareConst.N_A] * 8) # 8个统计量数据情况的比对指标
447
460
  if dump_mode == Const.ALL:
448
- result_item.extend([CompareConst.N_A] * 5)
461
+ result_item.extend([CompareConst.N_A] * 6) # 6个真实数据情况的比对指标
449
462
 
450
463
  npu_summary_data = safe_get_value(summary_reorder, index, "summary_reorder")
451
464
  bench_summary_data = [CompareConst.N_A] * 4
@@ -457,7 +470,7 @@ def get_un_match_accuracy(result, n_dict, dump_mode):
457
470
  result_item.append(err_msg)
458
471
  append_stack_info(result_item, npu_stack_info, index)
459
472
  if dump_mode == Const.ALL and result_item[1] == CompareConst.N_A:
460
- result_item.extend(["-1"])
473
+ result_item.extend([["-1", "-1"]])
461
474
  result.append(result_item)
462
475
 
463
476
 
@@ -532,10 +545,17 @@ def get_name_and_state(name):
532
545
 
533
546
  state type: input, output, kwargs, parameters, parameters_grad
534
547
  """
548
+ if not isinstance(name, str):
549
+ logger.error(f'Invalid name: {name}, type should be string, please check.')
550
+ raise CompareException(CompareException.INVALID_API_NAME_ERROR)
551
+
535
552
  if Const.PARAMS_GRAD in name.split(Const.SEP):
536
553
  return name.split(Const.PARAMS_GRAD)[0], Const.PARAMS_GRAD
537
554
 
538
555
  split = re.split(Const.REGEX_FORWARD_BACKWARD, name)
556
+ if len(split) < 3:
557
+ logger.error(f'Invalid name string: {name}, can not be split by forward/backward, please check.')
558
+ raise CompareException(CompareException.INVALID_API_NAME_ERROR)
539
559
  api = f'{split[0]}.{split[1]}.'
540
560
  state_str = split[2]
541
561
  match = re.match(r'^(\d+\.)?(input|output|kwargs|parameters)\..+$', state_str)
@@ -0,0 +1,176 @@
1
+ # Copyright (c) 2025-2025, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from typing import Dict, Any, Optional, Callable, Union, List, Tuple
17
+
18
+ from msprobe.core.common.const import Const
19
+ from msprobe.core.common.file_utils import load_yaml
20
+
21
+
22
def _get_attr(module, attr_name):
    """Look up ``attr_name`` on ``module``, descending one dotted level if needed.

    A name containing ``Const.SEP`` (e.g. ``"sub.attr"``) is resolved as
    ``module.sub.attr``. Returns ``None`` when any step of the lookup fails.
    """
    if Const.SEP not in attr_name:
        return getattr(module, attr_name, None)
    parent_name, leaf_name = attr_name.rsplit(Const.SEP, 1)
    parent = getattr(module, parent_name, None)
    # getattr tolerates parent being None thanks to the default value.
    return getattr(parent, leaf_name, None)
30
+
31
+
32
class ApiWrapper:
    """Builds hook-instrumented replacements for framework API functions.

    ``api_types`` maps framework name -> api type -> modules; by convention in
    this class, element 0 of each modules entry is the module the original
    attribute is read from. ``api_list_paths`` gives one YAML file of supported
    API names per framework (a single path is broadcast to all frameworks).
    """

    def __init__(
            self, api_types: Dict[str, Dict[str, Any]],
            api_list_paths: Union[str, List[str], Tuple[str]]
    ):
        self.api_types = api_types
        if not isinstance(api_list_paths, (list, tuple)):
            # A single path applies to every framework.
            api_list_paths = [api_list_paths] * len(self.api_types)
        elif len(api_list_paths) != len(self.api_types):
            raise RuntimeError("The number of api_list_paths must be equal to the number of frameworks in 'api_types', "
                               "when api_list_paths is a list or tuple.")
        self.api_list_paths = api_list_paths
        # framework -> api_type -> set of API names that exist on the module.
        self.api_names = self._get_api_names()
        # framework -> api_type -> {api_name: wrapped function}; filled by wrap_api().
        self.wrapped_api_functions = dict()

    def wrap_api(
            self, api_templates, hook_build_func: Optional[Callable]
    ):
        """Create wrapped versions of every supported API.

        ``api_templates`` is one callable-template per (framework, api_type)
        pair, in iteration order; a single template is broadcast to all pairs.
        Returns the nested dict also stored in ``self.wrapped_api_functions``.
        """
        api_types_num = sum([len(v) for v in self.api_types.values()])
        if not isinstance(api_templates, (list, tuple)):
            api_templates = [api_templates] * api_types_num
        elif len(api_templates) != api_types_num:
            raise RuntimeError("The number of api_templates must be equal to the number of api_types, "
                               "when api_templates is a list or tuple.")

        self.wrapped_api_functions.clear()
        index = 0
        for framework, api_types in self.api_types.items():
            wrapped_functions_in_framework = dict()
            for api_type, api_modules in api_types.items():
                wrapped_functions = dict()
                name_prefix = Const.API_DATA_PREFIX.get(framework, {}).get(api_type, "API")
                api_template = api_templates[index]
                index += 1
                for api_name in self.api_names.get(framework, {}).get(api_type, []):
                    ori_api = _get_attr(api_modules[0], api_name)
                    if callable(ori_api):
                        # Factory function binds the loop variables as arguments,
                        # avoiding Python's late-binding closure pitfall.
                        def wrap_api_func(api_name, api_func, prefix, hook_build_func, api_template):
                            def api_function(*args, **kwargs):
                                # NOTE: a fresh api_template instance is built on
                                # every call of the wrapped API.
                                return api_template(api_name, api_func, prefix, hook_build_func)(*args, **kwargs)
                            api_function.__name__ = api_name
                            return api_function
                        wrapped_functions[api_name] = wrap_api_func(api_name, ori_api, name_prefix,
                                                                    hook_build_func, api_template)
                wrapped_functions_in_framework[api_type] = wrapped_functions
            self.wrapped_api_functions[framework] = wrapped_functions_in_framework
        return self.wrapped_api_functions

    def _get_api_names(self):
        """Load the supported-API YAML per framework and keep only the names
        actually present on the corresponding module (one dotted level deep)."""
        api_names = dict()

        for index, framework in enumerate(self.api_types.keys()):
            api_list = load_yaml(self.api_list_paths[index])
            valid_names = dict()
            for api_type, api_modules in self.api_types.get(framework, {}).items():
                api_from_file = api_list.get(Const.SUPPORT_API_DICT_KEY_MAP.get(framework, {}).get(api_type), [])
                names = set()
                for api_name in api_from_file:
                    target_attr = api_name
                    target_module = api_modules[0]
                    if Const.SEP in api_name:
                        # Dotted name: resolve the sub-module before checking.
                        sub_module_name, target_attr = api_name.rsplit(Const.SEP, 1)
                        target_module = getattr(api_modules[0], sub_module_name, None)
                    if target_module and target_attr in dir(target_module):
                        names.add(api_name)
                valid_names[api_type] = names
            api_names[framework] = valid_names

        return api_names
101
+
102
+
103
class ApiRegistry:
    """
    Base class for api registry.

    Swaps framework API attributes between their original and hook-wrapped
    versions. By convention here, for each (framework, api_type):
    ``api_modules[0]`` is the module originals are read from, and
    ``api_modules[1]`` is the list of modules that get patched.
    ``inner_used_api`` maps a "framework.api_type" key to a tuple whose
    element 0 is the target namespace and elements 1+ are API names.
    """

    def __init__(self, api_types, inner_used_api, supported_api_list_path, api_templates):
        # "framework.api_type" -> {api_name: original attribute}
        self.ori_api_attr = dict()
        # "framework.api_type" -> {api_name: wrapped attribute}
        self.wrapped_api_attr = dict()
        self.inner_used_ori_attr = dict()
        self.inner_used_wrapped_attr = dict()
        self.api_types = api_types
        self.inner_used_api = inner_used_api
        self.supported_api_list_path = supported_api_list_path
        self.api_templates = api_templates

    @staticmethod
    def store_ori_attr(ori_api_group, api_list, api_ori_attr):
        # Record the original attribute for every name so it can be restored later.
        for api in api_list:
            api_ori_attr[api] = _get_attr(ori_api_group, api)

    @staticmethod
    def set_api_attr(api_group, attr_dict):
        # Assign each attribute onto api_group, descending one dotted level
        # (e.g. "sub.op" is set on api_group.sub). Missing sub-modules are skipped.
        for api, api_attr in attr_dict.items():
            if Const.SEP in api:
                sub_module_name, sub_op = api.rsplit(Const.SEP, 1)
                sub_module = getattr(api_group, sub_module_name, None)
                if sub_module is not None:
                    setattr(sub_module, sub_op, api_attr)
            else:
                setattr(api_group, api, api_attr)

    def register_all_api(self):
        """Patch every target module with the wrapped API attributes."""
        for framework, api_types in self.api_types.items():
            for api_type, api_modules in api_types.items():
                api_type_with_framework = framework + Const.SEP + api_type
                for module in api_modules[1]:
                    self.set_api_attr(module, self.wrapped_api_attr.get(api_type_with_framework, {}))

    def register_inner_used_api(self):
        """Patch only the tool's internally-used APIs (element 0 is the namespace)."""
        for api_type in self.inner_used_api.keys():
            self.set_api_attr(self.inner_used_api.get(api_type)[0], self.inner_used_wrapped_attr.get(api_type, {}))

    def restore_all_api(self):
        """Put the original attributes back on every target module."""
        for framework, api_types in self.api_types.items():
            for api_type, api_modules in api_types.items():
                api_type_with_framework = framework + Const.SEP + api_type
                for module in api_modules[1]:
                    self.set_api_attr(module, self.ori_api_attr.get(api_type_with_framework, {}))

    def restore_inner_used_api(self):
        """Restore the originals of the internally-used APIs."""
        for api_type in self.inner_used_api.keys():
            self.set_api_attr(self.inner_used_api.get(api_type)[0], self.inner_used_ori_attr.get(api_type, {}))

    def initialize_hook(self, hook_build_func):
        """Wrap all supported APIs and cache both original and wrapped attrs.

        Must run before any register_*/restore_* call, since it populates the
        attribute caches those methods read.
        """
        api_wrapper = ApiWrapper(self.api_types, self.supported_api_list_path)
        wrapped_api_functions = api_wrapper.wrap_api(self.api_templates, hook_build_func)

        for framework, api_types in self.api_types.items():
            for api_type, api_modules in api_types.items():
                ori_attr = dict()
                self.store_ori_attr(api_modules[0], api_wrapper.api_names.get(framework).get(api_type), ori_attr)
                api_type_with_framework = framework + Const.SEP + api_type
                self.ori_api_attr[api_type_with_framework] = ori_attr
                self.wrapped_api_attr[api_type_with_framework] = wrapped_api_functions.get(framework).get(api_type)

        for inner_used_api_type, inner_used_api_list in self.inner_used_api.items():
            ori_attr = dict()
            wrapped_attr = dict()
            # Elements 1+ are the api names; keep only those already cached above.
            for api_name in inner_used_api_list[1:]:
                if self.ori_api_attr.get(inner_used_api_type, {}).get(api_name):
                    ori_attr[api_name] = self.ori_api_attr.get(inner_used_api_type).get(api_name)
                    wrapped_attr[api_name] = self.wrapped_api_attr.get(inner_used_api_type).get(api_name)
            self.inner_used_ori_attr[inner_used_api_type] = ori_attr
            self.inner_used_wrapped_attr[inner_used_api_type] = wrapped_attr
@@ -1,4 +1,4 @@
1
- # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
1
+ # Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -40,6 +40,7 @@ class DataCollector:
40
40
  self.scope = ScopeFactory(self.config).build_scope()
41
41
  self.backward_module_names = {}
42
42
  self.optimizer_status = ""
43
+ self.optimizer_status_first_start = {Const.OPTIMIZER: True, Const.CLIP_GRAD: True}
43
44
  atexit.register(self.write_json)
44
45
 
45
46
  @property
@@ -54,6 +55,17 @@ class DataCollector:
54
55
  def check_scope_and_pid(scope, name, pid):
55
56
  return (not scope or scope.check(name)) and pid == os.getpid()
56
57
 
58
+ @staticmethod
59
+ def set_is_recomputable(data_info, is_recompute):
60
+ if data_info and len(data_info) == 1 and is_recompute is not None: # 正常情况下data_info的长度应改为1
61
+ data_info[list(data_info.keys())[0]]["is_recompute"] = is_recompute
62
+
63
+ def reset_status(self):
64
+ self.optimizer_status = ""
65
+ self.optimizer_status_first_start = {Const.OPTIMIZER: True, Const.CLIP_GRAD: True}
66
+ self.data_writer.reset_cache()
67
+ self.backward_module_names.clear()
68
+
57
69
  def if_return_forward_new_output(self):
58
70
  return self.data_processor.if_return_forward_new_output()
59
71
 
@@ -77,7 +89,7 @@ class DataCollector:
77
89
  logger.debug(msg)
78
90
  self.data_writer.update_data(data_info)
79
91
 
80
- def forward_input_data_collect(self, name, module, pid, module_input_output):
92
+ def forward_input_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
81
93
  if self.config.task == Const.FREE_BENCHMARK:
82
94
  backward_name = name.replace(Const.FORWARD, Const.BACKWARD)
83
95
  if self.check_scope_and_pid(self.scope, backward_name, pid):
@@ -87,37 +99,48 @@ class DataCollector:
87
99
  if not self.check_scope_and_pid(self.scope, name, pid):
88
100
  return
89
101
 
90
- data_info = self.data_processor.analyze_forward_input(name, module, module_input_output)
102
+ data_info = {}
103
+ if self.config.task != Const.STRUCTURE:
104
+ data_info = self.data_processor.analyze_forward_input(name, module, module_input_output)
105
+ self.set_is_recomputable(data_info, is_recompute)
91
106
  if self.config.level == Const.LEVEL_L2:
92
107
  return
93
108
  self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
94
109
 
95
- def forward_output_data_collect(self, name, module, pid, module_input_output):
110
+ def forward_output_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
96
111
  self.update_construct(name)
97
112
  if not self.check_scope_and_pid(self.scope, name, pid):
98
113
  return
99
114
 
100
- data_info = self.data_processor.analyze_forward_output(name, module, module_input_output)
115
+ data_info = {}
116
+ if self.config.task != Const.STRUCTURE:
117
+ data_info = self.data_processor.analyze_forward_output(name, module, module_input_output)
118
+ self.set_is_recomputable(data_info, is_recompute)
101
119
  if self.config.level == Const.LEVEL_L2:
102
120
  return
103
121
  self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name))
104
122
  self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
105
123
 
106
- def forward_data_collect(self, name, module, pid, module_input_output):
124
+ def forward_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
107
125
  self.update_construct(name)
108
126
  if not self.check_scope_and_pid(self.scope, name, pid):
109
127
  return
110
128
 
111
- data_info = self.data_processor.analyze_forward(name, module, module_input_output)
129
+ data_info = {}
130
+ if self.config.task != Const.STRUCTURE:
131
+ data_info = self.data_processor.analyze_forward(name, module, module_input_output)
132
+ self.set_is_recomputable(data_info, is_recompute)
112
133
  self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name))
113
134
  self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
114
135
 
115
- def backward_data_collect(self, name, module, pid, module_input_output):
136
+ def backward_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
116
137
  self.update_construct(name)
117
138
  if not self.check_scope_and_pid(self.scope, name, pid):
118
139
  return
119
140
 
120
- data_info = self.data_processor.analyze_backward(name, module, module_input_output)
141
+ data_info = {}
142
+ if self.config.task != Const.STRUCTURE:
143
+ data_info = self.data_processor.analyze_backward(name, module, module_input_output)
121
144
  if self.config.level == Const.LEVEL_L2:
122
145
  return
123
146
  # 获取执行反向的模块名称
@@ -127,25 +150,34 @@ class DataCollector:
127
150
  self.backward_module_names[module_name] = True
128
151
  self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
129
152
 
130
- def backward_input_data_collect(self, name, module, pid, module_input_output):
153
+ def backward_input_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
131
154
  self.update_construct(name)
132
155
  if not self.check_scope_and_pid(self.scope, name, pid):
133
156
  return
134
157
 
135
- data_info = self.data_processor.analyze_backward_input(name, module, module_input_output)
158
+ data_info = {}
159
+ if self.config.task != Const.STRUCTURE:
160
+ data_info = self.data_processor.analyze_backward_input(name, module, module_input_output)
161
+ self.set_is_recomputable(data_info, is_recompute)
136
162
  self.handle_data(name, data_info)
137
163
 
138
- def backward_output_data_collect(self, name, module, pid, module_input_output):
164
+ def backward_output_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
139
165
  self.update_construct(name)
140
166
  if not self.check_scope_and_pid(self.scope, name, pid):
141
167
  return
142
168
 
143
- data_info = self.data_processor.analyze_backward_output(name, module, module_input_output)
169
+ data_info = {}
170
+ if self.config.task != Const.STRUCTURE:
171
+ data_info = self.data_processor.analyze_backward_output(name, module, module_input_output)
172
+ self.set_is_recomputable(data_info, is_recompute)
144
173
  self.handle_data(name, data_info)
145
174
 
146
175
  def update_construct(self, name):
147
176
  if self.config.level not in DataCollector.level_without_construct:
148
177
  if self.optimizer_status in [Const.OPTIMIZER, Const.CLIP_GRAD]:
178
+ if self.optimizer_status_first_start[self.optimizer_status]:
179
+ self.data_writer.update_construct({self.optimizer_status: None})
180
+ self.optimizer_status_first_start[self.optimizer_status] = False
149
181
  self.data_writer.update_construct({name: self.optimizer_status})
150
182
  else:
151
183
  self.data_writer.update_construct({name: self.module_processor.api_parent_node})
@@ -183,3 +215,16 @@ class DataCollector:
183
215
 
184
216
  def fill_stack_tensor_data(self):
185
217
  self.data_writer.fill_stack_tensor_data()
218
+
219
+ def debug_data_collect_forward(self, variable, name_with_count):
220
+
221
+ data_info = self.data_processor.analyze_debug_forward(variable, name_with_count)
222
+ self.data_writer.update_debug({name_with_count: data_info})
223
+
224
+ def debug_data_collect_backward(self, variable, grad_name_with_count):
225
+ # prepare all None nested data structure
226
+ all_none_data_info = self.data_processor.analyze_element_to_all_none(variable)
227
+ self.data_writer.update_debug({grad_name_with_count: all_none_data_info})
228
+
229
+ # register tensor backward hook
230
+ self.data_processor.analyze_debug_backward(variable, grad_name_with_count, self.data_writer.cache_debug['data'])
@@ -17,6 +17,9 @@ import inspect
17
17
  import os
18
18
  from dataclasses import dataclass, is_dataclass
19
19
  from typing import Tuple, Dict, Optional, Any
20
+ from functools import partial
21
+ import copy
22
+ from typing import Union
20
23
 
21
24
  import numpy as np
22
25
 
@@ -87,7 +90,7 @@ class TensorStatInfo:
87
90
  class BaseDataProcessor:
88
91
  _recursive_key_stack = []
89
92
  special_type = (
90
- np.integer, np.floating, np.bool_, np.complexfloating, np.str_, np.byte, np.unicode_,
93
+ np.integer, np.floating, np.bool_, np.complexfloating, np.str_, np.byte, np.unicode_, np.ndarray,
91
94
  bool, int, float, str, slice,
92
95
  type(Ellipsis)
93
96
  )
@@ -143,6 +146,37 @@ class BaseDataProcessor:
143
146
  else:
144
147
  return data
145
148
 
149
+ @staticmethod
150
+ def set_value_into_nested_structure(data_structure, indexes, value):
151
+ '''
152
+ Args:
153
+ data_structure: nested data structure
154
+ indexes: List
155
+ value: value to be set
156
+ '''
157
+ if not indexes:
158
+ raise ValueError("set_value_into_nested_structure failed: "
159
+ "indexes need to be non empty when set value to nested data structure")
160
+ current_level = data_structure
161
+ for i, index in enumerate(indexes):
162
+ valid_for_list = isinstance(current_level, list) and isinstance(index, int) and len(current_level) > index
163
+ valid_for_dict = isinstance(current_level, dict) and index in current_level
164
+ is_last = i == len(indexes) - 1
165
+ if valid_for_dict or valid_for_list:
166
+ if is_last:
167
+ try:
168
+ current_level[index] = value
169
+ except Exception as e:
170
+ raise IndexError("set_value_into_nested_structure failed: passed indexes wrong") from e
171
+ else:
172
+ try:
173
+ current_level = current_level[index]
174
+ except Exception as e:
175
+ raise IndexError("set_value_into_nested_structure failed: passed indexes wrong") from e
176
+ else:
177
+ raise ValueError("set_value_into_nested_structure failed: "
178
+ "invalid data_structure type or invalid index")
179
+
146
180
  @staticmethod
147
181
  def _convert_numpy_to_builtin(arg):
148
182
  type_mapping = {
@@ -183,8 +217,22 @@ class BaseDataProcessor:
183
217
  return single_arg
184
218
 
185
219
  @staticmethod
186
- def _analyze_numpy(value, numpy_type):
187
- return {"type": numpy_type, "value": value}
220
+ def _analyze_numpy(ndarray, numpy_type):
221
+ ndarray_json = {}
222
+ ndarray_json.update({'type': 'numpy.ndarray'})
223
+ ndarray_json.update({'dtype': str(ndarray.dtype)})
224
+ ndarray_json.update({'shape': ndarray.shape})
225
+ if ndarray.size > 0:
226
+ ndarray_json.update({"Max": np.max(ndarray).item()})
227
+ ndarray_json.update({"Min": np.min(ndarray).item()})
228
+ ndarray_json.update({"Mean": np.mean(ndarray).item()})
229
+ ndarray_json.update({"Norm": np.linalg.norm(ndarray).item()})
230
+ else:
231
+ ndarray_json.update({"Max": None})
232
+ ndarray_json.update({"Min": None})
233
+ ndarray_json.update({"Mean": None})
234
+ ndarray_json.update({"Norm": None})
235
+ return ndarray_json
188
236
 
189
237
  @staticmethod
190
238
  def _get_allowed_data_mode(data_mode):
@@ -203,9 +251,9 @@ class BaseDataProcessor:
203
251
  return cls.special_type
204
252
 
205
253
  @classmethod
206
- def recursive_apply_transform(cls, args, transform, depth=0):
207
- if depth > Const.MAX_DEPTH:
208
- logger.error(f"The maximum depth of recursive transform, {Const.MAX_DEPTH} is reached.")
254
+ def recursive_apply_transform(cls, args, transform, depth=0) -> Union[dict, list, None]:
255
+ if depth > Const.DUMP_MAX_DEPTH:
256
+ logger.error(f"The maximum depth of recursive transform, {Const.DUMP_MAX_DEPTH} is reached.")
209
257
  raise CompareException(CompareException.RECURSION_LIMIT_ERROR)
210
258
  if isinstance(args, cls.get_special_types()):
211
259
  arg_transform = transform(args, cls._recursive_key_stack)
@@ -220,7 +268,7 @@ class BaseDataProcessor:
220
268
  return cls.apply_transform_dict(args_dict, transform, depth)
221
269
  elif isinstance(args, (list, tuple)):
222
270
  result_list = cls.apply_transform_list(args, transform, depth)
223
- return type(args)(result_list)
271
+ return result_list
224
272
  elif isinstance(args, dict):
225
273
  return cls.apply_transform_dict(args, transform, depth)
226
274
  elif args is not None:
@@ -228,12 +276,12 @@ class BaseDataProcessor:
228
276
  return None
229
277
  else:
230
278
  return None
231
-
279
+
232
280
@classmethod
def apply_transform_dict(cls, args, transform, depth):
    """Apply *transform* to every value of a dict, recursing one level deeper.

    The dict key is pushed onto the shared ``_recursive_key_stack`` while
    its value is processed so nested transforms can see the access path.

    Returns a new dict with the same keys and transformed values.
    """
    result_dict = {}
    for k, arg in args.items():
        cls._recursive_key_stack.append(k)
        try:
            result_dict[k] = cls.recursive_apply_transform(arg, transform, depth=depth + 1)
        finally:
            # Fix: pop in a finally block so an exception raised by the
            # transform cannot leave a stale key on the shared class-level
            # stack and corrupt the paths of every later dump.
            cls._recursive_key_stack.pop()
    return result_dict
@@ -242,11 +290,21 @@ class BaseDataProcessor:
242
290
def apply_transform_list(cls, args, transform, depth):
    """Apply *transform* to every element of a list/tuple, one level deeper.

    Each element's position is pushed onto the shared
    ``_recursive_key_stack`` while it is processed so nested transforms
    can see the access path.

    Returns a new list of transformed elements.
    """
    result_list = []
    for pos, item in enumerate(args):
        cls._recursive_key_stack.append(pos)
        try:
            result_list.append(cls.recursive_apply_transform(item, transform, depth=depth + 1))
        finally:
            # Fix: pop in a finally block so an exception raised by the
            # transform cannot leave a stale index on the shared stack.
            cls._recursive_key_stack.pop()
    return result_list
249
297
 
298
@classmethod
def register_hook_single_element(cls, element, suffix_stack, hook_fn):
    """Attach a gradient hook to *element* when it supports hook registration.

    The current *suffix_stack* (access path inside the nested structure) is
    snapshotted at registration time so the hook knows, when it later fires,
    where the element originally sat.
    """
    if not cls.is_hookable_element(element):
        return
    # Deep-copy: the shared stack keeps mutating during the recursive walk.
    path_snapshot = copy.deepcopy(suffix_stack)

    def on_grad(grad):
        return hook_fn(grad, indexes=path_snapshot)

    element.register_hook(on_grad)
307
+
250
308
def if_return_forward_new_output(self):
    """Return the ``_return_forward_new_output`` flag (whether a replacement
    forward output should be returned to the caller)."""
    return self._return_forward_new_output
  return self._return_forward_new_output
252
310
 
@@ -383,3 +441,29 @@ class BaseDataProcessor:
383
441
  suffix + file_format)
384
442
  file_path = os.path.join(self.data_writer.dump_tensor_data_dir, dump_data_name)
385
443
  return dump_data_name, file_path
444
+
445
def analyze_element_to_all_none(self, element):
    """Build a copy of *element*'s nested structure with every leaf set to None."""
    def to_none(_value, _stack):
        # Renamed from the original lambda whose parameter shadowed `element`.
        return None

    return self.recursive_apply_transform(element, to_none)
447
+
448
def analyze_debug_forward(self, variable, name_with_count):
    """Collect statistics for *variable* in a debug-level forward dump.

    ``current_api_or_module_name`` and ``api_data_category`` are set first
    because they are used to construct the dumped tensor file name of the
    form {name_with_count}.tensor.{indexes}.npy/pt.
    """
    self.current_api_or_module_name = name_with_count
    self.api_data_category = Const.TENSOR
    return self.analyze_element(variable)
454
+
455
def analyze_debug_backward(self, variable, grad_name_with_count, nested_data_structure):
    """Register gradient hooks on every hookable leaf of *variable*.

    When a gradient arrives, its statistics are analyzed and written into
    the matching slot of *nested_data_structure* (the debug cache), keyed
    by *grad_name_with_count* plus the leaf's access path.
    """
    def hook_fn(grad, indexes):
        suffix = Const.SEP.join([str(index) for index in indexes])
        # save_name drives the dumped tensor file name:
        # {grad_name_with_count}.tensor.{indexes}
        self.save_name = grad_name_with_count + Const.SEP + Const.TENSOR + Const.SEP + suffix
        try:
            grad_data_info = self.analyze_element(grad)
        finally:
            # Fix: always clear save_name, even if analyze_element raises,
            # so a later dump cannot reuse a stale file name.
            self.save_name = None
        full_index = [grad_name_with_count] + indexes
        try:
            self.set_value_into_nested_structure(nested_data_structure, full_index, grad_data_info)
        except (ValueError, IndexError) as e:
            # Fix: corrected spelling in the warning ("occurred", "information").
            logger.warning(f"error occurred while recording statistics of {grad_name_with_count} variable, "
                           f"skip current recording, detailed information: {e}")
        return grad

    wrap_register_hook_single_element = partial(self.register_hook_single_element, hook_fn=hook_fn)
    self.recursive_apply_transform(variable, wrap_register_hook_single_element)
@@ -14,6 +14,7 @@
14
14
  # limitations under the License.
15
15
 
16
16
  from msprobe.core.common.const import Const
17
+ from msprobe.core.data_dump.data_processor.base import BaseDataProcessor
17
18
 
18
19
 
19
20
  class DataProcessorFactory:
@@ -62,6 +63,7 @@ class DataProcessorFactory:
62
63
  cls.register_processor(Const.PT_FRAMEWORK, Const.OVERFLOW_CHECK, PytorchOverflowCheckDataProcessor)
63
64
  cls.register_processor(Const.PT_FRAMEWORK, Const.FREE_BENCHMARK, PytorchFreeBenchmarkDataProcessor)
64
65
  cls.register_processor(Const.PT_FRAMEWORK, Const.KERNEL_DUMP, PytorchKernelDumpDataProcessor)
66
+ cls.register_processor(Const.PT_FRAMEWORK, Const.STRUCTURE, BaseDataProcessor)
65
67
  cls.register_module_processor(Const.PT_FRAMEWORK, ModuleProcesser)
66
68
  elif framework == Const.MS_FRAMEWORK:
67
69
  from msprobe.core.data_dump.data_processor.mindspore_processor import (
@@ -75,4 +77,5 @@ class DataProcessorFactory:
75
77
  cls.register_processor(Const.MS_FRAMEWORK, Const.TENSOR, MindsporeTensorDataProcessor)
76
78
  cls.register_processor(Const.MS_FRAMEWORK, Const.OVERFLOW_CHECK, MindsporeOverflowCheckDataProcessor)
77
79
  cls.register_processor(Const.MS_FRAMEWORK, Const.KERNEL_DUMP, MindsporeKernelDumpDataProcessor)
80
+ cls.register_processor(Const.MS_FRAMEWORK, Const.STRUCTURE, BaseDataProcessor)
78
81
  cls.register_module_processor(Const.MS_FRAMEWORK, CellProcessor)