mindstudio-probe 1.1.1__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/METADATA +3 -2
- mindstudio_probe-1.2.2.dist-info/RECORD +415 -0
- msprobe/CMakeLists.txt +5 -0
- msprobe/README.md +16 -21
- msprobe/config.json +1 -0
- msprobe/core/common/const.py +185 -11
- msprobe/core/common/exceptions.py +3 -1
- msprobe/core/common/file_utils.py +33 -7
- msprobe/core/common/inplace_ops.yaml +4 -0
- msprobe/core/common/utils.py +42 -14
- msprobe/core/common_config.py +6 -0
- msprobe/core/compare/acc_compare.py +139 -128
- msprobe/core/compare/check.py +31 -29
- msprobe/core/compare/compare_cli.py +17 -16
- msprobe/core/compare/highlight.py +186 -99
- msprobe/core/compare/layer_mapping/data_scope_parser.py +19 -8
- msprobe/core/compare/layer_mapping/layer_mapping.py +21 -14
- msprobe/core/compare/layer_mapping/postprocess_pass.py +4 -3
- msprobe/core/compare/merge_result/merge_result.py +381 -0
- msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
- msprobe/core/compare/merge_result/utils.py +81 -0
- msprobe/core/compare/multiprocessing_compute.py +2 -2
- msprobe/core/compare/npy_compare.py +109 -147
- msprobe/core/compare/utils.py +199 -69
- msprobe/core/data_dump/data_collector.py +100 -25
- msprobe/core/data_dump/data_processor/base.py +130 -28
- msprobe/core/data_dump/data_processor/factory.py +8 -3
- msprobe/core/data_dump/data_processor/mindspore_processor.py +170 -23
- msprobe/core/data_dump/data_processor/pytorch_processor.py +175 -64
- msprobe/core/data_dump/json_writer.py +54 -8
- msprobe/core/data_dump/scope.py +19 -18
- msprobe/core/overflow_check/abnormal_scene.py +9 -5
- msprobe/core/overflow_check/checker.py +1 -1
- msprobe/core/overflow_check/utils.py +1 -1
- msprobe/docs/01.installation.md +121 -17
- msprobe/docs/02.config_introduction.md +18 -16
- msprobe/docs/03.config_examples.md +24 -0
- msprobe/docs/05.data_dump_PyTorch.md +107 -58
- msprobe/docs/06.data_dump_MindSpore.md +95 -34
- msprobe/docs/07.accuracy_checker_PyTorch.md +18 -18
- msprobe/docs/09.accuracy_checker_MindSpore.md +8 -6
- msprobe/docs/10.accuracy_compare_PyTorch.md +99 -41
- msprobe/docs/11.accuracy_compare_MindSpore.md +249 -48
- msprobe/docs/12.overflow_check_PyTorch.md +1 -1
- msprobe/docs/19.monitor.md +310 -220
- msprobe/docs/21.visualization_PyTorch.md +125 -35
- msprobe/docs/22.visualization_MindSpore.md +149 -41
- msprobe/docs/23.generate_operator_PyTorch.md +107 -0
- msprobe/docs/24.code_mapping_Mindspore.md +28 -0
- msprobe/docs/{23.tool_function_introduction.md → 25.tool_function_introduction.md} +1 -0
- msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
- msprobe/docs/27.dump_json_instruction.md +525 -0
- msprobe/docs/28.debugger_save_instruction.md +94 -0
- msprobe/docs/28.kernel_dump_MindSpore.md +69 -0
- msprobe/docs/FAQ.md +26 -2
- msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
- msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
- msprobe/docs/img/merge_result.png +0 -0
- msprobe/docs/img/monitor/step_count_per_record.png +0 -0
- msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
- msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
- msprobe/docs/img/visualization/tensorboard_1.png +0 -0
- msprobe/docs/img/visualization/tensorboard_2.png +0 -0
- msprobe/docs/img/visualization/vis_browser_1.png +0 -0
- msprobe/docs/img/visualization/vis_browser_2.png +0 -0
- msprobe/docs/img/visualization/vis_precision_info.png +0 -0
- msprobe/docs/img/visualization/vis_search_info.png +0 -0
- msprobe/docs/img/visualization/vis_show_info.png +0 -0
- msprobe/docs/img/visualization/vis_showcase.png +0 -0
- msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
- msprobe/docs/visualization/GPTModel.png +0 -0
- msprobe/docs/visualization/ParallelMLP.png +0 -0
- msprobe/docs/visualization/layer_mapping_example.md +132 -0
- msprobe/docs/visualization/mapping.png +0 -0
- msprobe/docs/visualization/mapping1.png +0 -0
- msprobe/docs/visualization/module_name.png +0 -0
- msprobe/docs/visualization/module_name1.png +0 -0
- msprobe/docs/visualization/no_mapping.png +0 -0
- msprobe/docs/visualization/no_mapping1.png +0 -0
- msprobe/docs/visualization/no_mapping_analyze.png +0 -0
- msprobe/docs/visualization/top_layer.png +0 -0
- msprobe/mindspore/__init__.py +11 -0
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +80 -28
- msprobe/mindspore/api_accuracy_checker/api_runner.py +54 -16
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +2 -1
- msprobe/mindspore/api_accuracy_checker/compute_element.py +52 -8
- msprobe/mindspore/api_accuracy_checker/data_manager.py +37 -0
- msprobe/mindspore/api_accuracy_checker/main.py +1 -0
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +12 -6
- msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +3 -1
- msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +129 -0
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +24 -1
- msprobe/mindspore/api_accuracy_checker/utils.py +6 -1
- msprobe/mindspore/code_mapping/bind.py +264 -0
- msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
- msprobe/mindspore/code_mapping/graph.py +49 -0
- msprobe/mindspore/code_mapping/graph_parser.py +226 -0
- msprobe/mindspore/code_mapping/main.py +24 -0
- msprobe/mindspore/code_mapping/processor.py +34 -0
- msprobe/mindspore/common/const.py +3 -1
- msprobe/mindspore/common/utils.py +68 -5
- msprobe/mindspore/compare/distributed_compare.py +0 -2
- msprobe/mindspore/compare/ms_compare.py +105 -63
- msprobe/mindspore/compare/ms_graph_compare.py +14 -5
- msprobe/mindspore/debugger/debugger_config.py +28 -2
- msprobe/mindspore/debugger/precision_debugger.py +100 -12
- msprobe/mindspore/dump/hook_cell/api_registry.py +85 -16
- msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +33 -15
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +11 -1
- msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
- msprobe/mindspore/dump/jit_dump.py +7 -6
- msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
- msprobe/mindspore/dump/kernel_graph_dump.py +7 -0
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +13 -4
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
- msprobe/mindspore/grad_probe/grad_analyzer.py +24 -12
- msprobe/mindspore/grad_probe/hook.py +13 -4
- msprobe/mindspore/mindtorch/__init__.py +18 -0
- msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
- msprobe/mindspore/monitor/anomaly_detect.py +404 -0
- msprobe/mindspore/monitor/distributed/__init__.py +0 -0
- msprobe/mindspore/monitor/distributed/distributed_ops.yaml +15 -0
- msprobe/mindspore/monitor/distributed/stack_blacklist.yaml +5 -0
- msprobe/mindspore/monitor/distributed/wrap_distributed.py +300 -0
- msprobe/mindspore/monitor/features.py +63 -0
- msprobe/mindspore/monitor/module_hook.py +821 -0
- msprobe/mindspore/monitor/module_spec_verifier.py +94 -0
- msprobe/mindspore/monitor/utils.py +267 -0
- msprobe/mindspore/ms_config.py +13 -3
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +7 -0
- msprobe/mindspore/service.py +347 -107
- msprobe/msprobe.py +24 -3
- msprobe/pytorch/__init__.py +7 -7
- msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +100 -267
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
- msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
- msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +55 -31
- msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
- msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +57 -1
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -1
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +42 -14
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +64 -19
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +34 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
- msprobe/pytorch/bench_functions/apply_adam.py +215 -0
- msprobe/pytorch/bench_functions/group_norm_silu.py +27 -0
- msprobe/pytorch/bench_functions/mish.py +21 -0
- msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +44 -0
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +42 -10
- msprobe/pytorch/bench_functions/sort_v2.py +21 -0
- msprobe/pytorch/common/parse_json.py +2 -1
- msprobe/pytorch/common/utils.py +116 -2
- msprobe/pytorch/compare/distributed_compare.py +17 -29
- msprobe/pytorch/compare/pt_compare.py +40 -20
- msprobe/pytorch/debugger/debugger_config.py +42 -17
- msprobe/pytorch/debugger/precision_debugger.py +56 -12
- msprobe/pytorch/dump/module_dump/__init__.py +0 -0
- msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
- msprobe/pytorch/dump/module_dump/module_processer.py +204 -0
- msprobe/pytorch/free_benchmark/common/params.py +2 -1
- msprobe/pytorch/free_benchmark/common/utils.py +3 -0
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +0 -2
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +31 -47
- msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
- msprobe/pytorch/function_factory.py +7 -1
- msprobe/pytorch/hook_module/__init__.py +1 -1
- msprobe/pytorch/hook_module/hook_module.py +14 -11
- msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +36 -1
- msprobe/pytorch/hook_module/wrap_distributed.py +10 -8
- msprobe/pytorch/hook_module/wrap_functional.py +0 -40
- msprobe/pytorch/monitor/anomaly_analyse.py +1 -1
- msprobe/pytorch/monitor/anomaly_detect.py +98 -28
- msprobe/pytorch/monitor/csv2tb.py +164 -0
- msprobe/pytorch/monitor/distributed/wrap_distributed.py +25 -14
- msprobe/pytorch/monitor/features.py +3 -3
- msprobe/pytorch/monitor/module_hook.py +543 -318
- msprobe/pytorch/monitor/module_metric.py +27 -48
- msprobe/pytorch/monitor/module_spec_verifier.py +3 -1
- msprobe/pytorch/monitor/optimizer_collect.py +76 -56
- msprobe/pytorch/monitor/unittest/test_monitor.py +24 -9
- msprobe/pytorch/monitor/utils.py +84 -48
- msprobe/pytorch/online_dispatch/dispatch.py +8 -2
- msprobe/pytorch/parse_tool/lib/compare.py +10 -10
- msprobe/pytorch/parse_tool/lib/config.py +5 -7
- msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
- msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
- msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
- msprobe/pytorch/parse_tool/lib/parse_tool.py +11 -10
- msprobe/pytorch/parse_tool/lib/utils.py +18 -19
- msprobe/pytorch/parse_tool/lib/visualization.py +9 -10
- msprobe/pytorch/pt_config.py +19 -22
- msprobe/pytorch/service.py +264 -115
- msprobe/visualization/builder/graph_builder.py +93 -10
- msprobe/visualization/builder/msprobe_adapter.py +30 -6
- msprobe/visualization/compare/graph_comparator.py +64 -14
- msprobe/visualization/compare/mode_adapter.py +1 -15
- msprobe/visualization/graph/base_node.py +15 -19
- msprobe/visualization/graph/distributed_analyzer.py +395 -0
- msprobe/visualization/graph/graph.py +9 -0
- msprobe/visualization/graph/node_op.py +4 -2
- msprobe/visualization/graph_service.py +100 -27
- msprobe/visualization/utils.py +24 -31
- mindstudio_probe-1.1.1.dist-info/RECORD +0 -341
- msprobe/pytorch/functional/module_dump.py +0 -84
- msprobe/pytorch/module_processer.py +0 -150
- {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/WHEEL +0 -0
- {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.2.dist-info}/top_level.txt +0 -0
- /msprobe/docs/{data_dump_Mindspore → data_dump_MindSpore}/dynamic_graph_quick_start_example.md +0 -0
- /msprobe/{pytorch/functional → mindspore/code_mapping}/__init__.py +0 -0
msprobe/core/compare/utils.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,7 +21,7 @@ from dataclasses import dataclass

 import numpy as np

-from msprobe.core.common.const import Const, CompareConst
+from msprobe.core.common.const import Const, CompareConst, FileCheckConst
 from msprobe.core.common.utils import CompareException, check_regex_prefix_format_valid, logger, safe_get_value
 from msprobe.core.common.file_utils import check_file_or_directory_path

@@ -37,13 +37,20 @@ def extract_json(dirname, stack_json=False):
     # Provide robustness on invalid directory inputs
     if not json_path:
         if stack_json:
-            logger.
+            logger.warning(f'stack.json is not found in dump dir {dirname}.')
         else:
             logger.error(f'dump.json is not found in dump dir {dirname}.')
+            raise CompareException(CompareException.NO_DUMP_FILE_ERROR)
     return json_path


+def set_stack_json_path(input_param):
+    npu_data_dir = os.path.dirname(input_param.get("npu_json_path"))
+    stack_path = extract_json(npu_data_dir, stack_json=True)
+    input_param["stack_json_path"] = stack_path if stack_path else None
+    return bool(stack_path)
+
+
 def check_and_return_dir_contents(dump_dir, prefix):
     """
     check the given dump dir and validate files in dump dir by using the given prefix patterns to build a
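A minimal, standalone sketch of the behaviour the new `set_stack_json_path` helper adds: it resolves stack.json from the directory holding the NPU dump.json and records the result (or None) in the input dict. The temporary directory and file names below are made up for illustration; the real helper also goes through `extract_json` for validation.

```python
import os
import tempfile

# Hypothetical dump directory containing a stack.json next to the NPU dump.json.
dump_dir = tempfile.mkdtemp()
open(os.path.join(dump_dir, "stack.json"), "w").close()

input_param = {"npu_json_path": os.path.join(dump_dir, "dump.json")}

# Same net effect as set_stack_json_path(input_param): store the path if the file
# exists, otherwise None, and report whether a stack file was found.
stack_path = os.path.join(os.path.dirname(input_param["npu_json_path"]), "stack.json")
input_param["stack_json_path"] = stack_path if os.path.exists(stack_path) else None
print(bool(input_param["stack_json_path"]))  # True
```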
@@ -75,6 +82,10 @@ def check_and_return_dir_contents(dump_dir, prefix):


 def rename_api(npu_name, process):
+    """
+    Original api: {api_type}.{api_name}.{call count}.{forward/backward}.{input/output}.{arg index}
+    After rename: {api_type}.{api_name}.{input/output}.{arg index}
+    """
     npu_split = npu_name.split(process)
     try:
         torch_func_index, in_out = npu_split[0], npu_split[1]
@@ -87,17 +98,13 @@ def rename_api(npu_name, process):


 def read_op(op_data, op_name):
-    op_parsed_list = []
-    for name in io_name_mapping:
-        if name in op_data:
-            op_parsed_list.extend(op_item_parse(op_data[name], op_name + io_name_mapping[name]))
+    if Const.PARAMS_GRAD in op_name.split(Const.SEP):
+        op_parsed_list = op_item_parse(op_data, op_name)
+    else:
+        op_parsed_list = []
+        for name in CompareConst.IO_NAME_MAPPING:
+            if name in op_data:
+                op_parsed_list.extend(op_item_parse(op_data[name], op_name + CompareConst.IO_NAME_MAPPING[name]))
     return op_parsed_list
@@ -124,11 +131,14 @@ def op_item_parse(op_data, op_name: str, depth: int = 0) -> list:
         return [default_item]
     elif not op_data:
         return []

     item_list = []
     if isinstance(op_data, list):
         for i, data in enumerate(op_data):
+            if Const.PARAMS_GRAD not in op_name.split(Const.SEP):
+                item_list.extend(op_item_parse(data, op_name + Const.SEP + str(i), depth + 1))
+            else:
+                item_list.extend(op_item_parse(data, op_name, depth + 1))
     elif isinstance(op_data, dict):
         if is_leaf_data(op_data):
             return [gen_op_item(op_data, op_name)]
@@ -144,14 +154,15 @@ def is_leaf_data(op_data):
 def gen_op_item(op_data, op_name):
     op_item = {}
     op_item.update(op_data)
-    op_item['data_name'] =
+    data_name = op_data.get('data_name') if op_data.get('data_name') else '-1'  # also return '-1' for ""
+    op_item['data_name'] = data_name
+    op_item['full_op_name'] = data_name.rsplit(Const.SEP, 1)[0] if data_name != '-1' else op_name

     params = ['Max', 'Min', 'Mean', 'Norm']
     for i in params:
         if i not in op_item:
             op_item[i] = None

     if not op_item.get('dtype'):
         if op_item.get('type') == 'torch.Size':
             op_item['dtype'] = op_data.get('type')
@@ -159,6 +170,16 @@ def gen_op_item(op_data, op_name):
         elif op_item.get('type') == 'slice':
             op_item['dtype'] = op_data.get('type')
             op_item['shape'] = str(np.shape(np.array(op_data.get('value'))))
+        elif op_item.get('type') == 'ellipsis':
+            op_item['dtype'] = op_data.get('type')
+            op_item['shape'] = '[]'
+            for i in params:
+                op_item[i] = op_data.get('value')
+        elif op_item.get('type') == 'torch.ProcessGroup':
+            op_item['dtype'] = op_data.get('type')
+            op_item['shape'] = '[]'
+            for i in params:
+                op_item[i] = str(op_data.get('group_ranks'))
         else:
             op_item['dtype'] = str(type(op_data.get('value')))
             op_item['shape'] = '[]'
@@ -166,7 +187,7 @@ def gen_op_item(op_data, op_name):
             op_item[i] = op_data.get('value')
         if not op_item.get('md5'):
             op_item['md5'] = f"{zlib.crc32(str(op_data.get('value', '')).encode()):08x}"

     return op_item
@@ -276,6 +297,22 @@ def result_item_init(n_info, b_info, dump_mode):
     return result_item


+def count_struct(op_dict):
+    parts = [
+        CompareConst.OP_NAME,
+        CompareConst.INPUT_STRUCT,
+        CompareConst.OUTPUT_STRUCT,
+        CompareConst.PARAMS_STRUCT,
+        CompareConst.PARAMS_GRAD_STRUCT
+    ]
+    lengths = [len(op_dict.get(part, [])) for part in parts]
+    num = lengths[0]
+    if num != sum(lengths[1:]):
+        logger.error(f"Length of names and structs of op_dict not match. Please check! op_dict: {op_dict}")
+        raise CompareException(CompareException.NAMES_STRUCTS_MATCH_ERROR)
+    return tuple(lengths)
+
+
 def get_accuracy(result, n_dict, b_dict, dump_mode):
     def get_accuracy_core(n_start, n_len, b_start, b_len, key):
         min_len = min(n_len, b_len)
@@ -355,31 +392,50 @@ def get_accuracy(result, n_dict, b_dict, dump_mode):

         result.append(result_item)

-    n_num =
-    b_num =
-    n_num_output
+    n_num, n_num_input, n_num_output, n_num_params, n_num_params_grad = count_struct(n_dict)
+    b_num, b_num_input, b_num_output, b_num_params, b_num_params_grad = count_struct(b_dict)
+
+    get_accuracy_core(0, n_num_input, 0, b_num_input, CompareConst.INPUT_STRUCT)
+    get_accuracy_core(n_num_input + n_num_output, n_num_params, b_num_input + b_num_output, b_num_params,
+                      CompareConst.PARAMS_STRUCT)
+    get_accuracy_core(n_num_input, n_num_output, b_num_input, b_num_output, CompareConst.OUTPUT_STRUCT)
+    get_accuracy_core(n_num_input + n_num_output + n_num_params, n_num_params_grad,
+                      b_num_input + b_num_output + b_num_params, b_num_params_grad,
+                      CompareConst.PARAMS_GRAD_STRUCT)
+
+
+def append_stack_info(result_item, npu_stack_info, index):
+    """Append stack info to result_item."""
+    if npu_stack_info and index == 0:
+        result_item.extend(npu_stack_info)
+    else:
+        result_item.append(CompareConst.NONE)


 def get_un_match_accuracy(result, n_dict, dump_mode):
-    index_out = 0
     npu_stack_info = n_dict.get("stack_info", None)
     bench_name, bench_type, bench_shape = CompareConst.N_A, CompareConst.N_A, CompareConst.N_A
+
+    struct_to_index_mapping = {
+        CompareConst.INPUT_STRUCT: 0,
+        CompareConst.OUTPUT_STRUCT: 0,
+        CompareConst.PARAMS_STRUCT: 0,
+        CompareConst.PARAMS_GRAD_STRUCT: 0
+    }
+
+    op_name_list = n_dict.get(CompareConst.OP_NAME)
+    summary_list = n_dict.get(Const.SUMMARY)
+    data_name_list = n_dict.get('data_name')
+    op_name_reorder, summary_reorder, _ = reorder_op_x_list(op_name_list,
+                                                            summary_list,
+                                                            data_name_list)
+    for index, n_name in enumerate(op_name_reorder):
+        _, state = get_name_and_state(n_name)
+        struct_key = CompareConst.STATE_TO_STRUCT_MAPPING.get(state)
+        if not struct_key:
+            continue
+        n_struct = safe_get_value(n_dict, struct_to_index_mapping.get(struct_key), "n_dict", key=struct_key)
+        struct_to_index_mapping[struct_key] += 1

         try:
             result_item = [n_name, bench_name, n_struct[0], bench_type, n_struct[1], bench_shape]
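The four lengths returned by `count_struct` drive the `get_accuracy_core` offsets in the hunk above. A toy illustration (not msprobe code) of how those start/length pairs carve a flat op_name list into input/output/parameters/parameters_grad sections; the names and counts are invented:

```python
# Toy data: rows of one merged op laid out as input, output, parameters, parameters_grad.
op_names = ['op.input.0', 'op.input.1', 'op.output.0', 'op.parameters.weight', 'op.parameters_grad.weight']
n_input, n_output, n_params, n_params_grad = 2, 1, 1, 1

# Same start/length arithmetic as the get_accuracy_core calls above.
sections = {
    'input': (0, n_input),
    'output': (n_input, n_output),
    'parameters': (n_input + n_output, n_params),
    'parameters_grad': (n_input + n_output + n_params, n_params_grad),
}
for state, (start, length) in sections.items():
    print(state, op_names[start:start + length])
```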
@@ -390,28 +446,26 @@ def get_un_match_accuracy(result, n_dict, dump_mode):
                       f"output_struct of n_dict is {n_dict[CompareConst.OUTPUT_STRUCT]}"
             logger.error(err_msg)
             raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from e
+
         if dump_mode == Const.MD5:
             result_item.extend([CompareConst.N_A] * 3)
-                result_item.extend(npu_stack_info)
-            else:
-                result_item.append(CompareConst.NONE)
+            append_stack_info(result_item, npu_stack_info, index)
             result.append(result_item)
             continue
         if dump_mode == Const.SUMMARY:
             result_item.extend([CompareConst.N_A] * 8)
+        if dump_mode == Const.ALL:
             result_item.extend([CompareConst.N_A] * 5)

+        npu_summary_data = safe_get_value(summary_reorder, index, "summary_reorder")
         bench_summary_data = [CompareConst.N_A] * 4
+        result_item.extend(npu_summary_data)
         result_item.extend(bench_summary_data)
+        err_msg = CompareConst.NO_BENCH
+        accuracy_check_res = CompareConst.N_A
         result_item.append(accuracy_check_res)
         result_item.append(err_msg)
-            result_item.extend(npu_stack_info)
-        else:
-            result_item.append(CompareConst.NONE)
+        append_stack_info(result_item, npu_stack_info, index)
         if dump_mode == Const.ALL and result_item[1] == CompareConst.N_A:
             result_item.extend(["-1"])
         result.append(result_item)
@@ -423,6 +477,8 @@ def merge_tensor(tensor_list, dump_mode):
     op_dict[CompareConst.INPUT_STRUCT] = []
     op_dict[CompareConst.KWARGS_STRUCT] = []
     op_dict[CompareConst.OUTPUT_STRUCT] = []
+    op_dict[CompareConst.PARAMS_STRUCT] = []
+    op_dict[CompareConst.PARAMS_GRAD_STRUCT] = []
     op_dict[Const.SUMMARY] = []
     op_dict["stack_info"] = []

@@ -430,30 +486,25 @@ def merge_tensor(tensor_list, dump_mode):
         op_dict["data_name"] = []

     for tensor in tensor_list:
+        # A dict(len=2) with 'full_op_name' and 'full_info' is added to the tensor only if self.stack_mode is True
         if len(tensor) == 2:
             op_dict['stack_info'].append(tensor['full_info'])
             break
+
         op_dict["op_name"].append(tensor['full_op_name'])
-            op_dict.get(struct_key).append((tensor[Const.DTYPE], tensor[Const.SHAPE], tensor[Const.MD5]))
-        else:
-            op_dict.get(struct_key).append((tensor[Const.DTYPE], tensor[Const.SHAPE]))
-        break
+
+        _, state = get_name_and_state(tensor['full_op_name'])
+        struct_key = CompareConst.STATE_TO_STRUCT_MAPPING.get(state)
+        if not struct_key:
+            continue
+        if dump_mode == Const.MD5:
+            op_dict.get(struct_key).append((tensor[Const.DTYPE], tensor[Const.SHAPE], tensor[Const.MD5]))
+        else:
+            op_dict.get(struct_key).append((tensor[Const.DTYPE], tensor[Const.SHAPE]))
         op_dict[Const.SUMMARY].append([tensor[Const.MAX], tensor[Const.MIN], tensor[Const.MEAN], tensor[Const.NORM]])

         if dump_mode == Const.ALL:
             op_dict["data_name"].append(tensor['data_name'])
-            data_name = safe_get_value(op_dict, -1, "op_dict", key="data_name").rsplit(Const.SEP, 1)[0]
-            if data_name != "-1":
-                op_dict["op_name"][-1] = data_name

     if not op_dict[CompareConst.KWARGS_STRUCT]:
         del op_dict[CompareConst.KWARGS_STRUCT]
@@ -467,11 +518,90 @@ def print_compare_ends_info():
     logger.info('*' * total_len)


+def table_value_is_valid(value: str) -> bool:
+    if not isinstance(value, str):
+        return True
+    try:
+        # -1.00 or +1.00 should be considered as digit numbers
+        float(value)
+    except ValueError:
+        # otherwise, they will be considered as formula injections
+        return not bool(re.compile(FileCheckConst.CSV_BLACK_LIST).search(value))
+    return True
+
+
+def get_name_and_state(name):
+    """
+    Get api/module name and state
+    example:
+    name = 'conv2d.forward.1.input.0'
+    return: ('conv2d.forward.1.', 'input')
+
+    name = 'Functional.pad.0.backward.output.0'
+    return: ('Functional.pad.0.backward.', 'output')
+
+    state type: input, output, kwargs, parameters, parameters_grad
+    """
+    if Const.PARAMS_GRAD in name.split(Const.SEP):
+        return name.split(Const.PARAMS_GRAD)[0], Const.PARAMS_GRAD
+
+    split = re.split(Const.REGEX_FORWARD_BACKWARD, name)
+    api = f'{split[0]}.{split[1]}.'
+    state_str = split[2]
+    match = re.match(r'^(\d+\.)?(input|output|kwargs|parameters)\..+$', state_str)
+    if not match:
+        raise CompareException(f'Invalid name string: {name}')
+    if match.group(1):
+        api = f'{api}{match.group(1)}'
+    state = match.group(2)
+    return api, state
+
+
+def reorder_op_name_list(op_name_list):
+    if not op_name_list:
+        return op_name_list
+
+    parameters = []
+    output = []
+    parameters_grad = []
+    others = []
+    for x in op_name_list:
+        state = get_name_and_state(x)[1]
+        if state == Const.PARAMS:
+            parameters.append(x)
+        elif state == Const.OUTPUT:
+            output.append(x)
+        elif state == Const.PARAMS_GRAD:
+            parameters_grad.append(x)
+        else:
+            others.append(x)
+    # merge others, parameters and output, keeping parameters ahead of output
+    op_name_reorder = others + parameters + output + parameters_grad
+    return op_name_reorder
+
+
+def reorder_op_x_list(op_name_list, summary_list, data_name_list):
+    """Reorder op_name, summary and data_name, moving parameters after input and before output;
+    data_name is None in statistics (summary) compare and is handled separately."""
+    if not op_name_list or not summary_list:
+        return op_name_list, summary_list, data_name_list
+
+    index_map = {name: index for index, name in enumerate(op_name_list)}
+
+    op_name_reorder = reorder_op_name_list(op_name_list)
+    summary_reorder = [summary_list[index_map.get(name)] for name in op_name_reorder]
+    if data_name_list:
+        data_name_reorder = [data_name_list[index_map.get(name)] for name in op_name_reorder]
+    else:
+        data_name_reorder = data_name_list
+
+    return op_name_reorder, summary_reorder, data_name_reorder
+
+
 def _compare_parser(parser):
     parser.add_argument("-i", "--input_path", dest="input_path", type=str,
                         help="<Required> The compare input path, a dict json.", required=True)
     parser.add_argument("-o", "--output_path", dest="output_path", type=str,
-                        help="<Required> The compare task result out path. Default path: ./output",
+                        help="<Required> The compare task result out path. Default path: ./output",
                         required=False, default="./output", nargs="?", const="./output")
     parser.add_argument("-s", "--stack_mode", dest="stack_mode", action="store_true",
                         help="<optional> Whether to save stack info.", required=False)
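To make the new name parsing concrete, here is a standalone sketch of the split that `get_name_and_state` performs. `Const.REGEX_FORWARD_BACKWARD` is assumed here to be equivalent to `r'\.(forward|backward)\.'` (an assumption, not confirmed by this diff); the example names come from the docstring above.

```python
import re

# Assumed stand-in for Const.REGEX_FORWARD_BACKWARD.
REGEX_FORWARD_BACKWARD = r'\.(forward|backward)\.'

def split_name_and_state(name):
    # Split "prefix.forward/backward.rest" while keeping the direction token.
    prefix, direction, rest = re.split(REGEX_FORWARD_BACKWARD, name, maxsplit=1)
    api = f'{prefix}.{direction}.'
    match = re.match(r'^(\d+\.)?(input|output|kwargs|parameters)\..+$', rest)
    if not match:
        raise ValueError(f'Invalid name string: {name}')
    if match.group(1):  # optional call-count segment such as '1.'
        api += match.group(1)
    return api, match.group(2)

print(split_name_and_state('conv2d.forward.1.input.0'))            # ('conv2d.forward.1.', 'input')
print(split_name_and_state('Functional.pad.0.backward.output.0'))  # ('Functional.pad.0.backward.', 'output')
```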
msprobe/core/data_dump/data_collector.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright (c) 2024-
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -38,6 +38,9 @@ class DataCollector:
         self.module_processor = DataProcessorFactory.get_module_processor(self.config.framework)
         self.module_count = {}
         self.scope = ScopeFactory(self.config).build_scope()
+        self.backward_module_names = {}
+        self.optimizer_status = ""
+        self.optimizer_status_first_start = {Const.OPTIMIZER: True, Const.CLIP_GRAD: True}
         atexit.register(self.write_json)

     @property
@@ -53,8 +56,15 @@ class DataCollector:
         return (not scope or scope.check(name)) and pid == os.getpid()

     @staticmethod
-    def
+    def set_is_recomputable(data_info, is_recompute):
+        if data_info and len(data_info) == 1 and is_recompute is not None:  # normally data_info has exactly one entry
+            data_info[list(data_info.keys())[0]]["is_recompute"] = is_recompute
+
+    def reset_status(self):
+        self.optimizer_status = ""
+        self.optimizer_status_first_start = {Const.OPTIMIZER: True, Const.CLIP_GRAD: True}
+        self.data_writer.reset_cache()
+        self.backward_module_names.clear()

     def if_return_forward_new_output(self):
         return self.data_processor.if_return_forward_new_output()
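For clarity, a small sketch of what `set_is_recomputable` does to a single-entry `data_info` dict; the logic mirrors the hunk above, and the entry name is invented for illustration.

```python
def set_is_recomputable(data_info, is_recompute):
    # Tag the single collected entry with the recompute flag, if one was provided.
    if data_info and len(data_info) == 1 and is_recompute is not None:
        data_info[list(data_info.keys())[0]]["is_recompute"] = is_recompute

data_info = {"Module.layer1.forward.0": {"input_args": [], "output": []}}
set_is_recomputable(data_info, True)
print(data_info["Module.layer1.forward.0"]["is_recompute"])  # True
```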
@@ -79,69 +89,105 @@ class DataCollector:
         logger.debug(msg)
         self.data_writer.update_data(data_info)

-    def
-        if self.config.
+    def forward_input_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
+        if self.config.task == Const.FREE_BENCHMARK:
+            backward_name = name.replace(Const.FORWARD, Const.BACKWARD)
+            if self.check_scope_and_pid(self.scope, backward_name, pid):
+                self.data_processor.analyze_forward_input(backward_name, module, module_input_output)
+            return
+
+        if not self.check_scope_and_pid(self.scope, name, pid):
             return

-        if self.
-            self.data_processor.
+        data_info = {}
+        if self.config.task != Const.STRUCTURE:
+            data_info = self.data_processor.analyze_forward_input(name, module, module_input_output)
+        self.set_is_recomputable(data_info, is_recompute)
+        if self.config.level == Const.LEVEL_L2:
             return
-        logger.info(f"API {name} is inplace.")
-        data_info = self.data_processor.analyze_pre_forward_inplace(name, module_input_output)
         self.handle_data(name, data_info, flush=self.data_processor.is_terminated)

-    def
+    def forward_output_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
         self.update_construct(name)
         if not self.check_scope_and_pid(self.scope, name, pid):
             return
+
+        data_info = {}
+        if self.config.task != Const.STRUCTURE:
+            data_info = self.data_processor.analyze_forward_output(name, module, module_input_output)
+        self.set_is_recomputable(data_info, is_recompute)
         if self.config.level == Const.LEVEL_L2:
+            return
+        self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name))
+        self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
+
+    def forward_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
+        self.update_construct(name)
+        if not self.check_scope_and_pid(self.scope, name, pid):
             return

+        data_info = {}
+        if self.config.task != Const.STRUCTURE:
             data_info = self.data_processor.analyze_forward(name, module, module_input_output)
-        data_info = self.data_processor.analyze_forward_inplace(name, module_input_output)
+        self.set_is_recomputable(data_info, is_recompute)
         self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name))
         self.handle_data(name, data_info, flush=self.data_processor.is_terminated)

-    def backward_data_collect(self, name, module, pid, module_input_output):
+    def backward_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
         self.update_construct(name)
         if not self.check_scope_and_pid(self.scope, name, pid):
             return

-        data_info =
+        data_info = {}
+        if self.config.task != Const.STRUCTURE:
+            data_info = self.data_processor.analyze_backward(name, module, module_input_output)
         if self.config.level == Const.LEVEL_L2:
             return
+        # get the name of the module that ran this backward pass
+        if data_info and name.split(Const.SEP)[0] in Const.MODULE_PREFIX:
+            module_name = name.rsplit(Const.SEP, 2)[0]
+            # record the module name so parameter-gradient collection knows whether to collect for it
+            self.backward_module_names[module_name] = True
         self.handle_data(name, data_info, flush=self.data_processor.is_terminated)

-    def backward_input_data_collect(self, name, module, pid, module_input_output):
+    def backward_input_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
         self.update_construct(name)
         if not self.check_scope_and_pid(self.scope, name, pid):
             return

-        data_info =
+        data_info = {}
+        if self.config.task != Const.STRUCTURE:
+            data_info = self.data_processor.analyze_backward_input(name, module, module_input_output)
+        self.set_is_recomputable(data_info, is_recompute)
         self.handle_data(name, data_info)

-    def backward_output_data_collect(self, name, module, pid, module_input_output):
+    def backward_output_data_collect(self, name, module, pid, module_input_output, is_recompute=None):
         self.update_construct(name)
         if not self.check_scope_and_pid(self.scope, name, pid):
             return

-        data_info =
+        data_info = {}
+        if self.config.task != Const.STRUCTURE:
+            data_info = self.data_processor.analyze_backward_output(name, module, module_input_output)
+        self.set_is_recomputable(data_info, is_recompute)
         self.handle_data(name, data_info)

     def update_construct(self, name):
         if self.config.level not in DataCollector.level_without_construct:
-            self.
+            if self.optimizer_status in [Const.OPTIMIZER, Const.CLIP_GRAD]:
+                if self.optimizer_status_first_start[self.optimizer_status]:
+                    self.data_writer.update_construct({self.optimizer_status: None})
+                    self.optimizer_status_first_start[self.optimizer_status] = False
+                self.data_writer.update_construct({name: self.optimizer_status})
+            else:
+                self.data_writer.update_construct({name: self.module_processor.api_parent_node})
             self.data_writer.update_construct(self.module_processor.module_node)

     def handle_data(self, name, data_info, flush=False):
         if data_info:
             self.update_data(name, data_info)
+        if self.config.async_dump:
+            return
         if not flush:
             self.data_writer.flush_data_periodically()
         else:
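A toy illustration (not msprobe code) of how the reworked `update_construct` parents nodes while an optimizer phase is active; the literal strings `"optimizer"` and `"clip_grad"` are assumed stand-ins for `Const.OPTIMIZER` and `Const.CLIP_GRAD`.

```python
construct = {}
optimizer_status = "optimizer"                    # set by the debugger around optimizer steps
first_start = {"optimizer": True, "clip_grad": True}

def record(name, api_parent_node):
    if optimizer_status in ("optimizer", "clip_grad"):
        if first_start[optimizer_status]:
            construct[optimizer_status] = None    # the phase node itself has no parent
            first_start[optimizer_status] = False
        construct[name] = optimizer_status        # APIs seen during the phase hang off it
    else:
        construct[name] = api_parent_node

record("Torch.add.0.forward", api_parent_node=None)
print(construct)  # {'optimizer': None, 'Torch.add.0.forward': 'optimizer'}
```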
@@ -149,7 +195,36 @@ class DataCollector:

     def update_dump_paths(self, *args):
         self.data_writer.update_dump_paths(*args)
-
+
+    def initialize_json_file(self, framework=Const.UNKNOWN_FRAMEWORK):
+        self.data_writer.initialize_json_file(task=self.config.task, level=self.config.level, framework=framework)

     def update_iter(self, current_iter):
         self.data_processor.update_iter(current_iter)
+
+    def params_data_collect(self, name, param_name, pid, data):
+        grad_name = name + Const.SEP + Const.PARAMS_GRAD
+        # check scope and pid, and whether this name has run a backward pass
+        if not self.check_scope_and_pid(self.scope, name, pid) and not self.backward_module_names.get(name):
+            # if backward never ran, drop the placeholder grad entry written earlier
+            if self.data_writer.cache_data.get("data"):
+                self.data_writer.cache_data.get("data").pop(grad_name, None)
+            return
+        data_info = self.data_processor.analyze_params(grad_name, param_name, data)
+        self.handle_data(grad_name, data_info, flush=self.data_processor.is_terminated)
+
+    def fill_stack_tensor_data(self):
+        self.data_writer.fill_stack_tensor_data()
+
+    def debug_data_collect_forward(self, variable, name_with_count):
+
+        data_info = self.data_processor.analyze_debug_forward(variable, name_with_count)
+        self.data_writer.update_debug({name_with_count: data_info})
+
+    def debug_data_collect_backward(self, variable, grad_name_with_count):
+        # prepare an all-None nested data structure
+        all_none_data_info = self.data_processor.analyze_element_to_all_none(variable)
+        self.data_writer.update_debug({grad_name_with_count: all_none_data_info})
+
+        # register tensor backward hook
+        self.data_processor.analyze_debug_backward(variable, grad_name_with_count, self.data_writer.cache_debug['data'])