PyPI - mindstudio-probe - Versions diffs - 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

mindstudio-probe 1.1.1 (py3-none-any.whl) → 1.2.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197)

{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/METADATA +3 -2
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/RECORD +196 -141
msprobe/CMakeLists.txt +5 -0
msprobe/README.md +14 -19
msprobe/config.json +1 -0
msprobe/core/common/const.py +155 -6
msprobe/core/common/exceptions.py +3 -1
msprobe/core/common/file_utils.py +33 -7
msprobe/core/common/inplace_ops.yaml +3 -0
msprobe/core/common/utils.py +28 -14
msprobe/core/common_config.py +6 -0
msprobe/core/compare/acc_compare.py +139 -128
msprobe/core/compare/check.py +31 -29
msprobe/core/compare/compare_cli.py +17 -16
msprobe/core/compare/highlight.py +186 -99
msprobe/core/compare/layer_mapping/data_scope_parser.py +18 -7
msprobe/core/compare/layer_mapping/layer_mapping.py +21 -14
msprobe/core/compare/layer_mapping/postprocess_pass.py +4 -3
msprobe/core/compare/merge_result/merge_result.py +380 -0
msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
msprobe/core/compare/multiprocessing_compute.py +2 -2
msprobe/core/compare/npy_compare.py +109 -147
msprobe/core/compare/utils.py +189 -69
msprobe/core/data_dump/data_collector.py +51 -21
msprobe/core/data_dump/data_processor/base.py +38 -20
msprobe/core/data_dump/data_processor/factory.py +5 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +154 -20
msprobe/core/data_dump/data_processor/pytorch_processor.py +118 -58
msprobe/core/data_dump/json_writer.py +29 -1
msprobe/core/data_dump/scope.py +19 -18
msprobe/core/overflow_check/abnormal_scene.py +9 -5
msprobe/core/overflow_check/checker.py +1 -1
msprobe/core/overflow_check/utils.py +1 -1
msprobe/docs/01.installation.md +96 -17
msprobe/docs/02.config_introduction.md +5 -5
msprobe/docs/05.data_dump_PyTorch.md +91 -61
msprobe/docs/06.data_dump_MindSpore.md +57 -19
msprobe/docs/07.accuracy_checker_PyTorch.md +18 -18
msprobe/docs/09.accuracy_checker_MindSpore.md +4 -4
msprobe/docs/10.accuracy_compare_PyTorch.md +99 -41
msprobe/docs/11.accuracy_compare_MindSpore.md +249 -48
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/19.monitor.md +120 -27
msprobe/docs/21.visualization_PyTorch.md +115 -35
msprobe/docs/22.visualization_MindSpore.md +138 -41
msprobe/docs/23.generate_operator_PyTorch.md +107 -0
msprobe/docs/24.code_mapping_Mindspore.md +28 -0
msprobe/docs/{23.tool_function_introduction.md → 25.tool_function_introduction.md} +1 -0
msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
msprobe/docs/27.dump_json_instruction.md +521 -0
msprobe/docs/FAQ.md +26 -2
msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
msprobe/docs/img/visualization/tensorboard_1.png +0 -0
msprobe/docs/img/visualization/tensorboard_2.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_browser_2.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/docs/visualization/GPTModel.png +0 -0
msprobe/docs/visualization/ParallelMLP.png +0 -0
msprobe/docs/visualization/layer_mapping_example.md +132 -0
msprobe/docs/visualization/mapping.png +0 -0
msprobe/docs/visualization/mapping1.png +0 -0
msprobe/docs/visualization/module_name.png +0 -0
msprobe/docs/visualization/module_name1.png +0 -0
msprobe/docs/visualization/no_mapping.png +0 -0
msprobe/docs/visualization/no_mapping1.png +0 -0
msprobe/docs/visualization/no_mapping_analyze.png +0 -0
msprobe/docs/visualization/top_layer.png +0 -0
msprobe/mindspore/__init__.py +10 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +57 -25
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +2 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +5 -7
msprobe/mindspore/api_accuracy_checker/data_manager.py +37 -0
msprobe/mindspore/api_accuracy_checker/main.py +1 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +12 -6
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +3 -1
msprobe/mindspore/code_mapping/bind.py +264 -0
msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
msprobe/mindspore/code_mapping/graph.py +49 -0
msprobe/mindspore/code_mapping/graph_parser.py +226 -0
msprobe/mindspore/code_mapping/main.py +24 -0
msprobe/mindspore/code_mapping/processor.py +34 -0
msprobe/mindspore/common/const.py +3 -1
msprobe/mindspore/common/utils.py +50 -5
msprobe/mindspore/compare/distributed_compare.py +0 -2
msprobe/mindspore/compare/ms_compare.py +105 -63
msprobe/mindspore/compare/ms_graph_compare.py +14 -5
msprobe/mindspore/debugger/debugger_config.py +3 -0
msprobe/mindspore/debugger/precision_debugger.py +81 -12
msprobe/mindspore/dump/hook_cell/api_registry.py +83 -16
msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +33 -15
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +11 -1
msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
msprobe/mindspore/dump/kernel_graph_dump.py +7 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +13 -4
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
msprobe/mindspore/grad_probe/grad_analyzer.py +24 -12
msprobe/mindspore/grad_probe/hook.py +13 -4
msprobe/mindspore/mindtorch/__init__.py +18 -0
msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
msprobe/mindspore/ms_config.py +5 -1
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +7 -0
msprobe/mindspore/service.py +267 -101
msprobe/msprobe.py +24 -3
msprobe/pytorch/__init__.py +7 -6
msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +100 -267
msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +54 -30
msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +57 -1
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -1
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +42 -14
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +64 -19
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +34 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
msprobe/pytorch/bench_functions/npu_fusion_attention.py +42 -10
msprobe/pytorch/common/parse_json.py +2 -1
msprobe/pytorch/common/utils.py +45 -2
msprobe/pytorch/compare/distributed_compare.py +17 -29
msprobe/pytorch/compare/pt_compare.py +40 -20
msprobe/pytorch/debugger/debugger_config.py +27 -12
msprobe/pytorch/debugger/precision_debugger.py +42 -12
msprobe/pytorch/dump/module_dump/__init__.py +0 -0
msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
msprobe/pytorch/{module_processer.py → dump/module_dump/module_processer.py} +80 -6
msprobe/pytorch/free_benchmark/common/params.py +2 -1
msprobe/pytorch/free_benchmark/common/utils.py +3 -0
msprobe/pytorch/free_benchmark/compare/grad_saver.py +0 -2
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +31 -47
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
msprobe/pytorch/hook_module/__init__.py +1 -1
msprobe/pytorch/hook_module/hook_module.py +14 -11
msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +34 -0
msprobe/pytorch/hook_module/wrap_distributed.py +6 -8
msprobe/pytorch/hook_module/wrap_functional.py +0 -40
msprobe/pytorch/monitor/anomaly_analyse.py +1 -1
msprobe/pytorch/monitor/anomaly_detect.py +107 -22
msprobe/pytorch/monitor/csv2tb.py +166 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +25 -14
msprobe/pytorch/monitor/features.py +3 -3
msprobe/pytorch/monitor/module_hook.py +483 -277
msprobe/pytorch/monitor/module_metric.py +27 -48
msprobe/pytorch/monitor/module_spec_verifier.py +3 -1
msprobe/pytorch/monitor/optimizer_collect.py +52 -14
msprobe/pytorch/monitor/unittest/test_monitor.py +24 -9
msprobe/pytorch/monitor/utils.py +77 -6
msprobe/pytorch/online_dispatch/dispatch.py +8 -2
msprobe/pytorch/parse_tool/lib/compare.py +10 -10
msprobe/pytorch/parse_tool/lib/config.py +5 -7
msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
msprobe/pytorch/parse_tool/lib/parse_tool.py +11 -10
msprobe/pytorch/parse_tool/lib/utils.py +18 -19
msprobe/pytorch/parse_tool/lib/visualization.py +9 -10
msprobe/pytorch/service.py +176 -106
msprobe/visualization/builder/graph_builder.py +62 -5
msprobe/visualization/builder/msprobe_adapter.py +24 -2
msprobe/visualization/compare/graph_comparator.py +64 -14
msprobe/visualization/compare/mode_adapter.py +1 -15
msprobe/visualization/graph/base_node.py +12 -17
msprobe/visualization/graph/distributed_analyzer.py +318 -0
msprobe/visualization/graph/graph.py +9 -0
msprobe/visualization/graph_service.py +97 -23
msprobe/visualization/utils.py +14 -29
msprobe/pytorch/functional/module_dump.py +0 -84
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/top_level.txt +0 -0
/msprobe/docs/{data_dump_Mindspore → data_dump_MindSpore}/dynamic_graph_quick_start_example.md +0 -0
/msprobe/{pytorch/functional → mindspore/code_mapping}/__init__.py +0 -0

msprobe/core/compare/utils.py CHANGED

@@ -1,4 +1,4 @@
-# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0  (the "License");
@@ -21,7 +21,7 @@ from dataclasses import dataclass
 import numpy as np
-from msprobe.core.common.const import Const, CompareConst
+from msprobe.core.common.const import Const, CompareConst, FileCheckConst
 from msprobe.core.common.utils import CompareException, check_regex_prefix_format_valid, logger, safe_get_value
 from msprobe.core.common.file_utils import check_file_or_directory_path
@@ -37,13 +37,20 @@ def extract_json(dirname, stack_json=False):
     # Provide robustness on invalid directory inputs
     if not json_path:
         if stack_json:
-            logger.error(f'stack.json is not found in dump dir {dirname}.')
+            logger.warning(f'stack.json is not found in dump dir {dirname}.')
         else:
             logger.error(f'dump.json is not found in dump dir {dirname}.')
-        raise CompareException(CompareException.NO_DUMP_FILE_ERROR)
+            raise CompareException(CompareException.NO_DUMP_FILE_ERROR)
     return json_path
+def set_stack_json_path(input_param):
+    npu_data_dir = os.path.dirname(input_param.get("npu_json_path"))
+    stack_path = extract_json(npu_data_dir, stack_json=True)
+    input_param["stack_json_path"] = stack_path if stack_path else None
+    return bool(stack_path)
 def check_and_return_dir_contents(dump_dir, prefix):
     """
     check the given dump dir and validate files in dump dir by using the given prefix patterns to build a
@@ -75,6 +82,10 @@ def check_and_return_dir_contents(dump_dir, prefix):
 def rename_api(npu_name, process):
+    """
+    原api： {api_type}.{api_name}.{API调用次数}.{前向反向}.{input/output}.{参数序号}
+    rename后： {api_type}.{api_name}.{input/output}.{参数序号}
+    """
     npu_split = npu_name.split(process)
     try:
         torch_func_index, in_out = npu_split[0], npu_split[1]
@@ -87,17 +98,13 @@ def rename_api(npu_name, process):
 def read_op(op_data, op_name):
-    io_name_mapping = {
-        Const.INPUT_ARGS: '.input',
-        Const.INPUT_KWARGS: '.input',
-        Const.INPUT: '.input',
-        Const.OUTPUT: '.output'
-    }
-    op_parsed_list = []
-    for name in io_name_mapping:
-        if name in op_data:
-            op_parsed_list.extend(op_item_parse(op_data[name], op_name + io_name_mapping[name]))
+    if Const.PARAMS_GRAD in op_name.split(Const.SEP):
+        op_parsed_list = op_item_parse(op_data, op_name)
+    else:
+        op_parsed_list = []
+        for name in CompareConst.IO_NAME_MAPPING:
+            if name in op_data:
+                op_parsed_list.extend(op_item_parse(op_data[name], op_name + CompareConst.IO_NAME_MAPPING[name]))
     return op_parsed_list
@@ -124,11 +131,14 @@ def op_item_parse(op_data, op_name: str, depth: int = 0) -> list:
         return [default_item]
     elif not op_data:
         return []
     item_list = []
     if isinstance(op_data, list):
         for i, data in enumerate(op_data):
-            item_list.extend(op_item_parse(data, op_name + Const.SEP + str(i), depth + 1))
+            if Const.PARAMS_GRAD not in op_name.split(Const.SEP):
+                item_list.extend(op_item_parse(data, op_name + Const.SEP + str(i), depth + 1))
+            else:
+                item_list.extend(op_item_parse(data, op_name, depth + 1))
     elif isinstance(op_data, dict):
         if is_leaf_data(op_data):
             return [gen_op_item(op_data, op_name)]
@@ -144,14 +154,15 @@ def is_leaf_data(op_data):
 def gen_op_item(op_data, op_name):
     op_item = {}
     op_item.update(op_data)
-    op_item['full_op_name'] = op_name
-    op_item['data_name'] = op_data.get('data_name', '-1')
+    data_name = op_data.get('data_name') if op_data.get('data_name') else '-1'  # 如果是""也返回-1
+    op_item['data_name'] = data_name
+    op_item['full_op_name'] = data_name.rsplit(Const.SEP, 1)[0] if data_name != '-1' else op_name
     params = ['Max', 'Min', 'Mean', 'Norm']
     for i in params:
         if i not in op_item:
             op_item[i] = None
     if not op_item.get('dtype'):
         if op_item.get('type') == 'torch.Size':
             op_item['dtype'] = op_data.get('type')
@@ -166,7 +177,7 @@ def gen_op_item(op_data, op_name):
                 op_item[i] = op_data.get('value')
     if not op_item.get('md5'):
         op_item['md5'] = f"{zlib.crc32(str(op_data.get('value', '')).encode()):08x}"
     return op_item
@@ -276,6 +287,22 @@ def result_item_init(n_info, b_info, dump_mode):
     return result_item
+def count_struct(op_dict):
+    parts = [
+        CompareConst.OP_NAME,
+        CompareConst.INPUT_STRUCT,
+        CompareConst.OUTPUT_STRUCT,
+        CompareConst.PARAMS_STRUCT,
+        CompareConst.PARAMS_GRAD_STRUCT
+    ]
+    lengths = [len(op_dict.get(part, [])) for part in parts]
+    num = lengths[0]
+    if num != sum(lengths[1:]):
+        logger.error(f"Length of names and structs of op_dict not match. Please check! op_dict: {op_dict}")
+        raise CompareException(CompareException.NAMES_STRUCTS_MATCH_ERROR)
+    return tuple(lengths)
 def get_accuracy(result, n_dict, b_dict, dump_mode):
     def get_accuracy_core(n_start, n_len, b_start, b_len, key):
         min_len = min(n_len, b_len)
@@ -355,31 +382,50 @@ def get_accuracy(result, n_dict, b_dict, dump_mode):
                 result.append(result_item)
-    n_num = len(n_dict['op_name'])
-    b_num = len(b_dict['op_name'])
-    n_num_input = len([name for name in n_dict['op_name']
-                       if Const.INPUT in name.split(Const.SEP) or Const.KWARGS in name.split(Const.SEP)])
-    b_num_input = len([name for name in b_dict['op_name']
-                       if Const.INPUT in name.split(Const.SEP) or Const.KWARGS in name.split(Const.SEP)])
-    n_num_output = n_num - n_num_input
-    b_num_output = b_num - b_num_input
-    get_accuracy_core(0, n_num_input, 0, b_num_input, 'input_struct')
-    get_accuracy_core(n_num_input, n_num_output, b_num_input, b_num_output, 'output_struct')
+    n_num, n_num_input, n_num_output, n_num_params, n_num_params_grad = count_struct(n_dict)
+    b_num, b_num_input, b_num_output, b_num_params, b_num_params_grad = count_struct(b_dict)
+    get_accuracy_core(0, n_num_input, 0, b_num_input, CompareConst.INPUT_STRUCT)
+    get_accuracy_core(n_num_input + n_num_output, n_num_params, b_num_input + b_num_output, b_num_params,
+                      CompareConst.PARAMS_STRUCT)
+    get_accuracy_core(n_num_input, n_num_output, b_num_input, b_num_output, CompareConst.OUTPUT_STRUCT)
+    get_accuracy_core(n_num_input + n_num_output + n_num_params, n_num_params_grad,
+                      b_num_input + b_num_output + b_num_params, b_num_params_grad,
+                      CompareConst.PARAMS_GRAD_STRUCT)
+def append_stack_info(result_item, npu_stack_info, index):
+    """添加堆栈信息到 result_item"""
+    if npu_stack_info and index == 0:
+        result_item.extend(npu_stack_info)
+    else:
+        result_item.append(CompareConst.NONE)
 def get_un_match_accuracy(result, n_dict, dump_mode):
-    index_out = 0
     npu_stack_info = n_dict.get("stack_info", None)
     bench_name, bench_type, bench_shape = CompareConst.N_A, CompareConst.N_A, CompareConst.N_A
-    err_msg = CompareConst.NO_BENCH
-    accuracy_check_res = CompareConst.N_A
-    for index, n_name in enumerate(n_dict["op_name"]):
-        name_ele_list = n_name.split(Const.SEP)
-        if Const.INPUT in name_ele_list or Const.KWARGS in name_ele_list:
-            n_struct = safe_get_value(n_dict, index, "n_dict", key=CompareConst.INPUT_STRUCT)
-        if Const.OUTPUT in name_ele_list:
-            n_struct = safe_get_value(n_dict, index_out, "n_dict", key=CompareConst.OUTPUT_STRUCT)
-            index_out += 1
+    struct_to_index_mapping = {
+        CompareConst.INPUT_STRUCT: 0,
+        CompareConst.OUTPUT_STRUCT: 0,
+        CompareConst.PARAMS_STRUCT: 0,
+        CompareConst.PARAMS_GRAD_STRUCT: 0
+    }
+    op_name_list = n_dict.get(CompareConst.OP_NAME)
+    summary_list = n_dict.get(Const.SUMMARY)
+    data_name_list = n_dict.get('data_name')
+    op_name_reorder, summary_reorder, _ = reorder_op_x_list(op_name_list,
+                                                            summary_list,
+                                                            data_name_list)
+    for index, n_name in enumerate(op_name_reorder):
+        _, state = get_name_and_state(n_name)
+        struct_key = CompareConst.STATE_TO_STRUCT_MAPPING.get(state)
+        if not struct_key:
+            continue
+        n_struct = safe_get_value(n_dict, struct_to_index_mapping.get(struct_key), "n_dict", key=struct_key)
+        struct_to_index_mapping[struct_key] += 1
         try:
             result_item = [n_name, bench_name, n_struct[0], bench_type, n_struct[1], bench_shape]
@@ -390,28 +436,26 @@ def get_un_match_accuracy(result, n_dict, dump_mode):
                       f"output_struct of n_dict is {n_dict[CompareConst.OUTPUT_STRUCT]}"
             logger.error(err_msg)
             raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from e
         if dump_mode == Const.MD5:
             result_item.extend([CompareConst.N_A] * 3)
-            if npu_stack_info and index == 0:
-                result_item.extend(npu_stack_info)
-            else:
-                result_item.append(CompareConst.NONE)
+            append_stack_info(result_item, npu_stack_info, index)
             result.append(result_item)
             continue
         if dump_mode == Const.SUMMARY:
             result_item.extend([CompareConst.N_A] * 8)
-        else:
+        if dump_mode == Const.ALL:
             result_item.extend([CompareConst.N_A] * 5)
-        npu_summary_data = safe_get_value(n_dict, index, "n_dict", key=CompareConst.SUMMARY)
-        result_item.extend(npu_summary_data)
+        npu_summary_data = safe_get_value(summary_reorder, index, "summary_reorder")
         bench_summary_data = [CompareConst.N_A] * 4
+        result_item.extend(npu_summary_data)
         result_item.extend(bench_summary_data)
+        err_msg = CompareConst.NO_BENCH
+        accuracy_check_res = CompareConst.N_A
         result_item.append(accuracy_check_res)
         result_item.append(err_msg)
-        if npu_stack_info and index == 0:
-            result_item.extend(npu_stack_info)
-        else:
-            result_item.append(CompareConst.NONE)
+        append_stack_info(result_item, npu_stack_info, index)
         if dump_mode == Const.ALL and result_item[1] == CompareConst.N_A:
             result_item.extend(["-1"])
         result.append(result_item)
@@ -423,6 +467,8 @@ def merge_tensor(tensor_list, dump_mode):
     op_dict[CompareConst.INPUT_STRUCT] = []
     op_dict[CompareConst.KWARGS_STRUCT] = []
     op_dict[CompareConst.OUTPUT_STRUCT] = []
+    op_dict[CompareConst.PARAMS_STRUCT] = []
+    op_dict[CompareConst.PARAMS_GRAD_STRUCT] = []
     op_dict[Const.SUMMARY] = []
     op_dict["stack_info"] = []
@@ -430,30 +476,25 @@ def merge_tensor(tensor_list, dump_mode):
         op_dict["data_name"] = []
     for tensor in tensor_list:
+        # A dict(len=2) with 'full_op_name' and 'full_info' is added to the tensor only if self.stack_mode is True
         if len(tensor) == 2:
             op_dict['stack_info'].append(tensor['full_info'])
             break
         op_dict["op_name"].append(tensor['full_op_name'])
-        name_ele_list = tensor['full_op_name'].split(Const.SEP)
-        name_to_struct_mapping = {
-            Const.INPUT: CompareConst.INPUT_STRUCT,
-            Const.KWARGS: CompareConst.KWARGS_STRUCT,
-            Const.OUTPUT: CompareConst.OUTPUT_STRUCT
-        }
-        for name_key, struct_key in name_to_struct_mapping.items():
-            if name_key in name_ele_list:
-                if dump_mode == Const.MD5:
-                    op_dict.get(struct_key).append((tensor[Const.DTYPE], tensor[Const.SHAPE], tensor[Const.MD5]))
-                else:
-                    op_dict.get(struct_key).append((tensor[Const.DTYPE], tensor[Const.SHAPE]))
-                break
+        _, state = get_name_and_state(tensor['full_op_name'])
+        struct_key = CompareConst.STATE_TO_STRUCT_MAPPING.get(state)
+        if not struct_key:
+            continue
+        if dump_mode == Const.MD5:
+            op_dict.get(struct_key).append((tensor[Const.DTYPE], tensor[Const.SHAPE], tensor[Const.MD5]))
+        else:
+            op_dict.get(struct_key).append((tensor[Const.DTYPE], tensor[Const.SHAPE]))
         op_dict[Const.SUMMARY].append([tensor[Const.MAX], tensor[Const.MIN], tensor[Const.MEAN], tensor[Const.NORM]])
         if dump_mode == Const.ALL:
             op_dict["data_name"].append(tensor['data_name'])
-            data_name = safe_get_value(op_dict, -1, "op_dict", key="data_name").rsplit(Const.SEP, 1)[0]
-            if data_name != "-1":
-                op_dict["op_name"][-1] = data_name
     if not op_dict[CompareConst.KWARGS_STRUCT]:
         del op_dict[CompareConst.KWARGS_STRUCT]
@@ -467,11 +508,90 @@ def print_compare_ends_info():
     logger.info('*' * total_len)
+def table_value_is_valid(value: str) -> bool:
+    if not isinstance(value, str):
+        return True
+    try:
+        # -1.00 or +1.00 should be consdiered as digit numbers
+        float(value)
+    except ValueError:
+        # otherwise, they will be considered as formular injections
+        return not bool(re.compile(FileCheckConst.CSV_BLACK_LIST).search(value))
+    return True
+def get_name_and_state(name):
+    """
+    Get api/module name and state
+    example:
+    name = 'conv2d.forward.1.input.0'
+    return: ('conv2d.forward.1.', 'input')
+    name = 'Functional.pad.0.backward.output.0'
+    return: ('Functional.pad.0.backward.', 'output')
+    state type: input, output, kwargs, parameters, parameters_grad
+    """
+    if Const.PARAMS_GRAD in name.split(Const.SEP):
+        return name.split(Const.PARAMS_GRAD)[0], Const.PARAMS_GRAD
+    split = re.split(Const.REGEX_FORWARD_BACKWARD, name)
+    api = f'{split[0]}.{split[1]}.'
+    state_str = split[2]
+    match = re.match(r'^(\d+\.)?(input|output|kwargs|parameters)\..+$', state_str)
+    if not match:
+        raise CompareException(f'Invalid name string: {name}')
+    if match.group(1):
+        api = f'{api}{match.group(1)}'
+    state = match.group(2)
+    return api, state
+def reorder_op_name_list(op_name_list):
+    if not op_name_list:
+        return op_name_list
+    parameters = []
+    output = []
+    parameters_grad = []
+    others = []
+    for x in op_name_list:
+        state = get_name_and_state(x)[1]
+        if state == Const.PARAMS:
+            parameters.append(x)
+        elif state == Const.OUTPUT:
+            output.append(x)
+        elif state == Const.PARAMS_GRAD:
+            parameters_grad.append(x)
+        else:
+            others.append(x)
+    # 合并others, parameters, 和output，确保parameters排在output前面
+    op_name_reorder = others + parameters + output + parameters_grad
+    return op_name_reorder
+def reorder_op_x_list(op_name_list, summary_list, data_name_list):
+    """对op_name, summary, data_name重新排序，把parameters放到input后output前，data_name由于统计量比对时，为None，单独处理"""
+    if not op_name_list or not summary_list:
+        return op_name_list, summary_list, data_name_list
+    index_map = {name: index for index, name in enumerate(op_name_list)}
+    op_name_reorder = reorder_op_name_list(op_name_list)
+    summary_reorder = [summary_list[index_map.get(name)] for name in op_name_reorder]
+    if data_name_list:
+        data_name_reorder = [data_name_list[index_map.get(name)] for name in op_name_reorder]
+    else:
+        data_name_reorder = data_name_list
+    return op_name_reorder, summary_reorder, data_name_reorder
 def _compare_parser(parser):
     parser.add_argument("-i", "--input_path", dest="input_path", type=str,
                         help="<Required> The compare input path, a dict json.", required=True)
     parser.add_argument("-o", "--output_path", dest="output_path", type=str,
-                        help="<Required> The compare task result out path. Default path: ./output",
+                        help="<Required> The compare task result out path. Default path: ./output",
                         required=False, default="./output", nargs="?", const="./output")
     parser.add_argument("-s", "--stack_mode", dest="stack_mode", action="store_true",
                         help="<optional> Whether to save stack info.", required=False)

msprobe/core/data_dump/data_collector.py CHANGED

@@ -38,6 +38,8 @@ class DataCollector:
         self.module_processor = DataProcessorFactory.get_module_processor(self.config.framework)
         self.module_count = {}
         self.scope = ScopeFactory(self.config).build_scope()
+        self.backward_module_names = {}
+        self.optimizer_status = ""
         atexit.register(self.write_json)
     @property
@@ -52,10 +54,6 @@ class DataCollector:
     def check_scope_and_pid(scope, name, pid):
         return (not scope or scope.check(name)) and pid == os.getpid()
-    @staticmethod
-    def is_inplace(module):
-        return getattr(module, "op_is_inplace", False)
     def if_return_forward_new_output(self):
         return self.data_processor.if_return_forward_new_output()
@@ -79,32 +77,38 @@ class DataCollector:
         logger.debug(msg)
         self.data_writer.update_data(data_info)
-    def pre_forward_data_collect(self, name, module, pid, module_input_output):
-        if self.config.level == Const.LEVEL_L2 and self.check_scope_and_pid(self.scope, name, pid):
-            self.data_processor.analyze_pre_forward(name, module, module_input_output)
+    def forward_input_data_collect(self, name, module, pid, module_input_output):
+        if self.config.task == Const.FREE_BENCHMARK:
+            backward_name = name.replace(Const.FORWARD, Const.BACKWARD)
+            if self.check_scope_and_pid(self.scope, backward_name, pid):
+                self.data_processor.analyze_forward_input(backward_name, module, module_input_output)
+            return
+        if not self.check_scope_and_pid(self.scope, name, pid):
             return
-        backward_name = name.replace(Const.FORWARD, Const.BACKWARD)
-        if self.check_scope_and_pid(self.scope, backward_name, pid):
-            self.data_processor.analyze_pre_forward(backward_name, module, module_input_output)
-        if not self.is_inplace(module) or not self.check_scope_and_pid(self.scope, name, pid):
+        data_info = self.data_processor.analyze_forward_input(name, module, module_input_output)
+        if self.config.level == Const.LEVEL_L2:
             return
-        logger.info(f"API {name} is inplace.")
-        data_info = self.data_processor.analyze_pre_forward_inplace(name, module_input_output)
         self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
-    def forward_data_collect(self, name, module, pid, module_input_output):
+    def forward_output_data_collect(self, name, module, pid, module_input_output):
         self.update_construct(name)
         if not self.check_scope_and_pid(self.scope, name, pid):
             return
+        data_info = self.data_processor.analyze_forward_output(name, module, module_input_output)
         if self.config.level == Const.LEVEL_L2:
-            self.data_processor.analyze_forward(name, module, module_input_output)
             return
+        self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name))
+        self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
-        if not self.is_inplace(module):
-            data_info = self.data_processor.analyze_forward(name, module, module_input_output)
-        else:
-            data_info = self.data_processor.analyze_forward_inplace(name, module_input_output)
+    def forward_data_collect(self, name, module, pid, module_input_output):
+        self.update_construct(name)
+        if not self.check_scope_and_pid(self.scope, name, pid):
+            return
+        data_info = self.data_processor.analyze_forward(name, module, module_input_output)
         self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name))
         self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
@@ -116,6 +120,11 @@ class DataCollector:
         data_info = self.data_processor.analyze_backward(name, module, module_input_output)
         if self.config.level == Const.LEVEL_L2:
             return
+        # 获取执行反向的模块名称
+        if data_info and name.split(Const.SEP)[0] in Const.MODULE_PREFIX:
+            module_name = name.rsplit(Const.SEP, 2)[0]
+            # 将模块名称加入到反向模块名称集合中，用于梯度收集时判断是否需要收集梯度
+            self.backward_module_names[module_name] = True
         self.handle_data(name, data_info, flush=self.data_processor.is_terminated)
     def backward_input_data_collect(self, name, module, pid, module_input_output):
@@ -136,12 +145,17 @@ class DataCollector:
     def update_construct(self, name):
         if self.config.level not in DataCollector.level_without_construct:
-            self.data_writer.update_construct({name: self.module_processor.api_parent_node})
+            if self.optimizer_status in [Const.OPTIMIZER, Const.CLIP_GRAD]:
+                self.data_writer.update_construct({name: self.optimizer_status})
+            else:
+                self.data_writer.update_construct({name: self.module_processor.api_parent_node})
             self.data_writer.update_construct(self.module_processor.module_node)
     def handle_data(self, name, data_info, flush=False):
         if data_info:
             self.update_data(name, data_info)
+        if self.config.async_dump:
+            return
         if not flush:
             self.data_writer.flush_data_periodically()
         else:
@@ -149,7 +163,23 @@ class DataCollector:
     def update_dump_paths(self, *args):
         self.data_writer.update_dump_paths(*args)
-        self.data_writer.initialize_json_file(task=self.config.task, level=self.config.level)
+    def initialize_json_file(self, framework=Const.UNKNOWN_FRAMEWORK):
+        self.data_writer.initialize_json_file(task=self.config.task, level=self.config.level, framework=framework)
     def update_iter(self, current_iter):
         self.data_processor.update_iter(current_iter)
+    def params_data_collect(self, name, param_name, pid, data):
+        grad_name = name + Const.SEP + Const.PARAMS_GRAD
+        # 校验scope和pid，以及当前name是否有过反向计算
+        if not self.check_scope_and_pid(self.scope, name, pid) and not self.backward_module_names.get(name):
+            # 如果没有反向计算，则需要清除之前占位写入的grad数据
+            if self.data_writer.cache_data.get("data"):
+                self.data_writer.cache_data.get("data").pop(grad_name, None)
+            return
+        data_info = self.data_processor.analyze_params(grad_name, param_name, data)
+        self.handle_data(grad_name, data_info, flush=self.data_processor.is_terminated)
+    def fill_stack_tensor_data(self):
+        self.data_writer.fill_stack_tensor_data()

msprobe/core/data_dump/data_processor/base.py CHANGED Viewed

@@ -39,9 +39,8 @@ class ModuleForwardInputsOutputs:
     def output_tuple(self):
         return convert_tuple(self.output)
-    def concat_args_and_kwargs(self):
-        args = self.args + tuple(self.kwargs.values())
-        return args
+    def update_output_with_args_and_kwargs(self):
+        self.output = self.args + tuple(self.kwargs.values())
 @dataclass
@@ -77,11 +76,12 @@ class ModuleBackwardOutputs:
 class TensorStatInfo:
-    def __init__(self, max_val=None, min_val=None, mean_val=None, norm_val=None):
+    def __init__(self, max_val=None, min_val=None, mean_val=None, norm_val=None, stack_tensor_stat=None):
         self.max = max_val
         self.min = min_val
         self.mean = mean_val
         self.norm = norm_val
+        self.stack_tensor_stat = stack_tensor_stat
 class BaseDataProcessor:
@@ -102,6 +102,7 @@ class BaseDataProcessor:
         self.current_iter = 0
         self._return_forward_new_output = False
         self._forward_new_output = None
+        self.save_name = None
         if hasattr(config, "data_mode"):
             self.allowed_data_mode = self._get_allowed_data_mode(config.data_mode)
@@ -223,7 +224,7 @@ class BaseDataProcessor:
         elif isinstance(args, dict):
             return cls.apply_transform_dict(args, transform, depth)
         elif args is not None:
-            logger.warning(f"Data type {type(args)} is not supported.")
+            logger.debug(f"Data type {type(args)} is not supported.")
             return None
         else:
             return None
@@ -273,13 +274,10 @@ class BaseDataProcessor:
         """
         return forward_backward in self.allowed_data_mode and input_output in self.allowed_data_mode
-    def analyze_pre_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
-        pass
     def analyze_element(self, element):
         return self.recursive_apply_transform(element, self.analyze_single_element)
-    def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
+    def analyze_forward_input(self, name, module, module_input_output: ModuleForwardInputsOutputs):
         api_info_struct = {}
         # check whether data_mode contains forward or input
         if self.is_dump_for_data_mode(Const.FORWARD, Const.INPUT):
@@ -291,16 +289,22 @@ class BaseDataProcessor:
             kwargs_info_list = self.analyze_element(module_input_output.kwargs)
             api_info_struct[name][Const.INPUT_KWARGS] = kwargs_info_list
-        # check whether data_mode contains forward or output
+        return api_info_struct
+    def analyze_forward_output(self, name, module, module_input_output: ModuleForwardInputsOutputs):
+        api_info_struct = {}
+        # check whether data_mode contains forward or input
         if self.is_dump_for_data_mode(Const.FORWARD, Const.OUTPUT):
-            api_info_struct[name] = api_info_struct.get(name, {})
+            api_info_struct[name] = {}
             self.api_data_category = Const.OUTPUT
             output_info_list = self.analyze_element(module_input_output.output_tuple)
             api_info_struct[name][Const.OUTPUT] = output_info_list
         return api_info_struct
-    def analyze_pre_forward_inplace(self, name, module_input_output: ModuleForwardInputsOutputs):
+    def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs):
         api_info_struct = {}
+        # check whether data_mode contains forward or input
         if self.is_dump_for_data_mode(Const.FORWARD, Const.INPUT):
             api_info_struct[name] = {}
             self.api_data_category = Const.INPUT
@@ -309,16 +313,18 @@ class BaseDataProcessor:
             self.api_data_category = Const.KWARGS
             kwargs_info_list = self.analyze_element(module_input_output.kwargs)
             api_info_struct[name][Const.INPUT_KWARGS] = kwargs_info_list
-        return api_info_struct
-    def analyze_forward_inplace(self, name, module_input_output: ModuleForwardInputsOutputs):
-        concat_args = module_input_output.concat_args_and_kwargs()
-        api_info_struct = {}
+        # check whether data_mode contains forward or output
         if self.is_dump_for_data_mode(Const.FORWARD, Const.OUTPUT):
-            api_info_struct[name] = {}
+            api_info_struct[name] = api_info_struct.get(name, {})
             self.api_data_category = Const.OUTPUT
-            output_info_list = self.analyze_element(concat_args)
+            output_info_list = self.analyze_element(module_input_output.output_tuple)
             api_info_struct[name][Const.OUTPUT] = output_info_list
+        if name in api_info_struct and hasattr(module_input_output, Const.PARAMS):
+            self.api_data_category = Const.PARAMS
+            api_info_struct[name][Const.PARAMS] = self.analyze_element(getattr(module_input_output, Const.PARAMS))
         return api_info_struct
     def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs):
@@ -359,9 +365,21 @@ class BaseDataProcessor:
             api_info_struct[name][Const.OUTPUT] = output_info_list
         return api_info_struct
+    def analyze_params(self, name, param_name, grad):
+        api_info_struct = {}
+        self.save_name = name + Const.SEP + param_name
+        data_info = self.analyze_element(grad)
+        grad_info_dict = {param_name: [data_info]}
+        api_info_struct[name] = grad_info_dict
+        return api_info_struct
     def get_save_file_path(self, suffix):
         file_format = Const.PT_SUFFIX if self.config.framework == Const.PT_FRAMEWORK else Const.NUMPY_SUFFIX
-        dump_data_name = (self.current_api_or_module_name + Const.SEP + self.api_data_category + Const.SEP +
-                          suffix + file_format)
+        if self.save_name is not None:
+            dump_data_name = (self.save_name + file_format)
+            self.save_name = None
+        else:
+            dump_data_name = (self.current_api_or_module_name + Const.SEP + self.api_data_category + Const.SEP +
+                              suffix + file_format)
         file_path = os.path.join(self.data_writer.dump_tensor_data_dir, dump_data_name)
         return dump_data_name, file_path

mindstudio-probe 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

mindstudio-probe 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl