mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +1 -1
- mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
- msprobe/README.md +46 -16
- msprobe/__init__.py +16 -1
- msprobe/config.json +0 -2
- msprobe/core/advisor/advisor.py +8 -8
- msprobe/core/advisor/advisor_const.py +6 -7
- msprobe/core/advisor/advisor_result.py +12 -12
- msprobe/core/common/const.py +64 -3
- msprobe/core/common/exceptions.py +2 -2
- msprobe/core/common/file_utils.py +54 -9
- msprobe/core/common/inplace_op_checker.py +38 -0
- msprobe/core/common/inplace_ops.yaml +251 -0
- msprobe/core/common/log.py +21 -11
- msprobe/core/common/utils.py +153 -167
- msprobe/core/common_config.py +18 -25
- msprobe/core/compare/acc_compare.py +209 -36
- msprobe/core/compare/check.py +102 -17
- msprobe/core/compare/compare_cli.py +21 -1
- msprobe/core/compare/highlight.py +41 -5
- msprobe/core/compare/multiprocessing_compute.py +33 -8
- msprobe/core/compare/npy_compare.py +21 -6
- msprobe/core/compare/utils.py +82 -48
- msprobe/core/data_dump/data_collector.py +31 -32
- msprobe/core/data_dump/data_processor/base.py +45 -22
- msprobe/core/data_dump/data_processor/factory.py +20 -3
- msprobe/core/data_dump/data_processor/mindspore_processor.py +11 -5
- msprobe/core/data_dump/data_processor/pytorch_processor.py +24 -7
- msprobe/core/data_dump/json_writer.py +63 -42
- msprobe/core/data_dump/scope.py +32 -16
- msprobe/core/grad_probe/constant.py +4 -0
- msprobe/core/grad_probe/grad_compare.py +2 -3
- msprobe/core/grad_probe/utils.py +16 -3
- msprobe/docs/01.installation.md +19 -9
- msprobe/docs/02.config_introduction.md +52 -80
- msprobe/docs/03.config_examples.md +3 -13
- msprobe/docs/04.acl_config_examples.md +11 -9
- msprobe/docs/05.data_dump_PyTorch.md +140 -12
- msprobe/docs/06.data_dump_MindSpore.md +47 -5
- msprobe/docs/07.accuracy_checker_PyTorch.md +57 -34
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +51 -11
- msprobe/docs/09.accuracy_checker_MindSpore.md +8 -8
- msprobe/docs/10.accuracy_compare_PyTorch.md +181 -99
- msprobe/docs/11.accuracy_compare_MindSpore.md +162 -31
- msprobe/docs/13.overflow_check_MindSpore.md +1 -1
- msprobe/docs/15.free_benchmarking_PyTorch.md +59 -53
- msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
- msprobe/docs/17.grad_probe.md +14 -16
- msprobe/docs/18.online_dispatch.md +89 -0
- msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +22 -10
- msprobe/docs/img/ms_dump.png +0 -0
- msprobe/docs/img/ms_layer.png +0 -0
- msprobe/docs/img/pt_dump.png +0 -0
- msprobe/mindspore/__init__.py +1 -0
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +35 -11
- msprobe/mindspore/api_accuracy_checker/api_info.py +7 -0
- msprobe/mindspore/cell_processor.py +27 -3
- msprobe/mindspore/common/const.py +2 -0
- msprobe/mindspore/common/utils.py +18 -2
- msprobe/mindspore/compare/distributed_compare.py +9 -22
- msprobe/mindspore/compare/layer_mapping.py +146 -0
- msprobe/mindspore/compare/modify_mapping.py +107 -0
- msprobe/mindspore/compare/ms_compare.py +173 -35
- msprobe/mindspore/compare/ms_graph_compare.py +27 -11
- msprobe/mindspore/debugger/debugger_config.py +16 -13
- msprobe/mindspore/debugger/precision_debugger.py +37 -13
- msprobe/mindspore/dump/dump_tool_factory.py +16 -1
- msprobe/mindspore/dump/hook_cell/api_registry.py +11 -1
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +82 -10
- msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
- msprobe/mindspore/dump/jit_dump.py +41 -17
- msprobe/mindspore/dump/kernel_graph_dump.py +19 -3
- msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -4
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +19 -4
- msprobe/mindspore/free_benchmark/common/config.py +15 -0
- msprobe/mindspore/free_benchmark/common/handler_params.py +15 -0
- msprobe/mindspore/free_benchmark/common/utils.py +19 -5
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +16 -2
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +18 -3
- msprobe/mindspore/free_benchmark/handler/base_handler.py +18 -3
- msprobe/mindspore/free_benchmark/handler/check_handler.py +18 -3
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +15 -0
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +18 -3
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +22 -7
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -0
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +22 -7
- msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +44 -18
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +18 -4
- msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +20 -5
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +15 -0
- msprobe/mindspore/grad_probe/global_context.py +18 -8
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -4
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
- msprobe/mindspore/service.py +42 -123
- msprobe/pytorch/__init__.py +20 -1
- msprobe/pytorch/api_accuracy_checker/common/config.py +19 -2
- msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +47 -21
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
- msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
- msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +67 -32
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +26 -5
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +19 -2
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +51 -125
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +146 -3
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +21 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +78 -33
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +36 -11
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
- msprobe/pytorch/bench_functions/__init__.py +18 -3
- msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
- msprobe/pytorch/bench_functions/confusion_transpose.py +15 -0
- msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
- msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
- msprobe/pytorch/bench_functions/linear.py +15 -0
- msprobe/pytorch/bench_functions/matmul_backward.py +21 -6
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +180 -151
- msprobe/pytorch/bench_functions/rms_norm.py +15 -0
- msprobe/pytorch/bench_functions/rotary_mul.py +28 -9
- msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
- msprobe/pytorch/bench_functions/swiglu.py +20 -5
- msprobe/pytorch/common/__init__.py +15 -0
- msprobe/pytorch/common/log.py +18 -6
- msprobe/pytorch/common/parse_json.py +26 -11
- msprobe/pytorch/common/utils.py +40 -35
- msprobe/pytorch/compare/distributed_compare.py +11 -11
- msprobe/pytorch/compare/match.py +15 -0
- msprobe/pytorch/compare/pt_compare.py +38 -6
- msprobe/pytorch/debugger/debugger_config.py +52 -39
- msprobe/pytorch/debugger/precision_debugger.py +72 -24
- msprobe/pytorch/free_benchmark/__init__.py +20 -5
- msprobe/pytorch/free_benchmark/common/enums.py +28 -0
- msprobe/pytorch/free_benchmark/common/params.py +15 -0
- msprobe/pytorch/free_benchmark/common/utils.py +17 -1
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +28 -7
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +15 -0
- msprobe/pytorch/free_benchmark/main.py +19 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +26 -2
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +55 -16
- msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
- msprobe/pytorch/function_factory.py +17 -2
- msprobe/pytorch/functional/module_dump.py +84 -0
- msprobe/pytorch/grad_probe/grad_stat_csv.py +2 -2
- msprobe/pytorch/hook_module/__init__.py +16 -1
- msprobe/pytorch/hook_module/api_registry.py +13 -8
- msprobe/pytorch/hook_module/hook_module.py +17 -19
- msprobe/pytorch/hook_module/utils.py +4 -6
- msprobe/pytorch/hook_module/wrap_aten.py +12 -11
- msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
- msprobe/pytorch/hook_module/wrap_functional.py +10 -11
- msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
- msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
- msprobe/pytorch/hook_module/wrap_torch.py +4 -6
- msprobe/pytorch/hook_module/wrap_vf.py +4 -6
- msprobe/pytorch/module_processer.py +17 -2
- msprobe/pytorch/online_dispatch/compare.py +11 -12
- msprobe/pytorch/online_dispatch/single_compare.py +7 -7
- msprobe/pytorch/online_dispatch/torch_ops_config.yaml +8 -0
- msprobe/pytorch/online_dispatch/utils.py +1 -4
- msprobe/pytorch/parse.py +15 -0
- msprobe/pytorch/parse_tool/cli.py +5 -6
- msprobe/pytorch/parse_tool/lib/compare.py +9 -10
- msprobe/pytorch/parse_tool/lib/parse_tool.py +3 -0
- msprobe/pytorch/parse_tool/lib/utils.py +28 -24
- msprobe/pytorch/parse_tool/lib/visualization.py +1 -1
- msprobe/pytorch/pt_config.py +167 -38
- msprobe/pytorch/service.py +97 -32
- mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
- msprobe/pytorch/functional/data_processor.py +0 -0
- msprobe/pytorch/functional/dump_module.py +0 -39
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +0 -0
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from msprobe.core.common.const import Const
|
|
4
|
+
from msprobe.core.common.log import logger
|
|
5
|
+
from msprobe.core.common.utils import CompareException
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Trie:
    """Prefix tree over dot-separated dump node names (e.g. ``Cell.a.b.Type.0``)."""

    def __init__(self, type_name=None, has_data=False):
        self.type_name = type_name     # class/type name of the layer at this node
        self.call_count_list = []      # call counts recorded for completed inserts
        self.children = {}             # child path segment -> Trie
        self.has_data = has_data       # True once an insert() terminated at this node
        self.node_type = None          # e.g. "Cell" or "func"; set by insert()

    def __repr__(self):
        return (f"Node(type_name={self.type_name}, "
                f"has_data={self.has_data}, call number={len(self.call_count_list)})")

    def insert(self, word, word_type="func"):
        """Insert a dump name into the trie.

        Layout of ``word``: ``<prefix path>.<type_name>.<call_count>``, e.g.
        ``Cell.network_with_loss.language_model.encoder.layers.1.attention.out_proj.RowParallelLinear.1``
        where the prefix path is everything up to ``out_proj``, the type name is
        ``RowParallelLinear`` and the call count is ``1``.
        """
        segments = word.split(Const.SEP)
        if len(segments) < 2:
            logger.error('result dataframe elements can not be access.')
            raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR)

        leaf_type = segments[-2]
        leaf_count = segments[-1]

        cursor = self
        for segment in segments[:-2]:
            cursor = cursor.children.setdefault(segment, Trie())
            # Intermediate nodes default their type to their own path segment.
            if cursor.type_name is None:
                cursor.type_name = segment

        cursor.type_name = leaf_type
        cursor.has_data = True
        cursor.call_count_list.append(leaf_count)
        cursor.node_type = word_type
|
50
|
+
|
|
51
|
+
class DFSConverter:
    """Depth-first walker that accumulates {original dump name: mapped dump name}."""

    def __init__(self, mapping, max_depth=100):
        self.mapping = mapping        # {type_name: {child name: renamed child name}}
        self.max_depth = max_depth    # recursion guard
        self.result = {}              # accumulated name translation

    def traverse_and_collect(self, node, path="", mapping_path="", depth=0):
        """Walk the trie from *node*, returning the accumulated result dict.

        Raises:
            CompareException: RECURSION_LIMIT_ERROR when the tree is deeper
                than ``max_depth``.
        """
        if depth > self.max_depth:
            logger.error("The converted data depth is too large, please check the data")
            raise CompareException(CompareException.RECURSION_LIMIT_ERROR)

        if node is None:
            return self.result

        cls_name = node.type_name
        if node.has_data:
            # "Cell" nodes already carry their type in the path; other node
            # types need the type name appended before the call count.
            for call_count in node.call_count_list:
                if node.node_type == "Cell":
                    src = f"{path}.{call_count}"
                    dst = f"{mapping_path}.{call_count}"
                else:
                    src = f"{path}.{cls_name}.{call_count}"
                    dst = f"{mapping_path}.{cls_name}.{call_count}"
                self.result[src] = dst

        renames = self.mapping.get(cls_name, {})

        for child_name, child in node.children.items():
            child_path = f"{path}.{child_name}" if path else child_name
            mapped_child = renames.get(child_name, child_name)
            child_mapping_path = f"{mapping_path}.{mapped_child}" if mapping_path else mapped_child
            self.traverse_and_collect(child, child_path, child_mapping_path, depth + 1)

        return self.result
81
|
+
|
|
82
|
+
|
|
83
|
+
def get_mapping_list(ms_tree, mapping):
    """Flatten the MindSpore trie into (ms_name, pt_name) pairs.

    The mapped name's leading "Cell" is rewritten to "Module" so it matches
    the PyTorch dump naming scheme.
    """
    name_pairs = DFSConverter(mapping).traverse_and_collect(ms_tree)
    return [(ms_name, re.sub(r"^Cell", "Module", pt_name))
            for ms_name, pt_name in name_pairs.items()]
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_prefix_mapping(scope_list):
    """layer name to layer name.class_name

    Maps each dump entry's type-free prefix (path + call count, without the
    class-name segment) to its full name, e.g.
    ``Cell.a.embedding.3`` -> ``Cell.a.embedding.Embedding.3``.

    Entries whose origin data does not start with "Cell"/"Module" are skipped.

    Raises:
        CompareException: INDEX_OUT_OF_BOUNDS_ERROR when a key has fewer
            than two dot-separated parts.
    """
    layer_mapping = {}
    for name, v in scope_list.items():
        origin_data = v.get("origin_data")
        # Bug fix: guard against a missing "origin_data" field; previously a
        # None value raised AttributeError on .startswith().
        if not origin_data or not origin_data.startswith(("Cell", "Module")):
            continue
        name_list = name.split(Const.SEP)
        if len(name_list) < 2:
            logger.error('result dataframe elements can not be access.')
            raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR)
        # Drop the class-name segment (second to last), keep the call count.
        prefix_name_list = name_list[:-2] + [name_list[-1]]
        prefix_name = Const.SEP.join(prefix_name_list)
        layer_mapping[prefix_name] = name
    return layer_mapping
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def get_layer_mapping(ms_scope_list, pt_scope_list, mapping):
    """Produce (mindspore name, pytorch name) pairs for layer-level comparison.

    Args:
        ms_scope_list: {rebuilt ms name: {"origin_data": ...}} entries.
        pt_scope_list: same structure for the pytorch dump.
        mapping: {class name: {ms child name: pt child name}} rename table.
    """
    # 1. layer prefix -> full name, e.g.
    # Cell.network_with_loss.language_model.embedding.3 ->
    # Cell.network_with_loss.language_model.embedding.Embedding.3
    ms_prefix2fullname = get_prefix_mapping(ms_scope_list)

    # 2. Build the trie of MindSpore names, tagging each with its top-level domain.
    ms_tree = Trie(type_name="Cell")
    for node_name, record in ms_scope_list.items():
        top_domain = record.get('origin_data').split(Const.SEP)[0]
        ms_tree.insert(node_name, top_domain)
    msname2ptname = get_mapping_list(ms_tree, mapping)

    # 3. Same prefix -> full-name table on the PyTorch side.
    pt_prefix2fullname = get_prefix_mapping(pt_scope_list)

    final_mapping = []
    for ms_name, pt_name in msname2ptname:
        if ms_name in ms_prefix2fullname:
            # Cell/Module node: resolve both sides through the prefix tables.
            # NOTE(review): a missing pt prefix yields a None partner here,
            # unlike the func branch below which skips — confirm this
            # asymmetry is intentional.
            final_mapping.append((ms_prefix2fullname.get(ms_name),
                                  pt_prefix2fullname.get(pt_name, None)))
        elif ms_name in ms_scope_list:
            # Functional node: use origin data, dropping the trailing
            # forward/backward segment.
            ms_origin = ms_scope_list.get(ms_name)['origin_data']
            pt_record = pt_scope_list.get(pt_name, None)
            if not pt_record:
                continue
            pt_origin = pt_record['origin_data']
            final_mapping.append((Const.SEP.join(ms_origin.split(Const.SEP)[:-1]),
                                  Const.SEP.join(pt_origin.split(Const.SEP)[:-1])))
        else:
            final_mapping.append((ms_name, pt_name))

    return final_mapping
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from msprobe.core.common.const import Const
|
|
2
|
+
from msprobe.core.common.log import logger
|
|
3
|
+
|
|
4
|
+
def find_regard_scope(lines, start_sign, end_sign):
    """Locate the (start, end) indices of the region delimited by the two signs.

    Returns -1 for an index whose sign was never seen.  ``start_pos`` keeps
    the LAST occurrence of ``start_sign`` before ``end_sign``; scanning stops
    at the first line containing ``end_sign`` (a line containing both signs
    counts as a start).
    """
    start_pos = -1
    end_pos = -1
    for position, text in enumerate(lines):
        if start_sign in text:
            start_pos = position
        elif end_sign in text:
            end_pos = position
            break
    return start_pos, end_pos
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def find_stack_func_list(lines):
    """Extract caller function names from stack-trace lines, outermost first.

    Lines whose file field or function field matches the configured skip
    lists are discarded; the collected names are returned reversed so the
    call chain reads top-down.
    """
    collected = []
    for entry in lines:
        fields = entry.split(',')
        # Skip frames from ignored files first, then from ignored functions.
        if any(token in fields[Const.STACK_FILE_INDEX] for token in Const.FILE_SKIP_LIST):
            continue
        func_field = fields[Const.STACK_FUNC_INDEX]
        if any(token in func_field for token in Const.FUNC_SKIP_LIST):
            continue
        collected.append(func_field.split()[Const.STACK_FUNC_ELE_INDEX])
    return collected[::-1]
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_duplicated_name(components):
    """Duplicate the op-name segment so it also serves as the type segment.

    e.g. ``Functional.add.0.forward`` becomes ``Functional.add.add.0.forward``
    (the duplication slot is ``Const.CONSTRUCT_NAME_INDEX``).  Inputs that are
    too short, or whose name slot is a digit, are returned unchanged with a
    warning.
    """
    if len(components) < 3 or components[Const.CONSTRUCT_NAME_INDEX].isdigit():
        logger.warning("key in construct.json is shorter than 3 parts or not name valid.")
        return components
    split_at = Const.CONSTRUCT_NAME_INDEX
    return components[:split_at + 1] + components[split_at:]
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def modify_mapping_with_stack(stack, construct):
    """Rebuild comparable node names from construct.json entries and stack traces.

    Args:
        stack: {node name: list of stack-trace lines} from stack.json.
        construct: {node name: parent scope name or None} from construct.json.

    Returns:
        dict: {rebuilt name: {origin_data, scope, stack}}; empty when either
        input is empty.
    """
    if not stack or not construct:
        return {}

    # Is this a MindSpore dump? (MindSpore scope names contain "Cell".)
    is_ms = any("Cell" in ii for ii in construct)
    # The re-keyed mapping to return.
    final_pres = {}
    for key in construct:
        key_components = key.split(Const.SEP)
        code_list = stack.get(key, None)
        parent_node = construct.get(key, None)
        # Names that do not start with a standard prefix get normalized.
        if not key.startswith(("Module", "Cell")):
            if not parent_node:
                # No parent scope found: default the top-level domain.
                key_components[0] = "Cell" if is_ms else "Module"
                # Duplicate the node name as its type, e.g. add.add.
                duplicated_components = get_duplicated_name(key_components)
                modified_key = Const.SEP.join(duplicated_components)

                modified_key = modified_key.replace(".forward", "").replace(".backward", "")
                final_pres[modified_key] = {Const.ORIGIN_DATA: key, Const.SCOPE: None, Const.STACK: None}
                continue
            parent = parent_node.split(Const.SEP)
            if len(parent) < 4:
                logger.info(f"Parent name in construct.json is not valid")
                continue
            # The parent's node-name slot may be shifted by a digit segment.
            parent_idx = Const.NAME_FIRST_POSSIBLE_INDEX if not \
                parent[Const.NAME_FIRST_POSSIBLE_INDEX].isdigit() else Const.NAME_SECOND_POSSIBLE_INDEX
            parent_name = parent[parent_idx]

            if code_list:
                # {name}.Class.count_number.X ward Or {name}.Class.count_number.X ward.ele_number
                if parent_name.endswith('s'):
                    parent_name = parent_name[:-1]
                if len(key_components) < 3:
                    logger.info("The length of key in construct is less than 3, please check")
                    continue
                # {name}.count_number.X ward
                func_name = key_components[-3]
                start_pos, end_pos = find_regard_scope(code_list, func_name, parent_name)
                # NOTE(review): when either sign is missing this slices with
                # -1 (last element / empty) — confirm that is intended.
                regard_scope = code_list[start_pos:end_pos]

                func_stack_list = find_stack_func_list(regard_scope)
            else:
                func_stack_list = []
            # Compose: parent path (up to its node name) + call-stack names +
            # original key with its node name duplicated as the type segment.
            final_res_key = Const.SEP.join(parent[:parent_idx + 1] + func_stack_list +
                                           key_components[1:Const.CONSTRUCT_NAME_INDEX + 1] +
                                           key_components[Const.CONSTRUCT_NAME_INDEX:])
            # Bug fix: the original used str.strip(".forward"), which removes
            # any trailing run of those CHARACTERS and mangled names ending in
            # e.g. 'd' or 'r'.  Remove the literal suffix markers instead,
            # matching the branch above.
            final_res_key = final_res_key.replace(".forward", "").replace(".backward", "")
        else:
            # Standard key: drop the type segment, keep the call count.
            final_res_key = Const.SEP.join(key_components[:-2] + [key_components[-1]])
            func_stack_list = []
        final_pres[final_res_key] = {Const.ORIGIN_DATA: key, Const.SCOPE: parent_node,
                                     Const.STACK: Const.SEP.join(func_stack_list) if func_stack_list else None}
    return final_pres
|
@@ -1,29 +1,46 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import re
|
|
2
3
|
import copy
|
|
4
|
+
import sys
|
|
5
|
+
from itertools import zip_longest
|
|
6
|
+
|
|
3
7
|
from msprobe.core.common.utils import check_compare_param, CompareException, check_configuration_param, \
|
|
4
|
-
task_dumppath_get
|
|
5
|
-
from msprobe.core.common.file_utils import create_directory, load_yaml, load_npy
|
|
8
|
+
task_dumppath_get, struct_json_get, add_time_with_yaml
|
|
9
|
+
from msprobe.core.common.file_utils import create_directory, load_yaml, load_npy, load_json, save_yaml, FileOpen
|
|
6
10
|
from msprobe.core.common.const import Const, CompareConst
|
|
7
11
|
from msprobe.core.common.log import logger
|
|
8
12
|
from msprobe.core.common.exceptions import FileCheckException
|
|
9
13
|
from msprobe.core.compare.acc_compare import Comparator
|
|
10
14
|
from msprobe.core.compare.check import check_struct_match, fuzzy_check_op
|
|
11
|
-
|
|
15
|
+
from msprobe.mindspore.compare.modify_mapping import modify_mapping_with_stack
|
|
16
|
+
from msprobe.mindspore.compare.layer_mapping import get_layer_mapping
|
|
12
17
|
|
|
13
18
|
class MSComparator(Comparator):
|
|
14
|
-
def __init__(self, cell_mapping=None, api_mapping=None):
|
|
19
|
+
def __init__(self, cell_mapping=None, api_mapping=None, data_mapping=None, is_cross_framework=False):
    """MindSpore-side comparator.

    Args:
        cell_mapping: cell-name mapping file path, or None.
        api_mapping: api-name mapping file path, or None.
        data_mapping: data-name mapping as a file path, a dict, or None.
        is_cross_framework: whether the comparison crosses frameworks;
            only honoured when data_mapping is supplied.

    Raises:
        TypeError: when data_mapping is neither dict, str nor None.
    """
    self.frame_name = MSComparator.__name__
    self.cell_mapping = cell_mapping
    self.api_mapping = api_mapping
    self.data_mapping = data_mapping

    # With an explicit data mapping the caller decides; otherwise any
    # cell/api mapping implies a cross-framework comparison.
    if data_mapping:
        self.cross_frame = is_cross_framework
    else:
        self.cross_frame = cell_mapping is not None or api_mapping is not None

    self.cell_mapping_dict = self.load_mapping_file(self.cell_mapping)
    self.api_mapping_dict = self.load_mapping_file(self.api_mapping)
    if api_mapping is not None:
        self.ms_to_pt_mapping = self.load_internal_api()

    if isinstance(self.data_mapping, str) or self.data_mapping is None:
        self.data_mapping_dict = self.load_mapping_file(self.data_mapping)
    elif isinstance(self.data_mapping, dict):
        self.data_mapping_dict = self.data_mapping
    else:
        raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got "
                        f"{type(self.data_mapping)}")
|
24
41
|
def load_internal_api(self):
    """Load the built-in MindSpore->PyTorch API name table shipped next to this module."""
    module_dir = os.path.dirname(os.path.realpath(__file__))
    return load_yaml(os.path.join(module_dir, "ms_to_pt_api.yaml"))
|
|
29
46
|
def load_mapping_file(self, mapping_file):
|
|
@@ -52,10 +69,12 @@ class MSComparator(Comparator):
|
|
|
52
69
|
if self.api_mapping is not None:
|
|
53
70
|
npu_op_name = self.process_internal_api_mapping(npu_op_name, bench_op_name)
|
|
54
71
|
if isinstance(self.api_mapping, str):
|
|
55
|
-
npu_dict_new, bench_dict_new, target_dict = self.transform_user_mapping_api(npu_dict_new,
|
|
72
|
+
npu_dict_new, bench_dict_new, target_dict = self.transform_user_mapping_api(npu_dict_new,
|
|
73
|
+
bench_dict_new)
|
|
56
74
|
if target_dict:
|
|
57
75
|
bench_dict = self.reconstitution_bench_dict(npu_dict, copy.deepcopy(bench_dict_new), target_dict)
|
|
58
|
-
npu_op_name
|
|
76
|
+
npu_op_name = npu_dict_new.get(CompareConst.OP_NAME)
|
|
77
|
+
bench_op_name = bench_dict_new.get(CompareConst.OP_NAME)
|
|
59
78
|
struct_match = check_struct_match(npu_dict_new, bench_dict_new, cross_frame=self.cross_frame)
|
|
60
79
|
if not fuzzy_match:
|
|
61
80
|
return npu_op_name == bench_op_name and struct_match
|
|
@@ -72,7 +91,7 @@ class MSComparator(Comparator):
|
|
|
72
91
|
if load_pt_file:
|
|
73
92
|
import torch
|
|
74
93
|
from msprobe.pytorch.common.utils import load_pt
|
|
75
|
-
data_value = load_pt(data_path).detach()
|
|
94
|
+
data_value = load_pt(data_path, True).detach()
|
|
76
95
|
if data_value.dtype == torch.bfloat16:
|
|
77
96
|
data_value = data_value.to(torch.float32)
|
|
78
97
|
data_value = data_value.numpy()
|
|
@@ -99,7 +118,7 @@ class MSComparator(Comparator):
|
|
|
99
118
|
elif self.ms_to_pt_mapping.get(ms_api_name) == pt_api_name:
|
|
100
119
|
return self.api_replace(npu_op_name, ms_api_name, pt_api_name)
|
|
101
120
|
else:
|
|
102
|
-
return npu_op_name
|
|
121
|
+
return npu_op_name
|
|
103
122
|
|
|
104
123
|
def remove_element(self, op_name, struct, summary, idx):
|
|
105
124
|
del op_name[idx]
|
|
@@ -107,7 +126,12 @@ class MSComparator(Comparator):
|
|
|
107
126
|
del summary[idx]
|
|
108
127
|
|
|
109
128
|
def get_api_name(self, api_list):
    """Return "<framework>.<op>" built from the first two dump-name segments.

    Raises:
        CompareException: INDEX_OUT_OF_BOUNDS_ERROR when *api_list* has
            fewer than two elements (malformed dump data).
    """
    try:
        return api_list[0] + Const.SEP + api_list[1]
    except IndexError as error:
        logger.error(f'Failed to retrieve API name, please check if the dump data is reasonable')
        raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error
|
111
135
|
|
|
112
136
|
def transform_user_mapping_api(self, new_npu_dict, new_bench_dict):
|
|
113
137
|
"""
|
|
@@ -119,10 +143,13 @@ class MSComparator(Comparator):
|
|
|
119
143
|
tuple: Updated NPU and benchmark dictionaries, along with the target dictionary.
|
|
120
144
|
"""
|
|
121
145
|
npu_op_name, bench_op_name = new_npu_dict.get(CompareConst.OP_NAME), new_bench_dict.get(CompareConst.OP_NAME)
|
|
122
|
-
npu_struct_in
|
|
123
|
-
|
|
146
|
+
npu_struct_in = new_npu_dict.get(CompareConst.INPUT_STRUCT)
|
|
147
|
+
bench_struct_in = new_bench_dict.get(CompareConst.INPUT_STRUCT)
|
|
148
|
+
npu_struct_out = new_npu_dict.get(CompareConst.OUTPUT_STRUCT)
|
|
149
|
+
bench_struct_out = new_bench_dict.get(CompareConst.OUTPUT_STRUCT)
|
|
124
150
|
npu_summary, bench_summary = new_npu_dict.get(CompareConst.SUMMARY), new_bench_dict.get(CompareConst.SUMMARY)
|
|
125
|
-
npu_in_len, bench_in_len
|
|
151
|
+
npu_in_len, bench_in_len = len(npu_struct_in), len(bench_struct_in)
|
|
152
|
+
npu_out_len, bench_out_len = len(npu_struct_out), len(bench_struct_out)
|
|
126
153
|
ms_api_list, pt_api_list = npu_op_name[0].split(Const.SEP), bench_op_name[0].split(Const.SEP)
|
|
127
154
|
ms_api_name = self.get_api_name(ms_api_list)
|
|
128
155
|
pt_api_name = self.get_api_name(pt_api_list)
|
|
@@ -130,22 +157,25 @@ class MSComparator(Comparator):
|
|
|
130
157
|
for api_dict in self.api_mapping_dict:
|
|
131
158
|
if api_dict.get("pt_api") == pt_api_name and api_dict.get("ms_api") == ms_api_name:
|
|
132
159
|
ms_user_args_len, pt_user_args_len = len(api_dict.get("ms_args")), len(api_dict.get("pt_args"))
|
|
133
|
-
ms_user_output_len, pt_user_output_len
|
|
160
|
+
ms_user_output_len, pt_user_output_len = len(api_dict.get("ms_output")), len(api_dict.get("pt_output"))
|
|
134
161
|
if ms_user_args_len != pt_user_args_len or ms_user_output_len != pt_user_output_len:
|
|
135
|
-
logger.warning("The user-defined mapping table is incorrect
|
|
162
|
+
logger.warning("The user-defined mapping table is incorrect,\
|
|
163
|
+
make sure that the number of parameters is equal")
|
|
136
164
|
break
|
|
137
165
|
ms_out_list = api_dict.get("ms_output", [])
|
|
138
166
|
for idx in reversed(range(npu_out_len)):
|
|
139
167
|
if idx not in ms_out_list:
|
|
140
168
|
del npu_struct_out[idx]
|
|
141
|
-
|
|
142
|
-
|
|
169
|
+
if idx + npu_in_len < len(npu_summary) and idx + npu_in_len < len(npu_op_name):
|
|
170
|
+
del npu_summary[idx + npu_in_len]
|
|
171
|
+
del npu_op_name[idx + npu_in_len]
|
|
143
172
|
pt_out_list = api_dict.get("pt_output", [])
|
|
144
173
|
for idx in reversed(range(bench_out_len)):
|
|
145
174
|
if idx not in pt_out_list:
|
|
146
175
|
del bench_struct_out[idx]
|
|
147
|
-
|
|
148
|
-
|
|
176
|
+
if idx + bench_in_len < len(bench_summary) and idx + bench_in_len < len(bench_op_name):
|
|
177
|
+
del bench_summary[idx + bench_in_len]
|
|
178
|
+
del bench_op_name[idx + bench_in_len]
|
|
149
179
|
ms_para_list = api_dict.get("ms_args", [])
|
|
150
180
|
for idx in reversed(range(npu_in_len)):
|
|
151
181
|
if idx not in ms_para_list:
|
|
@@ -159,8 +189,10 @@ class MSComparator(Comparator):
|
|
|
159
189
|
target_dict = api_dict
|
|
160
190
|
break
|
|
161
191
|
if target_dict:
|
|
162
|
-
new_npu_dict.update({CompareConst.OP_NAME: npu_op_name, CompareConst.INPUT_STRUCT: npu_struct_in,
|
|
163
|
-
|
|
192
|
+
new_npu_dict.update({CompareConst.OP_NAME: npu_op_name, CompareConst.INPUT_STRUCT: npu_struct_in,
|
|
193
|
+
CompareConst.OUTPUT_STRUCT: npu_struct_out, CompareConst.SUMMARY: npu_summary})
|
|
194
|
+
new_bench_dict.update({CompareConst.OP_NAME: bench_op_name, CompareConst.INPUT_STRUCT: bench_struct_in,
|
|
195
|
+
CompareConst.OUTPUT_STRUCT: bench_struct_out, CompareConst.SUMMARY: bench_summary})
|
|
164
196
|
return new_npu_dict, new_bench_dict, target_dict
|
|
165
197
|
|
|
166
198
|
def para_sequence_update(self, npu_op_name, bench_op_name):
|
|
@@ -180,25 +212,115 @@ class MSComparator(Comparator):
|
|
|
180
212
|
if npu_in_len == len(ms_user_args_list) and npu_out_len == len(ms_user_output_list):
|
|
181
213
|
return del_bench_dict
|
|
182
214
|
ms_input_args_list = [i for i in range(npu_in_len)]
|
|
183
|
-
input_sub_list =list(set(ms_input_args_list) - set(ms_user_args_list))
|
|
215
|
+
input_sub_list = list(set(ms_input_args_list) - set(ms_user_args_list))
|
|
184
216
|
ms_output_args_list = [i for i in range(npu_out_len)]
|
|
185
|
-
output_sub_list =list(set(ms_output_args_list) - set(ms_user_output_list))
|
|
217
|
+
output_sub_list = list(set(ms_output_args_list) - set(ms_user_output_list))
|
|
186
218
|
bench_op_name = del_bench_dict.get(CompareConst.OP_NAME, [])
|
|
187
219
|
bench_struct_in = del_bench_dict.get(CompareConst.INPUT_STRUCT, [])
|
|
188
220
|
bench_struct_out = del_bench_dict.get(CompareConst.OUTPUT_STRUCT, [])
|
|
189
221
|
bench_summary = del_bench_dict.get(CompareConst.SUMMARY, [])
|
|
190
222
|
for idx in input_sub_list: # Fill in the blank value field in the pt dictionary
|
|
191
|
-
bench_op_name.insert(idx, CompareConst.
|
|
192
|
-
bench_struct_in.insert(idx, CompareConst.
|
|
193
|
-
bench_summary.insert(idx, CompareConst.
|
|
223
|
+
bench_op_name.insert(idx, CompareConst.N_A)
|
|
224
|
+
bench_struct_in.insert(idx, CompareConst.N_A)
|
|
225
|
+
bench_summary.insert(idx, CompareConst.N_A)
|
|
194
226
|
for idx in output_sub_list: # Fill in the blank value field in the pt dictionary
|
|
195
|
-
bench_op_name.insert(npu_in_len + idx, CompareConst.
|
|
196
|
-
bench_struct_out.insert(idx, CompareConst.
|
|
197
|
-
bench_summary.insert(npu_in_len + idx, CompareConst.
|
|
198
|
-
del_bench_dict.update({CompareConst.OP_NAME: bench_op_name, CompareConst.INPUT_STRUCT: bench_struct_in,
|
|
227
|
+
bench_op_name.insert(npu_in_len + idx, CompareConst.N_A)
|
|
228
|
+
bench_struct_out.insert(idx, CompareConst.N_A)
|
|
229
|
+
bench_summary.insert(npu_in_len + idx, CompareConst.N_A)
|
|
230
|
+
del_bench_dict.update({CompareConst.OP_NAME: bench_op_name, CompareConst.INPUT_STRUCT: bench_struct_in,
|
|
231
|
+
CompareConst.OUTPUT_STRUCT: bench_struct_out, CompareConst.SUMMARY: bench_summary})
|
|
199
232
|
return del_bench_dict
|
|
200
233
|
|
|
201
|
-
|
|
234
|
+
|
|
235
|
+
def sort_by_execution_sequence(npu_data, bench_data, mapping_list, flag):
|
|
236
|
+
def generate_execution_sequence(data):
|
|
237
|
+
sequence_map = {}
|
|
238
|
+
for index, item in enumerate(data.keys()):
|
|
239
|
+
if flag in item:
|
|
240
|
+
item_split = item.split(Const.SEP)
|
|
241
|
+
item_name = Const.SEP.join(item_split[0:-2])
|
|
242
|
+
item_index = item_split[-1]
|
|
243
|
+
if item_index == 'forward' or item_index == 'backward':
|
|
244
|
+
item_index = item_split[-2]
|
|
245
|
+
item_key = f"{item_name}.{item_index}"
|
|
246
|
+
sequence_map[item_key] = index
|
|
247
|
+
return sequence_map
|
|
248
|
+
|
|
249
|
+
npu_map = generate_execution_sequence(npu_data)
|
|
250
|
+
bench_map = generate_execution_sequence(bench_data)
|
|
251
|
+
|
|
252
|
+
def sort_by_map(item):
|
|
253
|
+
first_key = npu_map.get(item[0], sys.maxsize)
|
|
254
|
+
second_key = bench_map.get(item[1], sys.maxsize)
|
|
255
|
+
return first_key, second_key
|
|
256
|
+
|
|
257
|
+
return sorted(mapping_list, key=sort_by_map)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def generate_kernel_data(map_value, data, flag):
|
|
261
|
+
if not map_value:
|
|
262
|
+
return [], []
|
|
263
|
+
inputs_name = []
|
|
264
|
+
outputs_name = []
|
|
265
|
+
map_split = map_value.split(Const.SEP)
|
|
266
|
+
map_name = Const.SEP.join(map_split[0:-1])
|
|
267
|
+
map_index = map_split[-1]
|
|
268
|
+
for key, value in data.items():
|
|
269
|
+
if key.find(flag) != -1 and key.find(map_name) != -1:
|
|
270
|
+
if key.split(Const.SEP)[-1] != map_index and key.split(Const.SEP)[-2] != map_index :
|
|
271
|
+
continue
|
|
272
|
+
if flag == 'forward':
|
|
273
|
+
input_args = value.get('input_args', {})
|
|
274
|
+
else:
|
|
275
|
+
input_args = value.get('input', {})
|
|
276
|
+
output_args = value.get('output', {})
|
|
277
|
+
for i in range(len(input_args)):
|
|
278
|
+
inputs_name.append(f"{key}.input.{i}")
|
|
279
|
+
for i in range(len(output_args)):
|
|
280
|
+
outputs_name.append(f"{key}.output.{i}")
|
|
281
|
+
return inputs_name, outputs_name
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def generate_file_mapping(npu_json_path, bench_json_path, mapping_list):
|
|
285
|
+
|
|
286
|
+
npu_data = load_json(npu_json_path).get("data", {})
|
|
287
|
+
bench_data = load_json(bench_json_path).get("data", {})
|
|
288
|
+
|
|
289
|
+
forward_data = []
|
|
290
|
+
mapping_list = sort_by_execution_sequence(npu_data, bench_data, mapping_list, Const.FORWARD)
|
|
291
|
+
for map_value in mapping_list:
|
|
292
|
+
npu_forward_inputs, npu_backward_outputs = generate_kernel_data(map_value[0], npu_data, "forward")
|
|
293
|
+
bench_forward_inputs, bench_backward_outputs = generate_kernel_data(map_value[1], bench_data, "forward")
|
|
294
|
+
inputs_zip = list(zip_longest(npu_forward_inputs, bench_forward_inputs))
|
|
295
|
+
outputs_zip = list(zip_longest(npu_backward_outputs, bench_backward_outputs))
|
|
296
|
+
forward_data.extend(inputs_zip)
|
|
297
|
+
forward_data.extend(outputs_zip)
|
|
298
|
+
|
|
299
|
+
backward_data = []
|
|
300
|
+
mapping_list = sort_by_execution_sequence(npu_data, bench_data, mapping_list, Const.BACKWARD)
|
|
301
|
+
for map_value in mapping_list:
|
|
302
|
+
npu_forward_inputs, npu_backward_outputs = generate_kernel_data(map_value[0], npu_data, "backward")
|
|
303
|
+
bench_forward_inputs, bench_backward_outputs = generate_kernel_data(map_value[1], bench_data, "backward")
|
|
304
|
+
inputs_zip = list(zip_longest(npu_forward_inputs, bench_forward_inputs))
|
|
305
|
+
outputs_zip = list(zip_longest(npu_backward_outputs, bench_backward_outputs))
|
|
306
|
+
backward_data.extend(inputs_zip)
|
|
307
|
+
backward_data.extend(outputs_zip)
|
|
308
|
+
|
|
309
|
+
kernel_data = forward_data + backward_data
|
|
310
|
+
result = {key: value for key, value in kernel_data if key is not None}
|
|
311
|
+
|
|
312
|
+
return result
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def check_cross_framework(bench_json_path):
|
|
316
|
+
pattern = r'"data_name":\s*"[^"]+\.pt"'
|
|
317
|
+
with FileOpen(bench_json_path, 'r') as file:
|
|
318
|
+
for line in file:
|
|
319
|
+
if re.search(pattern, line):
|
|
320
|
+
return True
|
|
321
|
+
return False
|
|
322
|
+
|
|
323
|
+
|
|
202
324
|
def ms_compare(input_param, output_path, **kwargs):
|
|
203
325
|
try:
|
|
204
326
|
stack_mode = kwargs.get('stack_mode', False)
|
|
@@ -206,14 +328,30 @@ def ms_compare(input_param, output_path, **kwargs):
|
|
|
206
328
|
fuzzy_match = kwargs.get('fuzzy_match', False)
|
|
207
329
|
cell_mapping = kwargs.get('cell_mapping', None)
|
|
208
330
|
api_mapping = kwargs.get('api_mapping', None)
|
|
331
|
+
data_mapping = kwargs.get('data_mapping', None)
|
|
332
|
+
layer_mapping = kwargs.get('layer_mapping', None)
|
|
333
|
+
|
|
209
334
|
summary_compare, md5_compare = task_dumppath_get(input_param)
|
|
210
|
-
check_configuration_param(stack_mode, auto_analyze, fuzzy_match)
|
|
335
|
+
check_configuration_param(stack_mode, auto_analyze, fuzzy_match, input_param.get('is_print_compare_log', True))
|
|
211
336
|
create_directory(output_path)
|
|
212
337
|
check_compare_param(input_param, output_path, summary_compare, md5_compare)
|
|
213
338
|
except (CompareException, FileCheckException) as error:
|
|
214
339
|
logger.error('Compare failed. Please check the arguments and do it again!')
|
|
215
340
|
raise CompareException(error.code) from error
|
|
216
|
-
|
|
341
|
+
if layer_mapping:
|
|
342
|
+
pt_stack, pt_construct = struct_json_get(input_param, Const.PT_FRAMEWORK)
|
|
343
|
+
ms_stack, ms_construct = struct_json_get(input_param, Const.MS_FRAMEWORK)
|
|
344
|
+
mapping = load_yaml(layer_mapping)
|
|
345
|
+
ms_mapping_result = modify_mapping_with_stack(ms_stack, ms_construct)
|
|
346
|
+
pt_mapping_result = modify_mapping_with_stack(pt_stack, pt_construct)
|
|
347
|
+
layer_mapping = get_layer_mapping(ms_mapping_result, pt_mapping_result, mapping)
|
|
348
|
+
data_mapping = generate_file_mapping(input_param.get("npu_json_path"), input_param.get("bench_json_path"), layer_mapping)
|
|
349
|
+
|
|
350
|
+
data_mapping_name = add_time_with_yaml(f"data_mapping")
|
|
351
|
+
data_mapping_path = os.path.join(os.path.realpath(output_path), f"{data_mapping_name}")
|
|
352
|
+
save_yaml(data_mapping_path, data_mapping)
|
|
353
|
+
is_cross_framework = check_cross_framework(input_param.get("bench_json_path"))
|
|
354
|
+
ms_comparator = MSComparator(cell_mapping, api_mapping, data_mapping, is_cross_framework)
|
|
217
355
|
ms_comparator.compare_core(input_param, output_path, stack_mode=stack_mode,
|
|
218
356
|
auto_analyze=auto_analyze, fuzzy_match=fuzzy_match, summary_compare=summary_compare,
|
|
219
357
|
md5_compare=md5_compare)
|
|
@@ -47,8 +47,10 @@ def npy_data_read(data_path, npy_file_list, mapping_dict):
|
|
|
47
47
|
def statistic_data_read(statistic_file_list, statistic_file_path):
|
|
48
48
|
data_list = []
|
|
49
49
|
statistic_data_list = []
|
|
50
|
-
header_index = {
|
|
51
|
-
|
|
50
|
+
header_index = {
|
|
51
|
+
'Data Type': None, 'Shape': None, 'Max Value': None,
|
|
52
|
+
'Min Value': None,'Avg Value': None, 'L2Norm Value': None
|
|
53
|
+
}
|
|
52
54
|
for statistic_file in statistic_file_list:
|
|
53
55
|
with FileOpen(statistic_file, "r") as f:
|
|
54
56
|
csv_reader = csv.reader(f, delimiter=",")
|
|
@@ -65,8 +67,9 @@ def statistic_data_read(statistic_file_list, statistic_file_path):
|
|
|
65
67
|
|
|
66
68
|
for data in statistic_data_list:
|
|
67
69
|
compare_key = f"{data[1]}.{data[2]}.{data[3]}.{data[5]}"
|
|
70
|
+
op_name = f"{compare_key} {statistic_file_path}"
|
|
68
71
|
timestamp = int(data[4])
|
|
69
|
-
result_data = [
|
|
72
|
+
result_data = [op_name, compare_key, timestamp]
|
|
70
73
|
for key in header_index.keys():
|
|
71
74
|
if header_index[key] is None:
|
|
72
75
|
result_data.append(np.nan)
|
|
@@ -239,9 +242,20 @@ class GraphMSComparator:
|
|
|
239
242
|
compare_result_name = add_time_with_xlsx(f"compare_result_{str(rank_id)}_{str(step_id)}")
|
|
240
243
|
compare_result_path = os.path.join(os.path.realpath(self.output_path), f"{compare_result_name}")
|
|
241
244
|
check_path_before_create(compare_result_path)
|
|
245
|
+
self.to_excel(compare_result_df, compare_result_path)
|
|
246
|
+
logger.info(f"Compare rank: {rank_id} step: {step_id} finish. Compare result: {compare_result_path}.")
|
|
247
|
+
|
|
248
|
+
def to_excel(self, compare_result_df: pd.DataFrame, compare_result_path: str, slice_num=0, need_slice=False) -> int:
|
|
249
|
+
size = len(compare_result_df)
|
|
250
|
+
# sheet size cannot be larger than 1048576
|
|
251
|
+
if size < CompareConst.MAX_EXCEL_LENGTH:
|
|
252
|
+
compare_result_path = compare_result_path.replace('.xlsx', f'_slice_{slice_num}.xlsx') if need_slice else compare_result_path
|
|
242
253
|
compare_result_df.to_excel(compare_result_path, index=False)
|
|
243
254
|
change_mode(compare_result_path, FileCheckConst.DATA_FILE_AUTHORITY)
|
|
244
|
-
|
|
255
|
+
return slice_num + 1
|
|
256
|
+
else:
|
|
257
|
+
slice_num = self.to_excel(compare_result_df.iloc[0: size//2], compare_result_path, slice_num, True)
|
|
258
|
+
return self.to_excel(compare_result_df.iloc[size//2:], compare_result_path, slice_num, True)
|
|
245
259
|
|
|
246
260
|
def compare_process(self, rank_id, step_id):
|
|
247
261
|
# generate data_path
|
|
@@ -251,8 +265,8 @@ class GraphMSComparator:
|
|
|
251
265
|
return [], ''
|
|
252
266
|
|
|
253
267
|
# generate file name
|
|
254
|
-
npu_mode =
|
|
255
|
-
bench_mode =
|
|
268
|
+
npu_mode = GraphMode.ERROR_MODE
|
|
269
|
+
bench_mode = GraphMode.ERROR_MODE
|
|
256
270
|
npu_data_list = []
|
|
257
271
|
bench_data_list = []
|
|
258
272
|
for npu_data_path in npu_data_path_list:
|
|
@@ -262,7 +276,7 @@ class GraphMSComparator:
|
|
|
262
276
|
bench_mode, data_list = generate_data_name(bench_data_path)
|
|
263
277
|
bench_data_list.extend(data_list)
|
|
264
278
|
|
|
265
|
-
if npu_mode ==
|
|
279
|
+
if npu_mode == GraphMode.ERROR_MODE or bench_mode == GraphMode.ERROR_MODE:
|
|
266
280
|
logger.warning(f"Data_path {npu_data_path} or {bench_data_path} is not exist.")
|
|
267
281
|
return [], ''
|
|
268
282
|
if npu_mode != bench_mode:
|
|
@@ -286,11 +300,13 @@ class GraphMSComparator:
|
|
|
286
300
|
CompareConst.BENCH_NORM])
|
|
287
301
|
|
|
288
302
|
npu_float_type = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, CompareConst.NPU_NORM]
|
|
289
|
-
npu_data_df[npu_float_type] = npu_data_df[npu_float_type].astype(
|
|
303
|
+
npu_data_df[npu_float_type] = npu_data_df[npu_float_type].astype(float)
|
|
290
304
|
|
|
291
|
-
bench_float_type = [
|
|
292
|
-
|
|
293
|
-
|
|
305
|
+
bench_float_type = [
|
|
306
|
+
CompareConst.BENCH_MAX, CompareConst.BENCH_MIN,
|
|
307
|
+
CompareConst.BENCH_MEAN,CompareConst.BENCH_NORM
|
|
308
|
+
]
|
|
309
|
+
bench_data_df[bench_float_type] = bench_data_df[bench_float_type].astype(float)
|
|
294
310
|
|
|
295
311
|
npu_data_df['Local Index'] = npu_data_df.sort_values('TimeStamp').groupby('Compare Key').cumcount()
|
|
296
312
|
bench_data_df['Local Index'] = bench_data_df.sort_values('TimeStamp').groupby('Compare Key').cumcount()
|