PyPI - mindstudio-probe - Versions diffs - 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

mindstudio-probe 1.1.1 (py3-none-any.whl) → 1.2.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197) hide show

{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/METADATA +3 -2
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/RECORD +196 -141
msprobe/CMakeLists.txt +5 -0
msprobe/README.md +14 -19
msprobe/config.json +1 -0
msprobe/core/common/const.py +155 -6
msprobe/core/common/exceptions.py +3 -1
msprobe/core/common/file_utils.py +33 -7
msprobe/core/common/inplace_ops.yaml +3 -0
msprobe/core/common/utils.py +28 -14
msprobe/core/common_config.py +6 -0
msprobe/core/compare/acc_compare.py +139 -128
msprobe/core/compare/check.py +31 -29
msprobe/core/compare/compare_cli.py +17 -16
msprobe/core/compare/highlight.py +186 -99
msprobe/core/compare/layer_mapping/data_scope_parser.py +18 -7
msprobe/core/compare/layer_mapping/layer_mapping.py +21 -14
msprobe/core/compare/layer_mapping/postprocess_pass.py +4 -3
msprobe/core/compare/merge_result/merge_result.py +380 -0
msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
msprobe/core/compare/multiprocessing_compute.py +2 -2
msprobe/core/compare/npy_compare.py +109 -147
msprobe/core/compare/utils.py +189 -69
msprobe/core/data_dump/data_collector.py +51 -21
msprobe/core/data_dump/data_processor/base.py +38 -20
msprobe/core/data_dump/data_processor/factory.py +5 -3
msprobe/core/data_dump/data_processor/mindspore_processor.py +154 -20
msprobe/core/data_dump/data_processor/pytorch_processor.py +118 -58
msprobe/core/data_dump/json_writer.py +29 -1
msprobe/core/data_dump/scope.py +19 -18
msprobe/core/overflow_check/abnormal_scene.py +9 -5
msprobe/core/overflow_check/checker.py +1 -1
msprobe/core/overflow_check/utils.py +1 -1
msprobe/docs/01.installation.md +96 -17
msprobe/docs/02.config_introduction.md +5 -5
msprobe/docs/05.data_dump_PyTorch.md +91 -61
msprobe/docs/06.data_dump_MindSpore.md +57 -19
msprobe/docs/07.accuracy_checker_PyTorch.md +18 -18
msprobe/docs/09.accuracy_checker_MindSpore.md +4 -4
msprobe/docs/10.accuracy_compare_PyTorch.md +99 -41
msprobe/docs/11.accuracy_compare_MindSpore.md +249 -48
msprobe/docs/12.overflow_check_PyTorch.md +1 -1
msprobe/docs/19.monitor.md +120 -27
msprobe/docs/21.visualization_PyTorch.md +115 -35
msprobe/docs/22.visualization_MindSpore.md +138 -41
msprobe/docs/23.generate_operator_PyTorch.md +107 -0
msprobe/docs/24.code_mapping_Mindspore.md +28 -0
msprobe/docs/{23.tool_function_introduction.md → 25.tool_function_introduction.md} +1 -0
msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
msprobe/docs/27.dump_json_instruction.md +521 -0
msprobe/docs/FAQ.md +26 -2
msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
msprobe/docs/img/merge_result.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
msprobe/docs/img/visualization/tensorboard_1.png +0 -0
msprobe/docs/img/visualization/tensorboard_2.png +0 -0
msprobe/docs/img/visualization/vis_browser_1.png +0 -0
msprobe/docs/img/visualization/vis_browser_2.png +0 -0
msprobe/docs/img/visualization/vis_precision_info.png +0 -0
msprobe/docs/img/visualization/vis_search_info.png +0 -0
msprobe/docs/img/visualization/vis_show_info.png +0 -0
msprobe/docs/img/visualization/vis_showcase.png +0 -0
msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
msprobe/docs/visualization/GPTModel.png +0 -0
msprobe/docs/visualization/ParallelMLP.png +0 -0
msprobe/docs/visualization/layer_mapping_example.md +132 -0
msprobe/docs/visualization/mapping.png +0 -0
msprobe/docs/visualization/mapping1.png +0 -0
msprobe/docs/visualization/module_name.png +0 -0
msprobe/docs/visualization/module_name1.png +0 -0
msprobe/docs/visualization/no_mapping.png +0 -0
msprobe/docs/visualization/no_mapping1.png +0 -0
msprobe/docs/visualization/no_mapping_analyze.png +0 -0
msprobe/docs/visualization/top_layer.png +0 -0
msprobe/mindspore/__init__.py +10 -0
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +57 -25
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +2 -1
msprobe/mindspore/api_accuracy_checker/compute_element.py +5 -7
msprobe/mindspore/api_accuracy_checker/data_manager.py +37 -0
msprobe/mindspore/api_accuracy_checker/main.py +1 -0
msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +12 -6
msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +3 -1
msprobe/mindspore/code_mapping/bind.py +264 -0
msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
msprobe/mindspore/code_mapping/graph.py +49 -0
msprobe/mindspore/code_mapping/graph_parser.py +226 -0
msprobe/mindspore/code_mapping/main.py +24 -0
msprobe/mindspore/code_mapping/processor.py +34 -0
msprobe/mindspore/common/const.py +3 -1
msprobe/mindspore/common/utils.py +50 -5
msprobe/mindspore/compare/distributed_compare.py +0 -2
msprobe/mindspore/compare/ms_compare.py +105 -63
msprobe/mindspore/compare/ms_graph_compare.py +14 -5
msprobe/mindspore/debugger/debugger_config.py +3 -0
msprobe/mindspore/debugger/precision_debugger.py +81 -12
msprobe/mindspore/dump/hook_cell/api_registry.py +83 -16
msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +33 -15
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +11 -1
msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
msprobe/mindspore/dump/kernel_graph_dump.py +7 -0
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +13 -4
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
msprobe/mindspore/grad_probe/grad_analyzer.py +24 -12
msprobe/mindspore/grad_probe/hook.py +13 -4
msprobe/mindspore/mindtorch/__init__.py +18 -0
msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
msprobe/mindspore/ms_config.py +5 -1
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +7 -0
msprobe/mindspore/service.py +267 -101
msprobe/msprobe.py +24 -3
msprobe/pytorch/__init__.py +7 -6
msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +100 -267
msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +54 -30
msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +57 -1
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -1
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +42 -14
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +64 -19
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +34 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
msprobe/pytorch/bench_functions/npu_fusion_attention.py +42 -10
msprobe/pytorch/common/parse_json.py +2 -1
msprobe/pytorch/common/utils.py +45 -2
msprobe/pytorch/compare/distributed_compare.py +17 -29
msprobe/pytorch/compare/pt_compare.py +40 -20
msprobe/pytorch/debugger/debugger_config.py +27 -12
msprobe/pytorch/debugger/precision_debugger.py +42 -12
msprobe/pytorch/dump/module_dump/__init__.py +0 -0
msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
msprobe/pytorch/{module_processer.py → dump/module_dump/module_processer.py} +80 -6
msprobe/pytorch/free_benchmark/common/params.py +2 -1
msprobe/pytorch/free_benchmark/common/utils.py +3 -0
msprobe/pytorch/free_benchmark/compare/grad_saver.py +0 -2
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +31 -47
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
msprobe/pytorch/hook_module/__init__.py +1 -1
msprobe/pytorch/hook_module/hook_module.py +14 -11
msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
msprobe/pytorch/hook_module/support_wrap_ops.yaml +34 -0
msprobe/pytorch/hook_module/wrap_distributed.py +6 -8
msprobe/pytorch/hook_module/wrap_functional.py +0 -40
msprobe/pytorch/monitor/anomaly_analyse.py +1 -1
msprobe/pytorch/monitor/anomaly_detect.py +107 -22
msprobe/pytorch/monitor/csv2tb.py +166 -0
msprobe/pytorch/monitor/distributed/wrap_distributed.py +25 -14
msprobe/pytorch/monitor/features.py +3 -3
msprobe/pytorch/monitor/module_hook.py +483 -277
msprobe/pytorch/monitor/module_metric.py +27 -48
msprobe/pytorch/monitor/module_spec_verifier.py +3 -1
msprobe/pytorch/monitor/optimizer_collect.py +52 -14
msprobe/pytorch/monitor/unittest/test_monitor.py +24 -9
msprobe/pytorch/monitor/utils.py +77 -6
msprobe/pytorch/online_dispatch/dispatch.py +8 -2
msprobe/pytorch/parse_tool/lib/compare.py +10 -10
msprobe/pytorch/parse_tool/lib/config.py +5 -7
msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
msprobe/pytorch/parse_tool/lib/parse_tool.py +11 -10
msprobe/pytorch/parse_tool/lib/utils.py +18 -19
msprobe/pytorch/parse_tool/lib/visualization.py +9 -10
msprobe/pytorch/service.py +176 -106
msprobe/visualization/builder/graph_builder.py +62 -5
msprobe/visualization/builder/msprobe_adapter.py +24 -2
msprobe/visualization/compare/graph_comparator.py +64 -14
msprobe/visualization/compare/mode_adapter.py +1 -15
msprobe/visualization/graph/base_node.py +12 -17
msprobe/visualization/graph/distributed_analyzer.py +318 -0
msprobe/visualization/graph/graph.py +9 -0
msprobe/visualization/graph_service.py +97 -23
msprobe/visualization/utils.py +14 -29
msprobe/pytorch/functional/module_dump.py +0 -84
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/LICENSE +0 -0
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/WHEEL +0 -0
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/entry_points.txt +0 -0
{mindstudio_probe-1.1.1.dist-info → mindstudio_probe-1.2.1.dist-info}/top_level.txt +0 -0
/msprobe/docs/{data_dump_Mindspore → data_dump_MindSpore}/dynamic_graph_quick_start_example.md +0 -0
/msprobe/{pytorch/functional → mindspore/code_mapping}/__init__.py +0 -0

msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py CHANGED Viewed

@@ -41,6 +41,7 @@ from msprobe.core.common.utils import CompareException
 def split_json_file(input_file, num_splits, filter_api):
     forward_data, backward_data, real_data_path = parse_json_info_forward_backward(input_file)
+    input_dir = os.path.dirname(os.path.abspath(input_file))
     if filter_api:
         forward_data = preprocess_forward_content(forward_data)
     for data_name in list(forward_data.keys()):
@@ -71,7 +72,7 @@ def split_json_file(input_file, num_splits, filter_api):
                 **backward_data
             }
         }
-        split_filename = f"temp_part{i}.json"
+        split_filename = os.path.join(input_dir, f"temp_part{i}.json")
         save_json(split_filename, temp_data)
         split_files.append(split_filename)

msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py CHANGED Viewed

@@ -23,12 +23,14 @@ try:
     import torch_npu
 except ImportError:
     is_gpu = True
+    current_device = "cuda"
 else:
     is_gpu = False
+    current_device = "npu"
 import torch
 from tqdm import tqdm
 from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import generate_device_params, get_api_info
-from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import exec_api, is_unsupported_api
+from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import exec_api, is_unsupported_api, ExecParams
 from msprobe.core.common.file_utils import check_link, FileChecker
 from msprobe.pytorch.api_accuracy_checker.common.utils import extract_basic_api_segments
 from msprobe.core.common.const import FileCheckConst, Const
@@ -61,19 +63,33 @@ def check_tensor_overflow(x):
         return False
-def check_data_overflow(x):
-    if isinstance(x, (tuple, list)) and x:
-        for _, item in enumerate(x):
-            if check_data_overflow(item):
-                return True
-        return False
+def check_data_overflow(x, device):
+    if isinstance(x, (tuple, list)):
+        if not x:
+            return False
+        return any(check_data_overflow(item, device) for item in x)
     else:
-        return check_tensor_overflow(x)
+        if device == Const.CPU_LOWERCASE:
+            return check_tensor_overflow(x)
+        else:
+            return torch_npu.npu.utils.npu_check_overflow(x)
+def is_bool_output(x):
+    if isinstance(x, (tuple, list)):
+        if not x:
+            return False
+        return any(is_bool_output(item) for item in x)
+    else:
+        return isinstance(x, bool)
 def run_overflow_check(forward_file):
     logger.info("start UT test")
     forward_content, _, real_data_path = parse_json_info_forward_backward(forward_file)
+    if real_data_path:
+        dump_path = os.path.dirname(forward_file)
+        real_data_path = os.path.join(dump_path, Const.DUMP_TENSOR_DATA)
     for api_full_name, api_info_dict in tqdm(forward_content.items()):
         if is_unsupported_api(api_full_name, is_overflow_check=True):
             continue
@@ -87,6 +103,9 @@ def run_overflow_check(forward_file):
             elif "expected scalar type Long" in str(err):
                 logger.warning(f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
                                "'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
+            elif "could not create a primitive descriptor for a matmul primitive" in str(err):
+                logger.warning(f"API {api_name} not support matmul primitive in CPU due to pytorch bug, "
+                               "so it will be skipped.")
             else:
                 logger.error(f"Run {api_full_name} UT Error: %s" % str(err))
@@ -98,17 +117,26 @@ def run_torch_api(api_full_name, api_info_dict, real_data_path):
     if not need_grad:
         logger.warning("%s function with out=... arguments don't support automatic differentiation, skip backward."
                        % api_full_name)
+    device_info_kwargs = kwargs.get(Const.DEVICE)
+    if device_info_kwargs and device_info_kwargs.get(Const.VALUE):
+        kwargs[Const.DEVICE] = current_device
     npu_args, npu_kwargs = generate_device_params(args, kwargs, False, api_name)
-    if kwargs.get("device"):
-        del kwargs["device"]
-    out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, args, kwargs)
-    npu_out = exec_api(api_type, api_name, Const.NPU_LOWERCASE, npu_args, npu_kwargs)
+    if kwargs.get(Const.DEVICE):
+        del kwargs[Const.DEVICE]
+    cpu_exec_params = ExecParams(api_type, api_name, Const.CPU_LOWERCASE, args, kwargs, False, None)
+    device_exec_params = ExecParams(api_type, api_name, Const.NPU_LOWERCASE, npu_args, npu_kwargs, False, None)
+    out = exec_api(cpu_exec_params)
+    npu_out = exec_api(device_exec_params)
     if out is None and npu_out is None:
         logger.warning("The %s overflow is a normal overflow, out and npu_out is None." % api_full_name)
         return
+    if is_bool_output(out) or is_bool_output(npu_out):
+        logger.warning("The output of %s is bool type.This dtype not support overflow, so it will be skipped."
+                       % api_full_name)
+        return
-    cpu_overflow = check_data_overflow(out)
-    npu_overflow = torch_npu.npu.utils.npu_check_overflow(npu_out)
+    cpu_overflow = check_data_overflow(out, Const.CPU_LOWERCASE)
+    npu_overflow = check_data_overflow(npu_out, Const.NPU_LOWERCASE)
     if cpu_overflow == npu_overflow:
         logger.warning("The %s overflow is a normal overflow." % api_full_name)
     else:

msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py CHANGED Viewed

@@ -31,6 +31,7 @@ except ImportError:
 else:
     is_gpu = False
     current_device = "npu"
 import torch
 from tqdm import tqdm
@@ -48,10 +49,12 @@ from msprobe.core.common.file_utils import FileChecker, change_mode, \
 from msprobe.pytorch.common.log import logger
 from msprobe.pytorch.pt_config import parse_json_config
 from msprobe.core.common.const import Const, FileCheckConst, CompareConst
-from msprobe.core.common.utils import safe_get_value
+from msprobe.core.common.utils import safe_get_value, CompareException
+from msprobe.pytorch.common.utils import seed_all
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import ATTL, ATTLConfig, move2device_exec
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.device_dispatch import ConsumerDispatcher
-from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_cpu_params, generate_device_params
+from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_cpu_params, generate_device_params, \
+    ExecParams
 current_time = time.strftime("%Y%m%d%H%M%S")
@@ -61,6 +64,7 @@ DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv"
 not_backward_list = ['repeat_interleave']
+unsupported_backward_list = ['masked_select']
 tqdm_params = {
@@ -237,7 +241,8 @@ def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict
     in_fwd_data_list = []
     backward_message = ''
     api_type, api_name = extract_basic_api_segments(api_full_name)
-    args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path)
+    args, kwargs, output_dtype = get_api_info(api_info_dict, api_name, real_data_path)
+    need_grad = check_need_grad(api_info_dict)
     in_fwd_data_list.append(args)
     in_fwd_data_list.append(kwargs)
     need_backward = api_full_name in backward_content
@@ -248,14 +253,30 @@ def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict
         need_grad = False
         logger.info("%s %s" % (api_full_name, BackwardMessage.NO_BACKWARD_RESULT_MESSAGE))
         backward_message += BackwardMessage.NO_BACKWARD_RESULT_MESSAGE
+    if api_name in unsupported_backward_list:
+        need_grad = False
+        logger.info("%s %s" % (api_full_name, BackwardMessage.UNSUPPORT_API_MESSAGE))
+        backward_message += BackwardMessage.UNSUPPORT_API_MESSAGE
     need_backward = need_backward and need_grad
-    if kwargs.get("device"):
-        del kwargs["device"]
-    cpu_args, cpu_kwargs = generate_cpu_params(args, kwargs, need_backward, api_name)
+    device_info_kwargs = kwargs.get(Const.DEVICE)
+    if device_info_kwargs and device_info_kwargs.get(Const.VALUE):
+        kwargs[Const.DEVICE] = current_device
     device_args, device_kwargs = generate_device_params(args, kwargs, need_backward, api_name)
+    if kwargs.get(Const.DEVICE):
+        del kwargs[Const.DEVICE]
+    cpu_params = generate_cpu_params(args, kwargs, need_backward, api_name)
+    cpu_args, cpu_kwargs = cpu_params.cpu_args, cpu_params.cpu_kwargs
+    autocast_dtype, is_autocast = cpu_params.autocast_dtype, cpu_params.is_autocast
+    if not is_autocast and output_dtype:
+        is_autocast = autocast_dtype != output_dtype
+        autocast_dtype = output_dtype
     bench_grad_out, device_grad_out = None, None
-    out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs)
-    device_out = exec_api(api_type, api_name, current_device, device_args, device_kwargs)
+    cpu_exec_params = ExecParams(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs, False, autocast_dtype)
+    out = exec_api(cpu_exec_params)
+    device_exec_params = ExecParams(api_type, api_name, current_device, device_args, device_kwargs, is_autocast,
+                                     autocast_dtype)
+    device_out = exec_api(device_exec_params)
     current_path = os.path.dirname(os.path.realpath(__file__))
     ut_setting_path = os.path.join(current_path, "torch_ut_setting.json")
     api_setting_dict = get_json_contents(ut_setting_path)
@@ -273,7 +294,8 @@ def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict
             }
             grad = gen_args(backward_args, api_name, func_options)
             grad = safe_get_value(grad, 0, "grad")
-            bench_grad, _ = generate_cpu_params(grad, {}, False, api_name)
+            grad_params = generate_cpu_params(grad, {}, False, api_name)
+            bench_grad = grad_params.cpu_args
             bench_grad_out = run_backward(cpu_args, bench_grad, grad_index, out)
             device_grad = grad.clone().detach().to(current_device)
             device_grad_out = run_backward(device_args, device_grad, grad_index, device_out)
@@ -300,13 +322,18 @@ def run_torch_api_online(api_full_name, api_data, backward_content):
     return UtDataInfo(None, None, out, device_out, None, in_fwd_data_list, None, rank=api_data.rank)
-def get_api_info(api_info_dict, api_name, real_data_path):
-    convert_type, api_info_dict = api_info_preprocess(api_name, api_info_dict)
+def check_need_grad(api_info_dict):
     need_grad = True
-    if api_info_dict.get("input_kwargs") and "out" in api_info_dict.get("input_kwargs"):
+    if api_info_dict.get(Const.INPUT_KWARGS) and "out" in api_info_dict.get(Const.INPUT_KWARGS):
         need_grad = False
-    args, kwargs = gen_api_params(api_info_dict, api_name, need_grad, convert_type, real_data_path)
-    return args, kwargs, need_grad
+    return need_grad
+def get_api_info(api_info_dict, api_name, real_data_path):
+    convert_type, api_info_dict = api_info_preprocess(api_name, api_info_dict)
+    need_grad = check_need_grad(api_info_dict)
+    args, kwargs, output_dtype = gen_api_params(api_info_dict, api_name, need_grad, convert_type, real_data_path)
+    return args, kwargs, output_dtype
 def need_to_backward(grad_index, out):
@@ -323,15 +350,25 @@ def run_backward(args, grad, grad_index, out):
         out[grad_index].backward(grad)
     else:
         out.backward(grad)
-    args_grad = []
-    for arg in args:
-        if isinstance(arg, torch.Tensor):
-            args_grad.append(arg.grad)
-    grad_out = args_grad
+    grad_out = extract_tensors_grad(args)
     return grad_out
+def extract_tensors_grad(args, depth=0):
+    if depth > Const.MAX_DEPTH:
+        logger.error("The depth of arg_in is too large, please check the arg_in.")
+        raise CompareException(CompareException.RECURSION_LIMIT_ERROR)
+    grads = []
+    for arg in args:
+        if isinstance(arg, torch.Tensor):
+            grads.append(arg.grad)
+        elif isinstance(arg, (list, tuple)):
+            grads.extend(extract_tensors_grad(arg, depth+1))
+    return grads
 def initialize_save_error_data(error_data_path):
     create_directory(error_data_path)
     error_data_path_checker = FileChecker(error_data_path, FileCheckConst.DIR,
@@ -479,6 +516,10 @@ def run_ut_command(args):
     if not is_gpu:
         torch.npu.set_compile_mode(jit_compile=args.jit_compile)
+        if args.jit_compile:
+            torch.npu.config.allow_internal_format = True
+        else:
+            torch.npu.config.allow_internal_format = False
     used_device = current_device + ":" + str(args.device_id[0])
     try:
         if is_gpu:
@@ -497,6 +538,9 @@ def run_ut_command(args):
                                             ability=FileCheckConst.READ_ABLE, file_type=FileCheckConst.JSON_SUFFIX)
         checked_api_info = api_info_file_checker.common_check()
         forward_content, backward_content, real_data_path = parse_json_info_forward_backward(checked_api_info)
+        if real_data_path:
+            dump_path = os.path.dirname(checked_api_info)
+            real_data_path = os.path.join(dump_path, Const.DUMP_TENSOR_DATA)
         if args.filter_api:
             logger.info("Start filtering the api in the api_info_file.")
             forward_content = preprocess_forward_content(forward_content)
@@ -538,5 +582,6 @@ def run_ut_command(args):
 if __name__ == '__main__':
+    seed_all()
     _run_ut()
     logger.info("UT task completed.")

msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py CHANGED Viewed

@@ -16,6 +16,7 @@
 # limitations under the License.
 import os
+from collections import namedtuple
 import re
 import torch
@@ -23,8 +24,10 @@ try:
     import torch_npu
 except ImportError:
     current_device = "cuda"
+    from torch.cuda.amp import autocast
 else:
     current_device = "npu"
+    from torch_npu.npu.amp import autocast
 from msprobe.core.common.const import FileCheckConst, Const, CompareConst
 from msprobe.core.common.file_utils import FileChecker
@@ -47,11 +50,17 @@ PRECISION_MAPPING = {
 }
+CpuParams = namedtuple("CpuArgs", ["cpu_args", "cpu_kwargs", "autocast_dtype", "is_autocast"])
+ExecParams = namedtuple("ExecParams", ["api_type", "api_name", "device", "args", "kwargs",
+                                       "is_autocast", "autocast_dtype"])
 class BackwardMessage:
     MULTIPLE_BACKWARD_MESSAGE = "Multiple backward is not supported."
     UNSUPPORT_BACKWARD_MESSAGE = "function with out=... arguments don't support automatic differentiation, " \
                                   "skip backward."
     NO_BACKWARD_RESULT_MESSAGE = "This API does not have backward input data, skip backward."
+    UNSUPPORT_API_MESSAGE = "This API does not support backward ut, skip backward."
 class UtDataInfo:
@@ -91,7 +100,15 @@ def get_validated_details_csv_path(validated_result_csv_path):
     return validated_details_csv_path
-def exec_api(api_type, api_name, device, args, kwargs):
+def exec_api(exec_params):
+    api_type = exec_params.api_type
+    api_name = exec_params.api_name
+    device = exec_params.device
+    args = exec_params.args
+    kwargs = exec_params.kwargs
+    is_autocast = exec_params.is_autocast
+    autocast_dtype = exec_params.autocast_dtype
     if api_type == "Functional":
         torch_api = FunctionalOPTemplate(api_name, str, False)
     if api_type == "Tensor":
@@ -102,7 +119,11 @@ def exec_api(api_type, api_name, device, args, kwargs):
         torch_api = AtenOPTemplate(api_name, None, False)
     if api_type == "NPU":
         torch_api = NpuOPTemplate(api_name, None, False, device)
-    out = torch_api.forward(*args, **kwargs)
+    if is_autocast:
+        with autocast(dtype=autocast_dtype):
+            out = torch_api.forward(*args, **kwargs)
+    else:
+        out = torch_api.forward(*args, **kwargs)
     return out
@@ -196,19 +217,28 @@ def generate_cpu_params(input_args, input_kwargs, need_backward, api_name):
         return set()
     raise_dtype = None
+    autocast_dtype = None
+    is_autocast = False
     need_raise_dtypes = recursive_find_dtypes(input_args)
     need_raise_dtypes.update(recursive_find_dtypes(input_kwargs, check_kwargs=True))
     if len(need_raise_dtypes) == 1:
-        raise_dtype = PRECISION_MAPPING.get(need_raise_dtypes.pop(), torch.float32)
+        origin_dtype = need_raise_dtypes.pop()
+        raise_dtype = PRECISION_MAPPING.get(origin_dtype, torch.float32)
+        autocast_dtype = origin_dtype
     elif len(need_raise_dtypes) >= 2:
         raise_dtype = torch.float32
+        need_raise_dtypes.discard(torch.float32)
+        autocast_dtype = need_raise_dtypes.pop()
+        is_autocast = True
     raise_dtype = None if api_name in not_raise_dtype_set else raise_dtype
     is_detach = api_name not in not_detach_set
     cpu_args = recursive_arg_to_cpu(input_args, is_detach, raise_dtype=raise_dtype)
     cpu_kwargs = {key: recursive_arg_to_cpu(value, key != "out" and is_detach, raise_dtype=raise_dtype) for
                   key, value in input_kwargs.items()}
-    return cpu_args, cpu_kwargs
+    cpu_params = CpuParams(cpu_args, cpu_kwargs, autocast_dtype, is_autocast)
+    return cpu_params
 def record_skip_info(api_full_name, compare, compare_alg_results):

msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py CHANGED Viewed

@@ -24,7 +24,7 @@ from msprobe.core.common.const import Const, CompareConst
 from msprobe.pytorch.api_accuracy_checker.compare.api_precision_compare import online_api_precision_compare
 from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import DETAIL_TEST_ROWS, thousandth_standard_api, \
     binary_standard_api, absolute_standard_api
-from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api
+from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api, ExecParams
 from msprobe.pytorch.common.log import logger
 from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import move2target_device
 from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_cpu_params
@@ -92,8 +92,10 @@ def online_precision_compare(api_data, device, common_config, api_precision_csv_
     try:
         # NPU vs CPU
-        cpu_args, cpu_kwargs = generate_cpu_params(npu_args, npu_kwargs, False, api_name)
-        cpu_out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs)
+        cpu_params = generate_cpu_params(npu_args, npu_kwargs, False, api_name)
+        cpu_args, cpu_kwargs = cpu_params.cpu_args, cpu_params.cpu_kwargs
+        cpu_exec_params = ExecParams(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs, False, None)
+        cpu_out = exec_api(cpu_exec_params)
         npu_data_info = UtDataInfo(None, None, npu_out, cpu_out, None, [], None, rank=api_data.rank)
         npu_detail = compare.compare_output(api_full_name, npu_data_info, True)
         npu_data = pd.DataFrame(npu_detail, columns=DETAIL_TEST_ROWS[-1])

msprobe/pytorch/bench_functions/npu_fusion_attention.py CHANGED Viewed

@@ -30,6 +30,7 @@
                                        numels=0, prefix=None, sparse_mode=0, gen_mask_parallel=True, sync=False
 """
+from collections import namedtuple
 import torch
 import numpy as np
 from einops import rearrange
@@ -54,6 +55,14 @@ GTYPE = torch.float64  # arm host必须选择float64，x86环境选择float32即
 SOFTMAX_BUILD_MODE = "QKV"  # "MAX_SUM"
+FaForwardParams = namedtuple("FaForwardParams",
+                            ["q", "k", "v", "drop_mask", "atten_mask", "pse", "scale", "keep_prob"])
+FaBackwardParams = namedtuple("FaBackwardParams",
+                            ["dx", "q", "k", "v", "softmax_res", "drop_mask", "pse", "scale", "keep_prob"])
+RebuildSoftmaxParams = namedtuple("RebuildSoftmaxParams",
+                                ["q", "k", "atten_mask", "pse", "scale", "softmax_max", "softmax_sum"])
 def softmax_forward(x):
     x_max = torch.max(x, dim=-1, keepdims=True)[0]
     x_sub = x.sub(x_max)
@@ -99,7 +108,15 @@ def calculate_qk(q, k, atten_mask, pse, scale):
     return qk
-def fusion_attention_forward(q, k, v, drop_mask, atten_mask, pse, scale, keep_prob):
+def fusion_attention_forward(forward_params):
+    q = forward_params.q
+    k = forward_params.k
+    v = forward_params.v
+    drop_mask = forward_params.drop_mask
+    atten_mask = forward_params.atten_mask
+    pse = forward_params.pse
+    scale = forward_params.scale
+    keep_prob = forward_params.keep_prob
     qk = calculate_qk(q, k, atten_mask, pse, scale)
     softmax_res, softmax_max, softmax_sum = softmax_forward(qk)
     if drop_mask is None or len(drop_mask.shape) == 0:
@@ -110,7 +127,16 @@ def fusion_attention_forward(q, k, v, drop_mask, atten_mask, pse, scale, keep_pr
     return y, softmax_max, softmax_sum
-def fusion_attention_backward(dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob):
+def fusion_attention_backward(backward_params):
+    dx = backward_params.dx
+    q = backward_params.q
+    k = backward_params.k
+    v = backward_params.v
+    softmax_res = backward_params.softmax_res
+    drop_mask = backward_params.drop_mask
+    pse = backward_params.pse
+    scale = backward_params.scale
+    keep_prob = backward_params.keep_prob
     dp = torch.matmul(dx, v.permute(0, 1, 3, 2))
     if drop_mask is None or len(drop_mask.shape) == 0:
         drop_res = softmax_res.permute(0, 1, 3, 2)
@@ -368,11 +394,18 @@ def rebuid_softmax_by_qkv(q, k, atten_mask, pse, scale):
     return softmax_res
-def rebuild_softmax_by_max_sum(q, k, atten_mask, pse, scale, softmax_max, softmax_sum):
+def rebuild_softmax_by_max_sum(softmax_params):
     """
     attention = softmax(QK^T/sqrt(d))V
     softmax(x_i) = e^(x_i - x_max_i) / x_sum_i)
     """
+    q = softmax_params.q
+    k = softmax_params.k
+    atten_mask = softmax_params.atten_mask
+    pse = softmax_params.pse
+    scale = softmax_params.scale
+    softmax_max = softmax_params.softmax_max
+    softmax_sum = softmax_params.softmax_sum
     logger.info("Using softmax_max and softmax_sum to rebuild original softmax")
     qk = calculate_qk(q, k, atten_mask, pse, scale)
     if softmax_max.shape[-1] == 0:
@@ -502,10 +535,8 @@ def npu_fusion_attention(*args, **kwargs):
     key = convert_to_bnsd(key, n2, input_layout)
     value = convert_to_bnsd(value, n2, input_layout)
     k_new, v_new = generate_kv(key, value, n1, n2)
-    out_golden, softmax_max, softmax_sum = fusion_attention_forward(q=query, k=k_new, v=v_new,
-                                                                    drop_mask=None, atten_mask=atten_mask,
-                                                                    pse=pse, scale=scale,
-                                                                    keep_prob=keep_prob)
+    forward_params = FaForwardParams(query, k_new, v_new, None, atten_mask, pse, scale, keep_prob)
+    out_golden, softmax_max, softmax_sum = fusion_attention_forward(forward_params)
     if out_golden.dim() == 5:
         out_golden = out_golden.reshape(out_golden.size(0), out_golden.size(1) * out_golden.size(2), out_golden.size(3),
                                         out_golden.size(4))
@@ -546,9 +577,10 @@ def npu_fusion_attention_grad(*args, **kwargs):
     if SOFTMAX_BUILD_MODE == "QKV":
         softmax_res = rebuid_softmax_by_qkv(query, k_new, atten_mask, pse, scale_value)
     else:
-        softmax_res = rebuild_softmax_by_max_sum(query, k_new, atten_mask, pse, scale_value, softmax_max, softmax_sum)
-    dq, dk, dv = fusion_attention_backward(dx, query, k_new, v_new, softmax_res, None, pse, scale_value, keep_prob)
+        softmax_params = RebuildSoftmaxParams(query, k_new, atten_mask, pse, scale_value, softmax_max, softmax_sum)
+        softmax_res = rebuild_softmax_by_max_sum(softmax_params)
+    backward_params = FaBackwardParams(dx, query, k_new, v_new, softmax_res, None, pse, scale_value, keep_prob)
+    dq, dk, dv = fusion_attention_backward(backward_params)
     # N不等长适配by cdy
     if not (n1 == n2):

msprobe/pytorch/common/parse_json.py CHANGED Viewed

@@ -24,7 +24,8 @@ def parse_json_info_forward_backward(json_path):
     real_data_path = dump_json.get("dump_data_dir")
     dump_data = dump_json.get("data")
     if dump_data is None:
-        raise ParseJsonException(ParseJsonException.InvalidDumpJson, "something wrong with dump, no data found in dump.json")
+        raise ParseJsonException(ParseJsonException.InvalidDumpJson,
+                                 "something wrong with dump, no data found in dump.json")
     if not dump_data:
         logger.warning("data field is empty, no overflow data found.")

msprobe/pytorch/common/utils.py CHANGED Viewed

@@ -105,8 +105,49 @@ def get_rank_if_initialized():
         raise DistributedNotInitializedError("torch distributed environment is not initialized")
-def seed_all(seed=1234, mode=False):
-    check_seed_all(seed, mode)
+def remove_dropout():
+    if torch.__version__ > "1.8":
+        logger.info_on_rank_0("For precision comparison, the probability p in the dropout method is set to 0.")
+        import torch.nn.functional as F
+        from torch import _VF
+        from torch.overrides import has_torch_function_unary, handle_torch_function
+        def function_dropout(input_tensor: torch.Tensor, p: float = 0.5, training: bool = True,
+                             inplace: bool = False) -> torch.Tensor:
+            if has_torch_function_unary(input_tensor):
+                return handle_torch_function(
+                    function_dropout, (input_tensor,), input_tensor, p=0., training=training, inplace=inplace)
+            if p < 0.0 or p > 1.0:
+                raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
+            return _VF.dropout_(input_tensor, 0., training) if inplace else _VF.dropout(input_tensor, 0., training)
+        def function_dropout2d(input_tensor: torch.Tensor, p: float = 0.5, training: bool = True,
+                               inplace: bool = False) -> torch.Tensor:
+            if has_torch_function_unary(input_tensor):
+                return handle_torch_function(
+                    function_dropout2d, (input_tensor,), input_tensor, p=0., training=training, inplace=inplace)
+            if p < 0.0 or p > 1.0:
+                raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
+            return _VF.feature_dropout_(input_tensor, 0., training) if inplace else _VF.feature_dropout(input_tensor,
+                                                                                                        0., training)
+        def function_dropout3d(input_tensor: torch.Tensor, p: float = 0.5, training: bool = True,
+                               inplace: bool = False) -> torch.Tensor:
+            if has_torch_function_unary(input_tensor):
+                return handle_torch_function(
+                    function_dropout3d, (input_tensor,), input_tensor, p=0., training=training, inplace=inplace)
+            if p < 0.0 or p > 1.0:
+                raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
+            return _VF.feature_dropout_(input_tensor, 0., training) if inplace else _VF.feature_dropout(input_tensor,
+                                                                                                        0., training)
+        F.dropout = function_dropout
+        F.dropout2d = function_dropout2d
+        F.dropout3d = function_dropout3d
+def seed_all(seed=1234, mode=False, rm_dropout=True):
+    check_seed_all(seed, mode, rm_dropout)
     try:
         random.seed(seed)
         os.environ['PYTHONHASHSEED'] = str(seed)
@@ -126,6 +167,8 @@ def seed_all(seed=1234, mode=False):
         else:
             torch_npu.npu.manual_seed_all(seed)
             torch_npu.npu.manual_seed(seed)
+        if rm_dropout:
+            remove_dropout()
     except Exception as e:
         logger.error(f"There is an unexpected error while determinating randomness. {e}")

msprobe/pytorch/compare/distributed_compare.py CHANGED Viewed

@@ -14,52 +14,40 @@
 # limitations under the License.
 import os
-from msprobe.core.common.utils import CompareException, check_compare_param, \
-    check_configuration_param, set_dump_path, get_dump_mode
-from msprobe.core.common.file_utils import create_directory
 from msprobe.core.common.exceptions import FileCheckException
+from msprobe.core.common.file_utils import create_directory
+from msprobe.core.common.utils import CompareException, check_compare_param, check_configuration_param, get_dump_mode, \
+    set_dump_path
+from msprobe.core.compare.acc_compare import ModeConfig
+from msprobe.core.compare.utils import check_and_return_dir_contents, extract_json, set_stack_json_path
 from msprobe.pytorch.common.log import logger
-from msprobe.pytorch.compare.pt_compare import PTComparator
-from msprobe.core.compare.utils import check_and_return_dir_contents, extract_json
+from msprobe.pytorch.compare.pt_compare import PTComparator, compare
 def compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs):
-    if kwargs.get('suffix'):
+    if kwargs.get("suffix"):
         logger.error("Argument 'suffix' is not supported for compare_distributed.")
         raise CompareException(CompareException.INVALID_PARAM_ERROR)
-    stack_mode = kwargs.get('stack_mode', False)
-    auto_analyze = kwargs.get('auto_analyze', True)
-    fuzzy_match = kwargs.get('fuzzy_match', False)
-    is_print_compare_log = kwargs.get('is_print_compare_log', True)
+    is_print_compare_log = kwargs.get("is_print_compare_log", True)
     # get the ranks and match by order
     npu_ranks = sorted(check_and_return_dir_contents(npu_dump_dir, 'rank'))
     bench_ranks = sorted(check_and_return_dir_contents(bench_dump_dir, 'rank'))
     if len(npu_ranks) != len(bench_ranks):
-        logger.error('The number of ranks in the two runs are different. '
-                        'Unable to match the ranks. Please use another folder to compare '
-                        'or use compare() api and manually match the ranks.')
+        logger.error(
+            "The number of ranks in the two runs are different. "
+            "Unable to match the ranks. "
+            "Please use another folder to compare or use compare() api and manually match the ranks.")
         raise CompareException(CompareException.INVALID_PATH_ERROR)
     for nr, br in zip(npu_ranks, bench_ranks):
         npu_data_dir = os.path.join(npu_dump_dir, nr)
         bench_data_dir = os.path.join(bench_dump_dir, br)
         npu_path = extract_json(npu_data_dir, stack_json=False)
         bench_path = extract_json(bench_data_dir, stack_json=False)
-        stack_path = extract_json(npu_data_dir, stack_json=True)
         dump_result_param = {
-            'npu_json_path': npu_path,
-            'bench_json_path': bench_path,
-            'stack_json_path': stack_path,
-            'is_print_compare_log': is_print_compare_log
+            "npu_json_path": npu_path,
+            "bench_json_path": bench_path,
+            "is_print_compare_log": is_print_compare_log
         }
-        try:
-            set_dump_path(dump_result_param)
-            dump_mode = get_dump_mode(dump_result_param)
-            check_configuration_param(stack_mode, auto_analyze, fuzzy_match, is_print_compare_log)
-            create_directory(output_path)
-            check_compare_param(dump_result_param, output_path, dump_mode)
-        except (CompareException, FileCheckException) as error:
-            logger.error('Compare failed. Please check the arguments and do it again!')
-            raise CompareException(error.code) from error
-        pt_comparator = PTComparator()
-        pt_comparator.compare_core(dump_result_param, output_path, suffix=f'_{nr}-{br}', dump_mode=dump_mode, **kwargs)
+        compare(input_param=dump_result_param, output_path=output_path, suffix=f'_{nr}-{br}', **kwargs)

mindstudio-probe 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

mindstudio-probe 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl