mindstudio-probe 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +201 -201
  2. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +36 -34
  3. mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
  4. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +1 -1
  5. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +1 -0
  6. msprobe/README.md +131 -237
  7. msprobe/__init__.py +16 -1
  8. msprobe/{config/config.json → config.json} +47 -49
  9. msprobe/core/advisor/advisor.py +124 -124
  10. msprobe/core/advisor/advisor_const.py +58 -59
  11. msprobe/core/advisor/advisor_result.py +58 -58
  12. msprobe/core/common/const.py +402 -318
  13. msprobe/core/common/exceptions.py +99 -99
  14. msprobe/core/common/{file_check.py → file_utils.py} +523 -283
  15. msprobe/core/common/inplace_op_checker.py +38 -0
  16. msprobe/core/common/inplace_ops.yaml +251 -0
  17. msprobe/core/common/log.py +86 -69
  18. msprobe/core/common/utils.py +371 -616
  19. msprobe/core/common_config.py +78 -71
  20. msprobe/core/compare/acc_compare.py +472 -298
  21. msprobe/core/compare/check.py +180 -95
  22. msprobe/core/compare/compare_cli.py +69 -49
  23. msprobe/core/compare/highlight.py +259 -222
  24. msprobe/core/compare/multiprocessing_compute.py +174 -149
  25. msprobe/core/compare/npy_compare.py +310 -295
  26. msprobe/core/compare/utils.py +464 -429
  27. msprobe/core/data_dump/data_collector.py +153 -144
  28. msprobe/core/data_dump/data_processor/base.py +337 -293
  29. msprobe/core/data_dump/data_processor/factory.py +76 -59
  30. msprobe/core/data_dump/data_processor/mindspore_processor.py +192 -198
  31. msprobe/core/data_dump/data_processor/pytorch_processor.py +383 -389
  32. msprobe/core/data_dump/json_writer.py +117 -116
  33. msprobe/core/data_dump/scope.py +194 -178
  34. msprobe/core/grad_probe/constant.py +74 -70
  35. msprobe/core/grad_probe/grad_compare.py +170 -175
  36. msprobe/core/grad_probe/utils.py +77 -52
  37. msprobe/docs/01.installation.md +99 -0
  38. msprobe/docs/02.config_introduction.md +137 -0
  39. msprobe/docs/03.config_examples.md +237 -0
  40. msprobe/docs/04.acl_config_examples.md +78 -0
  41. msprobe/docs/05.data_dump_PyTorch.md +326 -0
  42. msprobe/docs/06.data_dump_MindSpore.md +285 -0
  43. msprobe/docs/07.accuracy_checker_PyTorch.md +297 -0
  44. msprobe/docs/08.accuracy_checker_online_PyTorch.md +238 -0
  45. msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
  46. msprobe/docs/10.accuracy_compare_PyTorch.md +327 -0
  47. msprobe/docs/11.accuracy_compare_MindSpore.md +333 -0
  48. msprobe/docs/12.overflow_check_PyTorch.md +79 -0
  49. msprobe/docs/13.overflow_check_MindSpore.md +31 -0
  50. msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
  51. msprobe/docs/15.free_benchmarking_PyTorch.md +170 -0
  52. msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
  53. msprobe/{doc/grad_probe/grad_probe.md → docs/17.grad_probe.md} +205 -207
  54. msprobe/{pytorch/doc//321/205/320/254/320/270/321/207/342/225/221/342/224/220/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/206/320/277/320/244/321/205/320/277/342/225/243.md → docs/18.online_dispatch.md} +89 -90
  55. msprobe/docs/FAQ.md +189 -0
  56. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
  57. msprobe/docs/img/free_benchmark_framework.png +0 -0
  58. msprobe/docs/img/ms_dump.png +0 -0
  59. msprobe/docs/img/ms_layer.png +0 -0
  60. msprobe/docs/img/pt_dump.png +0 -0
  61. msprobe/mindspore/__init__.py +2 -1
  62. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +278 -245
  63. msprobe/mindspore/api_accuracy_checker/api_info.py +76 -69
  64. msprobe/mindspore/api_accuracy_checker/api_runner.py +155 -151
  65. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +196 -196
  66. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
  67. msprobe/mindspore/api_accuracy_checker/compute_element.py +238 -223
  68. msprobe/mindspore/api_accuracy_checker/main.py +8 -15
  69. msprobe/mindspore/api_accuracy_checker/type_mapping.py +113 -113
  70. msprobe/mindspore/api_accuracy_checker/utils.py +79 -62
  71. msprobe/mindspore/cell_processor.py +58 -34
  72. msprobe/mindspore/common/const.py +108 -87
  73. msprobe/mindspore/common/log.py +37 -37
  74. msprobe/mindspore/common/utils.py +97 -57
  75. msprobe/mindspore/compare/distributed_compare.py +62 -75
  76. msprobe/mindspore/compare/layer_mapping.py +146 -0
  77. msprobe/mindspore/compare/modify_mapping.py +107 -0
  78. msprobe/mindspore/compare/ms_compare.py +357 -117
  79. msprobe/mindspore/compare/ms_graph_compare.py +364 -317
  80. msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -399
  81. msprobe/mindspore/debugger/debugger_config.py +69 -74
  82. msprobe/mindspore/debugger/precision_debugger.py +150 -107
  83. msprobe/mindspore/dump/dump_tool_factory.py +50 -35
  84. msprobe/mindspore/dump/hook_cell/api_registry.py +128 -104
  85. msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -53
  86. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
  87. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +994 -925
  88. msprobe/mindspore/dump/hook_cell/wrap_api.py +121 -0
  89. msprobe/mindspore/dump/jit_dump.py +96 -56
  90. msprobe/mindspore/dump/kernel_graph_dump.py +75 -60
  91. msprobe/mindspore/dump/kernel_kbyk_dump.py +79 -65
  92. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +131 -116
  93. msprobe/mindspore/free_benchmark/common/config.py +27 -12
  94. msprobe/mindspore/free_benchmark/common/handler_params.py +32 -17
  95. msprobe/mindspore/free_benchmark/common/utils.py +85 -71
  96. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -842
  97. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +57 -42
  98. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +122 -107
  99. msprobe/mindspore/free_benchmark/handler/base_handler.py +105 -90
  100. msprobe/mindspore/free_benchmark/handler/check_handler.py +56 -41
  101. msprobe/mindspore/free_benchmark/handler/fix_handler.py +51 -36
  102. msprobe/mindspore/free_benchmark/handler/handler_factory.py +36 -21
  103. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +82 -67
  104. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +36 -21
  105. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +78 -63
  106. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +77 -0
  107. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +49 -34
  108. msprobe/mindspore/free_benchmark/perturbation/no_change.py +27 -12
  109. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +44 -27
  110. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +48 -33
  111. msprobe/mindspore/grad_probe/global_context.py +100 -91
  112. msprobe/mindspore/grad_probe/grad_analyzer.py +231 -231
  113. msprobe/mindspore/grad_probe/grad_monitor.py +27 -27
  114. msprobe/mindspore/grad_probe/grad_stat_csv.py +131 -131
  115. msprobe/mindspore/grad_probe/hook.py +94 -92
  116. msprobe/mindspore/grad_probe/utils.py +29 -28
  117. msprobe/mindspore/ms_config.py +128 -126
  118. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +60 -45
  119. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +49 -34
  120. msprobe/mindspore/runtime.py +4 -4
  121. msprobe/mindspore/service.py +297 -354
  122. msprobe/mindspore/task_handler_factory.py +24 -24
  123. msprobe/msprobe.py +105 -107
  124. msprobe/pytorch/__init__.py +23 -4
  125. msprobe/pytorch/api_accuracy_checker/common/config.py +70 -55
  126. msprobe/pytorch/api_accuracy_checker/common/utils.py +246 -165
  127. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +230 -213
  128. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +632 -581
  129. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
  130. msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
  131. msprobe/pytorch/api_accuracy_checker/compare/compare.py +416 -381
  132. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +90 -73
  133. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +265 -244
  134. msprobe/pytorch/api_accuracy_checker/config.yaml +10 -10
  135. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +370 -332
  136. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +221 -199
  137. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +150 -134
  138. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +518 -581
  139. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +213 -74
  140. msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
  141. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +218 -202
  142. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +370 -324
  143. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +227 -204
  144. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
  145. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +244 -218
  146. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
  147. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
  148. msprobe/pytorch/bench_functions/__init__.py +30 -15
  149. msprobe/pytorch/bench_functions/apply_adam_w.py +43 -28
  150. msprobe/pytorch/bench_functions/confusion_transpose.py +34 -19
  151. msprobe/pytorch/bench_functions/fast_gelu.py +70 -55
  152. msprobe/pytorch/bench_functions/layer_norm_eval.py +21 -6
  153. msprobe/pytorch/bench_functions/linear.py +27 -12
  154. msprobe/pytorch/bench_functions/matmul_backward.py +63 -48
  155. msprobe/pytorch/bench_functions/npu_fusion_attention.py +538 -421
  156. msprobe/pytorch/bench_functions/rms_norm.py +30 -15
  157. msprobe/pytorch/bench_functions/rotary_mul.py +71 -52
  158. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +41 -26
  159. msprobe/pytorch/bench_functions/swiglu.py +70 -55
  160. msprobe/pytorch/common/__init__.py +17 -2
  161. msprobe/pytorch/common/compare_script.template +14 -14
  162. msprobe/pytorch/common/log.py +33 -32
  163. msprobe/pytorch/common/parse_json.py +54 -39
  164. msprobe/pytorch/common/utils.py +310 -300
  165. msprobe/pytorch/compare/distributed_compare.py +66 -66
  166. msprobe/pytorch/compare/mapping.yaml +607 -607
  167. msprobe/pytorch/compare/match.py +49 -33
  168. msprobe/pytorch/compare/pt_compare.py +82 -40
  169. msprobe/pytorch/debugger/debugger_config.py +108 -95
  170. msprobe/pytorch/debugger/precision_debugger.py +173 -125
  171. msprobe/pytorch/free_benchmark/__init__.py +23 -8
  172. msprobe/pytorch/free_benchmark/common/constant.py +70 -70
  173. msprobe/pytorch/free_benchmark/common/counter.py +71 -71
  174. msprobe/pytorch/free_benchmark/common/enums.py +65 -37
  175. msprobe/pytorch/free_benchmark/common/params.py +144 -129
  176. msprobe/pytorch/free_benchmark/common/utils.py +118 -102
  177. msprobe/pytorch/free_benchmark/compare/grad_saver.py +200 -179
  178. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +119 -104
  179. msprobe/pytorch/free_benchmark/main.py +120 -105
  180. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +28 -13
  181. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +56 -41
  182. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +105 -90
  183. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +119 -104
  184. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +87 -63
  185. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +83 -68
  186. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +43 -28
  187. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +60 -45
  188. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +34 -19
  189. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +256 -217
  190. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +54 -39
  191. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +38 -23
  192. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +45 -30
  193. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +185 -170
  194. msprobe/pytorch/function_factory.py +91 -75
  195. msprobe/pytorch/functional/module_dump.py +84 -0
  196. msprobe/pytorch/grad_probe/grad_monitor.py +91 -90
  197. msprobe/pytorch/grad_probe/grad_stat_csv.py +128 -128
  198. msprobe/pytorch/hook_module/__init__.py +16 -1
  199. msprobe/pytorch/hook_module/api_registry.py +166 -161
  200. msprobe/pytorch/hook_module/hook_module.py +118 -120
  201. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1877
  202. msprobe/pytorch/hook_module/utils.py +28 -29
  203. msprobe/pytorch/hook_module/wrap_aten.py +111 -110
  204. msprobe/pytorch/hook_module/wrap_distributed.py +77 -78
  205. msprobe/pytorch/hook_module/wrap_functional.py +104 -105
  206. msprobe/pytorch/hook_module/wrap_npu_custom.py +85 -84
  207. msprobe/pytorch/hook_module/wrap_tensor.py +69 -71
  208. msprobe/pytorch/hook_module/wrap_torch.py +84 -86
  209. msprobe/pytorch/hook_module/wrap_vf.py +60 -62
  210. msprobe/pytorch/module_processer.py +153 -138
  211. msprobe/pytorch/online_dispatch/__init__.py +20 -20
  212. msprobe/pytorch/online_dispatch/compare.py +235 -236
  213. msprobe/pytorch/online_dispatch/dispatch.py +271 -271
  214. msprobe/pytorch/online_dispatch/dump_compare.py +155 -156
  215. msprobe/pytorch/online_dispatch/single_compare.py +391 -391
  216. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +57 -49
  217. msprobe/pytorch/online_dispatch/utils.py +127 -146
  218. msprobe/pytorch/parse.py +19 -4
  219. msprobe/pytorch/parse_tool/cli.py +31 -32
  220. msprobe/pytorch/parse_tool/lib/compare.py +259 -271
  221. msprobe/pytorch/parse_tool/lib/config.py +52 -52
  222. msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
  223. msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
  224. msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
  225. msprobe/pytorch/parse_tool/lib/parse_tool.py +161 -158
  226. msprobe/pytorch/parse_tool/lib/utils.py +320 -321
  227. msprobe/pytorch/parse_tool/lib/visualization.py +85 -91
  228. msprobe/pytorch/pt_config.py +317 -187
  229. msprobe/pytorch/service.py +311 -252
  230. mindstudio_probe-1.0.3.dist-info/RECORD +0 -272
  231. msprobe/config/README.md +0 -539
  232. msprobe/mindspore/doc/compare.md +0 -58
  233. msprobe/mindspore/doc/dump.md +0 -217
  234. msprobe/mindspore/dump/hook_cell/wrap_functional.py +0 -91
  235. msprobe/mindspore/dump/hook_cell/wrap_tensor.py +0 -63
  236. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
  237. msprobe/pytorch/doc/FAQ.md +0 -193
  238. msprobe/pytorch/doc/api_accuracy_checker.md +0 -313
  239. msprobe/pytorch/doc/api_accuracy_checker_online.md +0 -187
  240. msprobe/pytorch/doc/dump.md +0 -260
  241. msprobe/pytorch/doc/msprobe/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/206/320/245/342/226/221/321/206/320/235/320/276dump/321/206/320/260/320/227/321/205/320/227/320/226/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -182
  242. msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -240
  243. msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
  244. msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
  245. msprobe/pytorch/doc/run_overflow_check.md +0 -25
  246. msprobe/pytorch/doc//321/206/320/247/320/260/321/206/320/260/320/227/321/206/320/255/320/226/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/205/320/254/342/225/221/321/206/320/251/320/277/321/211/320/272/320/234/321/210/320/277/320/221/321/205/320/242/320/234/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -151
  247. msprobe/pytorch/functional/data_processor.py +0 -0
  248. msprobe/pytorch/functional/dump_module.py +0 -39
  249. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0
  250. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
  251. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
  252. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
  253. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
  254. /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
  255. /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
  256. /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
  257. /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
  258. /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
  259. /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
  260. /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
  261. /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
  262. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
  263. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
  264. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
  265. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
  266. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
  267. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
  268. /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
  269. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
  270. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
  271. /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
  272. /msprobe/{config → docs}/img/free_benchmark.png +0 -0
  273. /msprobe/{doc/grad_probe/img/image-1.png → docs/img/grad_probe_image-1.png} +0 -0
  274. /msprobe/{doc/grad_probe/img/image-2.png → docs/img/grad_probe_image-2.png} +0 -0
  275. /msprobe/{doc/grad_probe/img/image-3.png → docs/img/grad_probe_image-3.png} +0 -0
  276. /msprobe/{doc/grad_probe/img/image-4.png → docs/img/grad_probe_image-4.png} +0 -0
  277. /msprobe/{doc/grad_probe/img/image.png → docs/img/grad_probe_image.png} +0 -0
  278. /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
@@ -1,581 +1,518 @@
1
- import argparse
2
- import os
3
- import csv
4
- import sys
5
- import time
6
- import gc
7
- from collections import namedtuple
8
-
9
- try:
10
- import torch_npu
11
- except ImportError:
12
- is_gpu = True
13
- current_device = "cuda"
14
- else:
15
- is_gpu = False
16
- current_device = "npu"
17
- import torch
18
- from tqdm import tqdm
19
-
20
- from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import Backward_Message, hf_32_standard_api, UtDataInfo, \
21
- get_validated_result_csv_path, get_validated_details_csv_path, exec_api
22
- from msprobe.pytorch.api_accuracy_checker.run_ut.data_generate import gen_api_params, gen_args
23
- from msprobe.pytorch.api_accuracy_checker.common.utils import api_info_preprocess, \
24
- initialize_save_path, UtDataProcessor
25
- from msprobe.pytorch.api_accuracy_checker.compare.compare import Comparator
26
- from msprobe.pytorch.api_accuracy_checker.compare.compare_column import CompareColumn
27
- from msprobe.pytorch.api_accuracy_checker.common.config import msCheckerConfig
28
- from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward
29
- from msprobe.core.common.file_check import FileOpen, FileChecker, \
30
- change_mode, check_path_before_create, create_directory
31
- from msprobe.pytorch.common.log import logger
32
- from msprobe.core.common.utils import get_json_contents
33
- from msprobe.pytorch.pt_config import parse_json_config
34
- from msprobe.core.common.const import Const, FileCheckConst, CompareConst
35
- from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import ATTL, ATTLConfig, ApiData, move2device_exec
36
- from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.device_dispatch import ConsumerDispatcher
37
-
38
-
39
- current_time = time.strftime("%Y%m%d%H%M%S")
40
- UT_ERROR_DATA_DIR = 'ut_error_data' + current_time
41
- RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv"
42
- DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv"
43
- RunUTConfig = namedtuple('RunUTConfig', ['forward_content', 'backward_content', 'result_csv_path', 'details_csv_path',
44
- 'save_error_data', 'is_continue_run_ut', 'real_data_path', 'white_list',
45
- 'black_list', 'error_data_path', 'online_config'])
46
-
47
- OnlineConfig = namedtuple('OnlineConfig', ['is_online', 'nfs_path', 'host', 'port', 'rank_list', 'tls_path'])
48
-
49
- not_backward_list = ['repeat_interleave']
50
- not_detach_set = {'resize_', 'resize_as_', 'set_', 'transpose_', 't_', 'squeeze_', 'unsqueeze_'}
51
- not_raise_dtype_set = {'type_as'}
52
-
53
- RAISE_PRECISION = {
54
- torch.float16: torch.float32,
55
- torch.bfloat16: torch.float32,
56
- torch.float32: torch.float64
57
- }
58
-
59
- tqdm_params = {
60
- 'smoothing': 0, # 平滑进度条的预计剩余时间,取值范围0到1
61
- 'desc': 'Processing', # 进度条前的描述文字
62
- 'leave': True, # 迭代完成后保留进度条的显示
63
- 'ncols': 75, # 进度条的固定宽度
64
- 'mininterval': 0.1, # 更新进度条的最小间隔秒数
65
- 'maxinterval': 1.0, # 更新进度条的最大间隔秒数
66
- 'miniters': 1, # 更新进度条之间的最小迭代次数
67
- 'ascii': None, # 根据环境自动使用ASCII或Unicode字符
68
- 'unit': 'it', # 迭代单位
69
- 'unit_scale': True, # 自动根据单位缩放
70
- 'dynamic_ncols': True, # 动态调整进度条宽度以适应控制台
71
- 'bar_format': '{l_bar}{bar}| {n}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]' # 自定义进度条输出格式
72
- }
73
-
74
-
75
- def deal_detach(arg, to_detach=True):
76
- return arg.detach() if to_detach else arg
77
-
78
-
79
- def raise_bench_data_dtype(api_name, arg, raise_dtype=None):
80
- '''
81
- 将标杆数据的dtype转换为raise_dtype
82
- 输入:
83
- api_name:api名称
84
- arg:标杆输入
85
- raise_dtype:需要转换的dtype
86
- 输出:
87
- arg: 转换dtype的标杆输入
88
- '''
89
- if api_name in hf_32_standard_api and arg.dtype == torch.float32:
90
- return arg
91
- if raise_dtype is None or arg.dtype not in RAISE_PRECISION or raise_dtype == arg.dtype:
92
- return arg
93
- return arg.type(raise_dtype)
94
-
95
-
96
- def generate_device_params(input_args, input_kwargs, need_backward, api_name):
97
- def recursive_arg_to_device(arg_in, to_detach):
98
- if isinstance(arg_in, (list, tuple)):
99
- return type(arg_in)(recursive_arg_to_device(arg, to_detach) for arg in arg_in)
100
- elif isinstance(arg_in, torch.Tensor):
101
- if need_backward and arg_in.requires_grad:
102
- arg_in = deal_detach(arg_in.clone(), to_detach).to(current_device).requires_grad_()
103
- temp_arg_in = arg_in * 1
104
- arg_in = temp_arg_in.type_as(arg_in)
105
- arg_in.retain_grad()
106
- return arg_in
107
- else:
108
- return deal_detach(arg_in.clone(), to_detach).to(current_device)
109
- else:
110
- return arg_in
111
-
112
- is_detach = api_name not in not_detach_set
113
- device_args = recursive_arg_to_device(input_args, is_detach)
114
- device_kwargs = \
115
- {key: recursive_arg_to_device(value, key != "out" and is_detach) for key, value in input_kwargs.items()}
116
- return device_args, device_kwargs
117
-
118
-
119
- def generate_cpu_params(input_args, input_kwargs, need_backward, api_name):
120
- def recursive_arg_to_cpu(arg_in, to_detach, raise_dtype=None):
121
- if isinstance(arg_in, (list, tuple)):
122
- return type(arg_in)(recursive_arg_to_cpu(arg, to_detach, raise_dtype=raise_dtype) for arg in arg_in)
123
- elif isinstance(arg_in, torch.Tensor):
124
- if need_backward and arg_in.requires_grad:
125
- arg_in = deal_detach(raise_bench_data_dtype(
126
- api_name, arg_in.clone(), raise_dtype=raise_dtype), to_detach).requires_grad_()
127
- temp_arg_in = arg_in * 1
128
- arg_in = temp_arg_in.type_as(arg_in)
129
- arg_in.retain_grad()
130
- return arg_in
131
- else:
132
- return deal_detach(raise_bench_data_dtype(api_name, arg_in.clone(), raise_dtype=raise_dtype), to_detach)
133
- else:
134
- return arg_in
135
-
136
- def is_tensor_with_raise_precision(arg_in, check_kwargs=False):
137
- if arg_in.dtype in RAISE_PRECISION:
138
- return True
139
- if check_kwargs and arg_in.dtype in [torch.half, torch.bfloat16]:
140
- return True
141
- return False
142
-
143
- def recursive_find_dtypes(arg_in, kwargs=None, check_kwargs=False):
144
- if isinstance(arg_in, (list, tuple)):
145
- return set().union(*tuple(recursive_find_dtypes(arg, kwargs, check_kwargs=check_kwargs) for arg in arg_in))
146
- elif isinstance(arg_in, torch.Tensor) and is_tensor_with_raise_precision(arg_in, check_kwargs):
147
- return set([arg_in.dtype])
148
- elif isinstance(arg_in, dict) and check_kwargs:
149
- return set().union(*tuple(recursive_find_dtypes(v, kwargs, check_kwargs=True) for v in arg_in.values()))
150
- return set()
151
-
152
- raise_dtype = None
153
- need_raise_dtypes = recursive_find_dtypes(input_args)
154
- need_raise_dtypes.update(recursive_find_dtypes(input_kwargs, check_kwargs=True))
155
- if len(need_raise_dtypes) == 1:
156
- raise_dtype = RAISE_PRECISION.get(need_raise_dtypes.pop(), torch.float32)
157
- elif len(need_raise_dtypes) >= 2:
158
- raise_dtype = torch.float32
159
-
160
- raise_dtype = None if api_name in not_raise_dtype_set else raise_dtype
161
- is_detach = api_name not in not_detach_set
162
- cpu_args = recursive_arg_to_cpu(input_args, is_detach, raise_dtype=raise_dtype)
163
- cpu_kwargs = {key: recursive_arg_to_cpu(value, key != "out" and is_detach, raise_dtype=raise_dtype) for key, value in input_kwargs.items()}
164
- return cpu_args, cpu_kwargs
165
-
166
-
167
- def run_ut(config):
168
- logger.info("start UT test")
169
- if config.online_config.is_online:
170
- logger.info(f"UT task result will be saved in {config.result_csv_path}".replace(".csv", "_rank*.csv"))
171
- logger.info(f"UT task details will be saved in {config.details_csv_path}".replace(".csv", "_rank*.csv"))
172
- else:
173
- logger.info(f"UT task result will be saved in {config.result_csv_path}")
174
- logger.info(f"UT task details will be saved in {config.details_csv_path}")
175
-
176
- if config.save_error_data:
177
- logger.info(f"UT task error_datas will be saved in {config.error_data_path}")
178
- compare = Comparator(config.result_csv_path, config.details_csv_path, config.is_continue_run_ut, config=config)
179
-
180
- if config.online_config.is_online:
181
- run_api_online(config, compare)
182
- else:
183
- with FileOpen(config.result_csv_path, 'r') as file:
184
- csv_reader = csv.reader(file)
185
- next(csv_reader)
186
- api_name_set = {row[0] for row in csv_reader}
187
- run_api_offline(config, compare, api_name_set)
188
- for result_csv_path, details_csv_path in zip(compare.save_path_list, compare.detail_save_path_list):
189
- change_mode(result_csv_path, FileCheckConst.DATA_FILE_AUTHORITY)
190
- change_mode(details_csv_path, FileCheckConst.DATA_FILE_AUTHORITY)
191
- logger.info(f"UT task result csv is saved in {result_csv_path}")
192
- logger.info(f"UT task details csv is saved in {details_csv_path}")
193
- compare.print_pretest_result()
194
-
195
-
196
- def run_api_offline(config, compare, api_name_set):
197
- for _, (api_full_name, api_info_dict) in enumerate(tqdm(config.forward_content.items(), **tqdm_params)):
198
- if api_full_name in api_name_set:
199
- continue
200
- if is_unsupported_api(api_full_name):
201
- continue
202
- [_, api_name, _] = api_full_name.split(Const.SEP)
203
- try:
204
- if blacklist_and_whitelist_filter(api_name, config.black_list, config.white_list):
205
- continue
206
- data_info = run_torch_api(api_full_name, config.real_data_path, config.backward_content, api_info_dict)
207
- is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info)
208
- if config.save_error_data:
209
- do_save_error_data(api_full_name, data_info, config.error_data_path, is_fwd_success, is_bwd_success)
210
- except Exception as err:
211
- if "expected scalar type Long" in str(err):
212
- logger.warning(f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
213
- f"'int32_to_int64' list in accuracy_tools/api_accuracy_check/common/utils.py file.")
214
- else:
215
- logger.error(f"Run {api_full_name} UT Error: %s" % str(err))
216
- err_column = CompareColumn()
217
- fwd_compare_alg_results = err_column.to_column_value(CompareConst.SKIP, str(err))
218
- result_info = (api_full_name, CompareConst.SKIP, CompareConst.SKIP, [fwd_compare_alg_results], None, 0)
219
- compare.record_results(result_info)
220
- finally:
221
- if is_gpu:
222
- torch.cuda.empty_cache()
223
- else:
224
- torch.npu.empty_cache()
225
- gc.collect()
226
-
227
-
228
- def run_api_online(config, compare):
229
- attl = init_attl(config.online_config)
230
- dispatcher = ConsumerDispatcher(compare=compare)
231
- dispatcher.start(handle_func=run_torch_api_online, config=config)
232
-
233
- def tcp_communication_flow():
234
- while True:
235
- api_data = attl.recv()
236
- if api_data == 'STOP_':
237
- continue
238
- if api_data == 'KILL_':
239
- time.sleep(1)
240
- logger.info("==========接收到STOP信号==========")
241
- dispatcher.stop()
242
- attl.stop_serve()
243
- time.sleep(1)
244
- break
245
- if not isinstance(api_data, ApiData):
246
- continue
247
- api_full_name = api_data.name
248
- [_, api_name, _] = api_full_name.split(Const.SEP)
249
- if blacklist_and_whitelist_filter(api_name, config.black_list, config.white_list):
250
- continue
251
- dispatcher.update_consume_queue(api_data)
252
-
253
- def shared_storage_communication_flow():
254
- flag_num = -1
255
- while True:
256
- api_data = attl.download()
257
- if api_data == "start":
258
- if flag_num == -1:
259
- flag_num += 1
260
- flag_num += 1
261
- if api_data == "end":
262
- flag_num -= 1
263
- if flag_num == 0:
264
- dispatcher.stop()
265
- break
266
- if not isinstance(api_data, ApiData):
267
- continue
268
- api_full_name = api_data.name
269
- [_, api_name, _] = api_full_name.split(Const.SEP)
270
- if blacklist_and_whitelist_filter(api_name, config.black_list, config.white_list):
271
- continue
272
- dispatcher.update_consume_queue(api_data)
273
-
274
- if config.online_config.nfs_path:
275
- shared_storage_communication_flow()
276
- else:
277
- tcp_communication_flow()
278
-
279
-
280
- def blacklist_and_whitelist_filter(api_name, black_list, white_list):
281
- """
282
- run api(api_name) if api_name not in black_list and in white_list.
283
- If api is both in black_list and black_list, black_list first.
284
- return: False for exec api, True for not exec
285
- """
286
- if black_list and api_name in black_list:
287
- return True
288
- if white_list and api_name not in white_list:
289
- return True
290
- return False
291
-
292
-
293
- def is_unsupported_api(api_name):
294
- split_name = api_name.split(Const.SEP)[0]
295
- flag = split_name in [Const.NPU, Const.DISTRIBUTED]
296
- if flag:
297
- logger.info(f"{split_name} api is not supported for run ut. SKIP.")
298
- return flag
299
-
300
-
301
- def do_save_error_data(api_full_name, data_info, error_data_path, is_fwd_success, is_bwd_success):
302
- if not is_fwd_success or not is_bwd_success:
303
- processor = UtDataProcessor(error_data_path)
304
- for element in data_info.in_fwd_data_list:
305
- processor.save_tensors_in_element(api_full_name + '.forward.input', element)
306
- processor.save_tensors_in_element(api_full_name + '.forward.output.bench', data_info.bench_output)
307
- processor.save_tensors_in_element(api_full_name + '.forward.output.device', data_info.device_output)
308
- processor.save_tensors_in_element(api_full_name + '.backward.input', data_info.grad_in)
309
- processor.save_tensors_in_element(api_full_name + '.backward.output.bench', data_info.bench_grad)
310
- processor.save_tensors_in_element(api_full_name + '.backward.output.device', data_info.device_grad)
311
-
312
-
313
- def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict):
314
- in_fwd_data_list = []
315
- backward_message = ''
316
- [api_type, api_name, _] = api_full_name.split(Const.SEP)
317
- args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path)
318
- in_fwd_data_list.append(args)
319
- in_fwd_data_list.append(kwargs)
320
- need_backward = api_full_name in backward_content
321
- if not need_grad:
322
- logger.warning("%s %s" % (api_full_name, Backward_Message.UNSUPPORT_BACKWARD_MESSAGE))
323
- backward_message += Backward_Message.UNSUPPORT_BACKWARD_MESSAGE
324
- if api_name in not_backward_list:
325
- need_grad = False
326
- logger.warning("%s %s" % (api_full_name, Backward_Message.NO_BACKWARD_RESULT_MESSAGE))
327
- backward_message += Backward_Message.NO_BACKWARD_RESULT_MESSAGE
328
- need_backward = need_backward and need_grad
329
- if kwargs.get("device"):
330
- del kwargs["device"]
331
- cpu_args, cpu_kwargs = generate_cpu_params(args, kwargs, need_backward, api_name)
332
- device_args, device_kwargs = generate_device_params(args, kwargs, need_backward, api_name)
333
- bench_grad_out, device_grad_out = None, None
334
- out = exec_api(api_type, api_name, cpu_args, cpu_kwargs)
335
- device_out = exec_api(api_type, api_name, device_args, device_kwargs)
336
- current_path = os.path.dirname(os.path.realpath(__file__))
337
- ut_setting_path = os.path.join(current_path, "torch_ut_setting.json")
338
- api_setting_dict = get_json_contents(ut_setting_path)
339
- grad_input_index = api_setting_dict.get(api_name)
340
- grad_index = None
341
- grad, bench_grad = None, None
342
- if grad_input_index is not None:
343
- grad_index = grad_input_index.get('grad_index')
344
-
345
- if need_backward:
346
- if need_to_backward(grad_index, out):
347
- backward_args = backward_content[api_full_name].get("input")
348
- grad = gen_args(backward_args, api_name, real_data_path=real_data_path)[0]
349
- bench_grad, _ = generate_cpu_params(grad, {}, False, api_name)
350
- bench_grad_out = run_backward(cpu_args, bench_grad, grad_index, out)
351
- device_grad = grad.clone().detach().to(current_device)
352
- device_grad_out = run_backward(device_args, device_grad, grad_index, device_out)
353
- else:
354
- backward_message += Backward_Message.MULTIPLE_BACKWARD_MESSAGE
355
-
356
- return UtDataInfo(bench_grad_out, device_grad_out, device_out, out, bench_grad, in_fwd_data_list, backward_message)
357
-
358
-
359
- def run_torch_api_online(api_full_name, api_data, backward_content):
360
- in_fwd_data_list = []
361
- [api_type, api_name, _] = api_full_name.split(Const.SEP)
362
- args, kwargs, out = api_data.args, api_data.kwargs, api_data.result
363
- in_fwd_data_list.append(args)
364
- in_fwd_data_list.append(kwargs)
365
- if kwargs.get("device"):
366
- del kwargs["device"]
367
-
368
- device_out = exec_api(api_type, api_name, args, kwargs)
369
- device_out = move2device_exec(device_out, "cpu")
370
- return UtDataInfo(None, None, out, device_out, None, in_fwd_data_list, None, rank=api_data.rank)
371
-
372
-
373
- def get_api_info(api_info_dict, api_name, real_data_path):
374
- convert_type, api_info_dict = api_info_preprocess(api_name, api_info_dict)
375
- need_grad = True
376
- if api_info_dict.get("input_kwargs") and "out" in api_info_dict.get("input_kwargs"):
377
- need_grad = False
378
- args, kwargs = gen_api_params(api_info_dict, api_name, need_grad, convert_type, real_data_path)
379
- return args, kwargs, need_grad
380
-
381
-
382
- def need_to_backward(grad_index, out):
383
- if grad_index is None and isinstance(out, (list, tuple)):
384
- return False
385
- return True
386
-
387
-
388
- def run_backward(args, grad, grad_index, out):
389
- if grad_index is not None:
390
- out[grad_index].backward(grad)
391
- else:
392
- out.backward(grad)
393
- args_grad = []
394
- for arg in args:
395
- if isinstance(arg, torch.Tensor):
396
- args_grad.append(arg.grad)
397
- grad_out = args_grad
398
-
399
- return grad_out
400
-
401
-
402
- def initialize_save_error_data(error_data_path):
403
- check_path_before_create(error_data_path)
404
- create_directory(error_data_path)
405
- error_data_path_checker = FileChecker(error_data_path, FileCheckConst.DIR,
406
- ability=FileCheckConst.WRITE_ABLE)
407
- error_data_path = error_data_path_checker.common_check()
408
- error_data_path =initialize_save_path(error_data_path, UT_ERROR_DATA_DIR)
409
- return error_data_path
410
-
411
-
412
- def init_attl(config):
413
- """config: OnlineConfig"""
414
- attl = ATTL('gpu', ATTLConfig(is_benchmark_device=True,
415
- connect_ip=config.host,
416
- connect_port=config.port,
417
- nfs_path=config.nfs_path,
418
- tls_path=config.tls_path))
419
- return attl
420
-
421
-
422
- def _run_ut_parser(parser):
423
- parser.add_argument("-api_info", "--api_info_file", dest="api_info_file", default="", type=str,
424
- help="<Optional> The api param tool result file: generate from api param tool, "
425
- "a json file.",
426
- required=False)
427
- parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str,
428
- help="<optional> The ut task result out path.",
429
- required=False)
430
- parser.add_argument('-save_error_data', dest="save_error_data", action="store_true",
431
- help="<optional> Save compare failed api output.", required=False)
432
- parser.add_argument("-j", "--jit_compile", dest="jit_compile", action="store_true",
433
- help="<optional> whether to turn on jit compile", required=False)
434
-
435
- class UniqueDeviceAction(argparse.Action):
436
- def __call__(self, parser, namespace, values, option_string=None):
437
- unique_values = set(values)
438
- if len(values) != len(unique_values):
439
- parser.error("device id must be unique")
440
- for device_id in values:
441
- if not 0 <= device_id:
442
- parser.error("device id must be greater than or equal to 0")
443
- setattr(namespace, self.dest, values)
444
-
445
- parser.add_argument("-d", "--device", dest="device_id", nargs='+', type=int,
446
- help="<optional> set device id to run ut, must be unique and in range 0-7",
447
- default=[0], required=False, action=UniqueDeviceAction)
448
- parser.add_argument("-csv_path", "--result_csv_path", dest="result_csv_path", default="", type=str,
449
- help="<optional> The path of accuracy_checking_result_{timestamp}.csv, "
450
- "when run ut is interrupted, enter the file path to continue run ut.",
451
- required=False)
452
- parser.add_argument("-f", "--filter_api", dest="filter_api", action="store_true",
453
- help="<optional> Whether to filter the api in the api_info_file.", required=False)
454
- parser.add_argument("-config", "--config_path", dest="config_path", default="", type=str,
455
- help="<optional> The path of config.json", required=False)
456
-
457
-
458
- def preprocess_forward_content(forward_content):
459
- processed_content = {}
460
- base_keys_variants = {}
461
- arg_cache = {}
462
-
463
- for key, value in forward_content.items():
464
- base_key = key.rsplit(Const.SEP, 1)[0]
465
-
466
- if key not in arg_cache:
467
- filtered_new_args = [
468
- {k: v for k, v in arg.items() if k not in ['Max', 'Min']}
469
- for arg in value['input_args'] if isinstance(arg, dict)
470
- ]
471
- arg_cache[key] = (filtered_new_args, value['input_kwargs'])
472
-
473
- filtered_new_args, new_kwargs = arg_cache[key]
474
-
475
- if base_key not in base_keys_variants:
476
- processed_content[key] = value
477
- base_keys_variants[base_key] = {key}
478
- else:
479
- is_duplicate = False
480
- for variant in base_keys_variants.get(base_key, []):
481
- try:
482
- existing_args, existing_kwargs = arg_cache.get(variant)
483
- except KeyError as e:
484
- logger.error(f"KeyError: {e} when processing {key}")
485
- if existing_args == filtered_new_args and existing_kwargs == new_kwargs:
486
- is_duplicate = True
487
- break
488
-
489
- if not is_duplicate:
490
- processed_content[key] = value
491
- base_keys_variants[base_key].add(key)
492
-
493
- return processed_content
494
-
495
-
496
- def _run_ut(parser=None):
497
- if not parser:
498
- parser = argparse.ArgumentParser()
499
- _run_ut_parser(parser)
500
- args = parser.parse_args(sys.argv[1:])
501
- run_ut_command(args)
502
-
503
-
504
- def run_ut_command(args):
505
- if not is_gpu:
506
- torch.npu.set_compile_mode(jit_compile=args.jit_compile)
507
- used_device = current_device + ":" + str(args.device_id[0])
508
- try:
509
- if is_gpu:
510
- torch.cuda.set_device(used_device)
511
- else:
512
- torch.npu.set_device(used_device)
513
- except Exception as error:
514
- logger.error(f"Set device id failed. device id is: {args.device_id}")
515
- raise NotImplementedError from error
516
-
517
- # 在线预检场景下,不需要外出输出api信息,forward_content, backward_content, real_data_path设置为None
518
- # 离线场景下,forward_content, backward_content, real_data_path从api_info_file中解析
519
- forward_content, backward_content, real_data_path = None, None, None
520
- if args.api_info_file:
521
- api_info_file_checker = FileChecker(file_path = args.api_info_file, path_type = FileCheckConst.FILE,
522
- ability = FileCheckConst.READ_ABLE, file_type = FileCheckConst.JSON_SUFFIX)
523
- checked_api_info = api_info_file_checker.common_check()
524
- forward_content, backward_content, real_data_path = parse_json_info_forward_backward(checked_api_info)
525
- if args.filter_api:
526
- logger.info("Start filtering the api in the forward_input_file.")
527
- forward_content = preprocess_forward_content(forward_content)
528
- logger.info("Finish filtering the api in the forward_input_file.")
529
-
530
- out_path = os.path.realpath(args.out_path) if args.out_path else "./"
531
- check_path_before_create(out_path)
532
- create_directory(out_path)
533
- out_path_checker = FileChecker(out_path, FileCheckConst.DIR, ability=FileCheckConst.WRITE_ABLE)
534
- out_path = out_path_checker.common_check()
535
- save_error_data = args.save_error_data
536
-
537
- result_csv_path = os.path.join(out_path, RESULT_FILE_NAME)
538
- details_csv_path = os.path.join(out_path, DETAILS_FILE_NAME)
539
- if args.result_csv_path:
540
- result_csv_path = get_validated_result_csv_path(args.result_csv_path, 'result')
541
- details_csv_path = get_validated_details_csv_path(result_csv_path)
542
- white_list = msCheckerConfig.white_list
543
- black_list = msCheckerConfig.black_list
544
- error_data_path = msCheckerConfig.error_data_path
545
- is_online = msCheckerConfig.is_online
546
- nfs_path = msCheckerConfig.nfs_path
547
- host = msCheckerConfig.host
548
- port = msCheckerConfig.port
549
- rank_list = msCheckerConfig.rank_list
550
- tls_path = msCheckerConfig.tls_path
551
- if args.config_path:
552
- config_path_checker = FileChecker(args.config_path, FileCheckConst.FILE,
553
- FileCheckConst.READ_ABLE, FileCheckConst.JSON_SUFFIX)
554
- checked_config_path = config_path_checker.common_check()
555
- _, task_config = parse_json_config(checked_config_path, Const.RUN_UT)
556
- white_list = task_config.white_list
557
- black_list = task_config.black_list
558
- error_data_path = task_config.error_data_path
559
- is_online = task_config.is_online
560
- nfs_path = task_config.nfs_path
561
- host = task_config.host
562
- port = task_config.port
563
- rank_list = task_config.rank_list
564
- tls_path = task_config.tls_path
565
-
566
- if save_error_data:
567
- if args.result_csv_path:
568
- time_info = result_csv_path.split('.')[0].split('_')[-1]
569
- global UT_ERROR_DATA_DIR
570
- UT_ERROR_DATA_DIR = 'ut_error_data' + time_info
571
- error_data_path = initialize_save_error_data(error_data_path)
572
- online_config = OnlineConfig(is_online, nfs_path, host, port, rank_list, tls_path)
573
- run_ut_config = RunUTConfig(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data,
574
- args.result_csv_path, real_data_path, set(white_list), set(black_list), error_data_path,
575
- online_config)
576
- run_ut(run_ut_config)
577
-
578
-
579
- if __name__ == '__main__':
580
- _run_ut()
581
- logger.info("UT task completed.")
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
4
+ # All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ import argparse
19
+ import os
20
+ import csv
21
+ import sys
22
+ import time
23
+ import gc
24
+ from collections import namedtuple
25
+
26
+ try:
27
+ import torch_npu
28
+ except ImportError:
29
+ is_gpu = True
30
+ current_device = "cuda"
31
+ else:
32
+ is_gpu = False
33
+ current_device = "npu"
34
+ import torch
35
+ from tqdm import tqdm
36
+
37
+ from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import BackwardMessage, UtDataInfo, \
38
+ get_validated_result_csv_path, get_validated_details_csv_path, exec_api, record_skip_info
39
+ from msprobe.pytorch.api_accuracy_checker.run_ut.data_generate import gen_api_params, gen_args
40
+ from msprobe.pytorch.api_accuracy_checker.common.utils import api_info_preprocess, \
41
+ initialize_save_path, UtDataProcessor, extract_basic_api_segments, ApiData
42
+ from msprobe.pytorch.api_accuracy_checker.compare.compare import Comparator
43
+ from msprobe.pytorch.api_accuracy_checker.compare.compare_column import CompareColumn
44
+ from msprobe.pytorch.api_accuracy_checker.common.config import msCheckerConfig
45
+ from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward
46
+ from msprobe.core.common.file_utils import FileChecker, change_mode, check_path_before_create, \
47
+ create_directory, get_json_contents, read_csv
48
+ from msprobe.pytorch.common.log import logger
49
+ from msprobe.pytorch.pt_config import parse_json_config
50
+ from msprobe.core.common.const import Const, FileCheckConst, CompareConst
51
+ from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import ATTL, ATTLConfig, move2device_exec
52
+ from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.device_dispatch import ConsumerDispatcher
53
+ from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_cpu_params, generate_device_params
54
+
55
+
56
+ current_time = time.strftime("%Y%m%d%H%M%S")
57
+ UT_ERROR_DATA_DIR = 'ut_error_data' + current_time
58
+ RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv"
59
+ DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv"
60
+ RunUTConfig = namedtuple('RunUTConfig', ['forward_content', 'backward_content', 'result_csv_path', 'details_csv_path',
61
+ 'save_error_data', 'is_continue_run_ut', 'real_data_path', 'white_list',
62
+ 'black_list', 'error_data_path', 'online_config'])
63
+
64
+ OnlineConfig = namedtuple('OnlineConfig', ['is_online', 'nfs_path', 'host', 'port', 'rank_list', 'tls_path'])
65
+
66
+ not_backward_list = ['repeat_interleave']
67
+
68
+
69
+ tqdm_params = {
70
+ 'smoothing': 0, # 平滑进度条的预计剩余时间,取值范围0到1
71
+ 'desc': 'Processing', # 进度条前的描述文字
72
+ 'leave': True, # 迭代完成后保留进度条的显示
73
+ 'ncols': 75, # 进度条的固定宽度
74
+ 'mininterval': 0.1, # 更新进度条的最小间隔秒数
75
+ 'maxinterval': 1.0, # 更新进度条的最大间隔秒数
76
+ 'miniters': 1, # 更新进度条之间的最小迭代次数
77
+ 'ascii': None, # 根据环境自动使用ASCII或Unicode字符
78
+ 'unit': 'it', # 迭代单位
79
+ 'unit_scale': True, # 自动根据单位缩放
80
+ 'dynamic_ncols': True, # 动态调整进度条宽度以适应控制台
81
+ 'bar_format': '{l_bar}{bar}| {n}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]' # 自定义进度条输出格式
82
+ }
83
+
84
+
85
+ def run_ut(config):
86
+ logger.info("start UT test")
87
+ if config.online_config.is_online:
88
+ logger.info(f"UT task result will be saved in {config.result_csv_path}".replace(".csv", "_rank*.csv"))
89
+ logger.info(f"UT task details will be saved in {config.details_csv_path}".replace(".csv", "_rank*.csv"))
90
+ else:
91
+ logger.info(f"UT task result will be saved in {config.result_csv_path}")
92
+ logger.info(f"UT task details will be saved in {config.details_csv_path}")
93
+
94
+ if config.save_error_data:
95
+ logger.info(f"UT task error_datas will be saved in {config.error_data_path}")
96
+ compare = Comparator(config.result_csv_path, config.details_csv_path, config.is_continue_run_ut, config=config)
97
+
98
+ if config.online_config.is_online:
99
+ run_api_online(config, compare)
100
+ else:
101
+ csv_df = read_csv(config.result_csv_path)
102
+ api_name_set = {row[0] for row in csv_df.itertuples(index=False, name=None)}
103
+ run_api_offline(config, compare, api_name_set)
104
+ for result_csv_path, details_csv_path in zip(compare.save_path_list, compare.detail_save_path_list):
105
+ change_mode(result_csv_path, FileCheckConst.DATA_FILE_AUTHORITY)
106
+ change_mode(details_csv_path, FileCheckConst.DATA_FILE_AUTHORITY)
107
+ logger.info(f"UT task result csv is saved in {result_csv_path}")
108
+ logger.info(f"UT task details csv is saved in {details_csv_path}")
109
+ compare.print_pretest_result()
110
+
111
+
112
+ def run_api_offline(config, compare, api_name_set):
113
+ err_column = CompareColumn()
114
+ for _, (api_full_name, api_info_dict) in enumerate(tqdm(config.forward_content.items(), **tqdm_params)):
115
+ if api_full_name in api_name_set:
116
+ continue
117
+ if is_unsupported_api(api_full_name):
118
+ skip_message = f"API {api_full_name} not support for run ut. SKIP."
119
+ compare_alg_results = err_column.to_column_value(CompareConst.SKIP, skip_message)
120
+ record_skip_info(api_full_name, compare, compare_alg_results)
121
+ continue
122
+ _, api_name = extract_basic_api_segments(api_full_name)
123
+ if not api_name:
124
+ err_message = f"API {api_full_name} not support for run ut. SKIP."
125
+ logger.error(err_message)
126
+ compare_alg_results = err_column.to_column_value(CompareConst.SKIP, err_message)
127
+ record_skip_info(api_full_name, compare, compare_alg_results)
128
+ continue
129
+ try:
130
+ if blacklist_and_whitelist_filter(api_name, config.black_list, config.white_list):
131
+ skip_message = f"API {api_name} in black list or not in white list. SKIP."
132
+ logger.info(skip_message)
133
+ compare_alg_results = err_column.to_column_value(CompareConst.SKIP, skip_message)
134
+ record_skip_info(api_full_name, compare, compare_alg_results)
135
+ continue
136
+ data_info = run_torch_api(api_full_name, config.real_data_path, config.backward_content, api_info_dict)
137
+ is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info)
138
+ if config.save_error_data:
139
+ do_save_error_data(api_full_name, data_info, config.error_data_path, is_fwd_success, is_bwd_success)
140
+ except Exception as err:
141
+ if "expected scalar type Long" in str(err):
142
+ logger.warning(f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
143
+ f"'int32_to_int64' list in accuracy_tools/api_accuracy_check/common/utils.py file.")
144
+ else:
145
+ logger.error(f"Run {api_full_name} UT Error: %s" % str(err))
146
+ compare_alg_results = err_column.to_column_value(CompareConst.SKIP, str(err))
147
+ record_skip_info(api_full_name, compare, compare_alg_results)
148
+ finally:
149
+ if is_gpu:
150
+ torch.cuda.empty_cache()
151
+ else:
152
+ torch.npu.empty_cache()
153
+ gc.collect()
154
+
155
+
156
+ def run_api_online(config, compare):
157
+ attl = init_attl(config.online_config)
158
+ dispatcher = ConsumerDispatcher(compare=compare)
159
+ dispatcher.start(handle_func=run_torch_api_online, config=config)
160
+
161
+ def tcp_communication_flow():
162
+ while True:
163
+ api_data = attl.recv()
164
+ if api_data == 'STOP_':
165
+ continue
166
+ if api_data == 'KILL_':
167
+ time.sleep(1)
168
+ logger.info("==========接收到STOP信号==========")
169
+ dispatcher.stop()
170
+ attl.stop_serve()
171
+ time.sleep(1)
172
+ break
173
+ if not isinstance(api_data, ApiData):
174
+ continue
175
+ api_full_name = api_data.name
176
+ _, api_name = extract_basic_api_segments(api_full_name)
177
+ if blacklist_and_whitelist_filter(api_name, config.black_list, config.white_list):
178
+ continue
179
+ if api_data.rank in config.online_config.rank_list:
180
+ dispatcher.update_consume_queue(api_data)
181
+
182
+ def shared_storage_communication_flow():
183
+ flag_num = -1
184
+ while True:
185
+ api_data = attl.download()
186
+ if api_data == "start":
187
+ if flag_num == -1:
188
+ flag_num += 1
189
+ flag_num += 1
190
+ if api_data == "end":
191
+ flag_num -= 1
192
+ if flag_num == 0:
193
+ dispatcher.stop()
194
+ break
195
+ if not isinstance(api_data, ApiData):
196
+ continue
197
+ api_full_name = api_data.name
198
+ _, api_name = extract_basic_api_segments(api_full_name)
199
+ if blacklist_and_whitelist_filter(api_name, config.black_list, config.white_list):
200
+ continue
201
+ if api_data.rank in config.online_config.rank_list:
202
+ dispatcher.update_consume_queue(api_data)
203
+
204
+ if config.online_config.nfs_path:
205
+ shared_storage_communication_flow()
206
+ else:
207
+ tcp_communication_flow()
208
+
209
+
210
+ def blacklist_and_whitelist_filter(api_name, black_list, white_list):
211
+ """
212
+ run api(api_name) if api_name not in black_list and in white_list.
213
+ If api is both in black_list and black_list, black_list first.
214
+ return: False for exec api, True for not exec
215
+ """
216
+ if black_list and api_name in black_list:
217
+ return True
218
+ if white_list and api_name not in white_list:
219
+ return True
220
+ return False
221
+
222
+
223
+ def is_unsupported_api(api_name):
224
+ split_name = api_name.split(Const.SEP)[0]
225
+ flag = split_name == Const.DISTRIBUTED
226
+ if flag:
227
+ logger.info(f"{split_name} api is not supported for run ut. SKIP.")
228
+ return flag
229
+
230
+
231
+ def do_save_error_data(api_full_name, data_info, error_data_path, is_fwd_success, is_bwd_success):
232
+ if not is_fwd_success or not is_bwd_success:
233
+ processor = UtDataProcessor(error_data_path)
234
+ for element in data_info.in_fwd_data_list:
235
+ processor.save_tensors_in_element(api_full_name + '.forward.input', element)
236
+ processor.save_tensors_in_element(api_full_name + '.forward.output.bench', data_info.bench_output)
237
+ processor.save_tensors_in_element(api_full_name + '.forward.output.device', data_info.device_output)
238
+ processor.save_tensors_in_element(api_full_name + '.backward.input', data_info.grad_in)
239
+ processor.save_tensors_in_element(api_full_name + '.backward.output.bench', data_info.bench_grad)
240
+ processor.save_tensors_in_element(api_full_name + '.backward.output.device', data_info.device_grad)
241
+
242
+
243
+ def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict):
244
+ in_fwd_data_list = []
245
+ backward_message = ''
246
+ api_type, api_name = extract_basic_api_segments(api_full_name)
247
+ args, kwargs, need_grad = get_api_info(api_info_dict, api_name, real_data_path)
248
+ in_fwd_data_list.append(args)
249
+ in_fwd_data_list.append(kwargs)
250
+ need_backward = api_full_name in backward_content
251
+ if not need_grad:
252
+ logger.warning("%s %s" % (api_full_name, BackwardMessage.UNSUPPORT_BACKWARD_MESSAGE))
253
+ backward_message += BackwardMessage.UNSUPPORT_BACKWARD_MESSAGE
254
+ if api_name in not_backward_list:
255
+ need_grad = False
256
+ logger.warning("%s %s" % (api_full_name, BackwardMessage.NO_BACKWARD_RESULT_MESSAGE))
257
+ backward_message += BackwardMessage.NO_BACKWARD_RESULT_MESSAGE
258
+ need_backward = need_backward and need_grad
259
+ if kwargs.get("device"):
260
+ del kwargs["device"]
261
+ cpu_args, cpu_kwargs = generate_cpu_params(args, kwargs, need_backward, api_name)
262
+ device_args, device_kwargs = generate_device_params(args, kwargs, need_backward, api_name)
263
+ bench_grad_out, device_grad_out = None, None
264
+ out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs)
265
+ device_out = exec_api(api_type, api_name, current_device, device_args, device_kwargs)
266
+ current_path = os.path.dirname(os.path.realpath(__file__))
267
+ ut_setting_path = os.path.join(current_path, "torch_ut_setting.json")
268
+ api_setting_dict = get_json_contents(ut_setting_path)
269
+ grad_input_index = api_setting_dict.get(api_name)
270
+ grad_index = None
271
+ grad, bench_grad = None, None
272
+ if grad_input_index is not None:
273
+ grad_index = grad_input_index.get('grad_index')
274
+
275
+ if need_backward:
276
+ if need_to_backward(grad_index, out):
277
+ backward_args = backward_content[api_full_name].get("input")
278
+ func_options = {
279
+ 'real_data_path': real_data_path
280
+ }
281
+ grad = gen_args(backward_args, api_name, func_options)[0]
282
+ bench_grad, _ = generate_cpu_params(grad, {}, False, api_name)
283
+ bench_grad_out = run_backward(cpu_args, bench_grad, grad_index, out)
284
+ device_grad = grad.clone().detach().to(current_device)
285
+ device_grad_out = run_backward(device_args, device_grad, grad_index, device_out)
286
+ else:
287
+ backward_message += BackwardMessage.MULTIPLE_BACKWARD_MESSAGE
288
+ if api_name == "npu_fusion_attention":
289
+ out = out[0]
290
+ device_out = device_out[0]
291
+
292
+ return UtDataInfo(bench_grad_out, device_grad_out, device_out, out, bench_grad, in_fwd_data_list, backward_message)
293
+
294
+
295
+ def run_torch_api_online(api_full_name, api_data, backward_content):
296
+ in_fwd_data_list = []
297
+ api_type, api_name = extract_basic_api_segments(api_full_name)
298
+ args, kwargs, out = api_data.args, api_data.kwargs, api_data.result
299
+ in_fwd_data_list.append(args)
300
+ in_fwd_data_list.append(kwargs)
301
+ if kwargs.get("device"):
302
+ del kwargs["device"]
303
+
304
+ device_out = exec_api(api_type, api_name, Const.CUDA_LOWERCASE, args, kwargs)
305
+ device_out = move2device_exec(device_out, "cpu")
306
+ return UtDataInfo(None, None, out, device_out, None, in_fwd_data_list, None, rank=api_data.rank)
307
+
308
+
309
+ def get_api_info(api_info_dict, api_name, real_data_path):
310
+ convert_type, api_info_dict = api_info_preprocess(api_name, api_info_dict)
311
+ need_grad = True
312
+ if api_info_dict.get("input_kwargs") and "out" in api_info_dict.get("input_kwargs"):
313
+ need_grad = False
314
+ args, kwargs = gen_api_params(api_info_dict, api_name, need_grad, convert_type, real_data_path)
315
+ return args, kwargs, need_grad
316
+
317
+
318
+ def need_to_backward(grad_index, out):
319
+ if grad_index is None and isinstance(out, (list, tuple)):
320
+ return False
321
+ return True
322
+
323
+
324
+ def run_backward(args, grad, grad_index, out):
325
+ if grad_index is not None:
326
+ out[grad_index].backward(grad)
327
+ else:
328
+ out.backward(grad)
329
+ args_grad = []
330
+ for arg in args:
331
+ if isinstance(arg, torch.Tensor):
332
+ args_grad.append(arg.grad)
333
+ grad_out = args_grad
334
+
335
+ return grad_out
336
+
337
+
338
+ def initialize_save_error_data(error_data_path):
339
+ check_path_before_create(error_data_path)
340
+ create_directory(error_data_path)
341
+ error_data_path_checker = FileChecker(error_data_path, FileCheckConst.DIR,
342
+ ability=FileCheckConst.WRITE_ABLE)
343
+ error_data_path = error_data_path_checker.common_check()
344
+ error_data_path = initialize_save_path(error_data_path, UT_ERROR_DATA_DIR)
345
+ return error_data_path
346
+
347
+
348
+ def init_attl(config):
349
+ """config: OnlineConfig"""
350
+ attl = ATTL('gpu', ATTLConfig(is_benchmark_device=True,
351
+ connect_ip=config.host,
352
+ connect_port=config.port,
353
+ nfs_path=config.nfs_path,
354
+ tls_path=config.tls_path))
355
+ return attl
356
+
357
+
358
+ def _run_ut_parser(parser):
359
+ parser.add_argument("-api_info", "--api_info_file", dest="api_info_file", default="", type=str,
360
+ help="<Optional> The api param tool result file: generate from api param tool, "
361
+ "a json file.",
362
+ required=False)
363
+ parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str,
364
+ help="<optional> The ut task result out path.",
365
+ required=False)
366
+ parser.add_argument('-save_error_data', dest="save_error_data", action="store_true",
367
+ help="<optional> Save compare failed api output.", required=False)
368
+ parser.add_argument("-j", "--jit_compile", dest="jit_compile", action="store_true",
369
+ help="<optional> whether to turn on jit compile", required=False)
370
+
371
+ class UniqueDeviceAction(argparse.Action):
372
+ def __call__(self, parser, namespace, values, option_string=None):
373
+ unique_values = set(values)
374
+ if len(values) != len(unique_values):
375
+ parser.error("device id must be unique")
376
+ for device_id in values:
377
+ if not 0 <= device_id:
378
+ parser.error("device id must be greater than or equal to 0")
379
+ setattr(namespace, self.dest, values)
380
+
381
+ parser.add_argument("-d", "--device", dest="device_id", nargs='+', type=int,
382
+ help="<optional> set device id to run ut, must be unique and in range 0-7",
383
+ default=[0], required=False, action=UniqueDeviceAction)
384
+ parser.add_argument("-csv_path", "--result_csv_path", dest="result_csv_path", default="", type=str,
385
+ help="<optional> The path of accuracy_checking_result_{timestamp}.csv, "
386
+ "when run ut is interrupted, enter the file path to continue run ut.",
387
+ required=False)
388
+ parser.add_argument("-f", "--filter_api", dest="filter_api", action="store_true",
389
+ help="<optional> Whether to filter the api in the api_info_file.", required=False)
390
+ parser.add_argument("-config", "--config_path", dest="config_path", default="", type=str,
391
+ help="<optional> The path of config.json", required=False)
392
+
393
+
394
+ def preprocess_forward_content(forward_content):
395
+ processed_content = {}
396
+ base_keys_variants = {}
397
+ arg_cache = {}
398
+
399
+ for key, value in forward_content.items():
400
+ base_key = key.rsplit(Const.SEP, 1)[0]
401
+
402
+ if key not in arg_cache:
403
+ filtered_new_args = [
404
+ {k: v for k, v in arg.items() if k not in ['Max', 'Min']}
405
+ for arg in value['input_args']
406
+ if isinstance(arg, dict)
407
+ ]
408
+ arg_cache[key] = (filtered_new_args, value['input_kwargs'])
409
+
410
+ filtered_new_args, new_kwargs = arg_cache[key]
411
+
412
+ if base_key not in base_keys_variants:
413
+ processed_content[key] = value
414
+ base_keys_variants[base_key] = {key}
415
+ else:
416
+ is_duplicate = False
417
+ for variant in base_keys_variants.get(base_key, []):
418
+ try:
419
+ existing_args, existing_kwargs = arg_cache.get(variant)
420
+ except KeyError as e:
421
+ logger.error(f"KeyError: {e} when processing {key}")
422
+ if existing_args == filtered_new_args and existing_kwargs == new_kwargs:
423
+ is_duplicate = True
424
+ break
425
+
426
+ if not is_duplicate:
427
+ processed_content[key] = value
428
+ base_keys_variants[base_key].add(key)
429
+
430
+ return processed_content
431
+
432
+
433
+ def _run_ut(parser=None):
434
+ if not parser:
435
+ parser = argparse.ArgumentParser()
436
+ _run_ut_parser(parser)
437
+ args = parser.parse_args(sys.argv[1:])
438
+ run_ut_command(args)
439
+
440
+
441
def run_ut_command(args):
    """Drive a full run_ut session from parsed command-line arguments.

    Resolves the compute device, optionally loads/filters the api info file,
    prepares the output directory and result/details csv paths, merges config
    overrides from --config_path over msCheckerConfig defaults, and finally
    launches run_ut with the assembled RunUTConfig.

    Raises:
        NotImplementedError: if setting the requested device id fails.
    """
    if not is_gpu:
        # NPU only: apply the --jit_compile switch before any op runs.
        torch.npu.set_compile_mode(jit_compile=args.jit_compile)
    used_device = current_device + ":" + str(args.device_id[0])
    try:
        if is_gpu:
            torch.cuda.set_device(used_device)
        else:
            torch.npu.set_device(used_device)
    except Exception as error:
        logger.error(f"Set device id failed. device id is: {args.device_id}")
        raise NotImplementedError from error

    # Online-check scenario: no api info is dumped, so forward_content,
    # backward_content and real_data_path stay None.
    # Offline scenario: they are parsed from api_info_file below.
    forward_content, backward_content, real_data_path = None, None, None
    if args.api_info_file:
        api_info_file_checker = FileChecker(file_path=args.api_info_file, path_type=FileCheckConst.FILE,
                                            ability=FileCheckConst.READ_ABLE, file_type=FileCheckConst.JSON_SUFFIX)
        checked_api_info = api_info_file_checker.common_check()
        forward_content, backward_content, real_data_path = parse_json_info_forward_backward(checked_api_info)
        if args.filter_api:
            logger.info("Start filtering the api in the api_info_file.")
            forward_content = preprocess_forward_content(forward_content)
            logger.info("Finish filtering the api in the api_info_file.")

    out_path = os.path.realpath(args.out_path) if args.out_path else "./"
    check_path_before_create(out_path)
    create_directory(out_path)
    out_path_checker = FileChecker(out_path, FileCheckConst.DIR, ability=FileCheckConst.WRITE_ABLE)
    out_path = out_path_checker.common_check()
    save_error_data = args.save_error_data

    result_csv_path = os.path.join(out_path, RESULT_FILE_NAME)
    details_csv_path = os.path.join(out_path, DETAILS_FILE_NAME)
    if args.result_csv_path:
        # Resuming an interrupted run: validate and reuse the existing csvs.
        result_csv_path = get_validated_result_csv_path(args.result_csv_path, 'result')
        details_csv_path = get_validated_details_csv_path(result_csv_path)
    # Defaults come from msCheckerConfig; --config_path overrides them below.
    white_list = msCheckerConfig.white_list
    black_list = msCheckerConfig.black_list
    error_data_path = msCheckerConfig.error_data_path
    is_online = msCheckerConfig.is_online
    nfs_path = msCheckerConfig.nfs_path
    host = msCheckerConfig.host
    port = msCheckerConfig.port
    rank_list = msCheckerConfig.rank_list
    tls_path = msCheckerConfig.tls_path
    if args.config_path:
        config_path_checker = FileChecker(args.config_path, FileCheckConst.FILE,
                                          FileCheckConst.READ_ABLE, FileCheckConst.JSON_SUFFIX)
        checked_config_path = config_path_checker.common_check()
        _, task_config = parse_json_config(checked_config_path, Const.RUN_UT)
        white_list = task_config.white_list
        black_list = task_config.black_list
        error_data_path = task_config.error_data_path
        is_online = task_config.is_online
        nfs_path = task_config.nfs_path
        host = task_config.host
        port = task_config.port
        rank_list = task_config.rank_list
        tls_path = task_config.tls_path

    if save_error_data:
        if args.result_csv_path:
            # Derive the timestamp suffix from the resumed csv file name so the
            # previous run's error-data directory is reused.
            time_info = result_csv_path.split('.')[0].split('_')[-1]
            global UT_ERROR_DATA_DIR
            UT_ERROR_DATA_DIR = 'ut_error_data' + time_info
        error_data_path = initialize_save_error_data(error_data_path)
    online_config = OnlineConfig(is_online, nfs_path, host, port, rank_list, tls_path)
    run_ut_config = RunUTConfig(forward_content, backward_content, result_csv_path, details_csv_path, save_error_data,
                                args.result_csv_path, real_data_path, set(white_list), set(black_list), error_data_path,
                                online_config)
    run_ut(run_ut_config)
514
+
515
+
516
if __name__ == '__main__':
    # Script entry point: parse CLI arguments and execute the run_ut workflow.
    _run_ut()
    logger.info("UT task completed.")