PyPI - mindstudio-probe - Versions diffs - 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

mindstudio-probe 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (278) hide show

{mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +201 -201
{mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +36 -34
mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
{mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +1 -1
{mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +1 -0
msprobe/README.md +131 -237
msprobe/__init__.py +16 -1
msprobe/{config/config.json → config.json} +47 -49
msprobe/core/advisor/advisor.py +124 -124
msprobe/core/advisor/advisor_const.py +58 -59
msprobe/core/advisor/advisor_result.py +58 -58
msprobe/core/common/const.py +402 -318
msprobe/core/common/exceptions.py +99 -99
msprobe/core/common/{file_check.py → file_utils.py} +523 -283
msprobe/core/common/inplace_op_checker.py +38 -0
msprobe/core/common/inplace_ops.yaml +251 -0
msprobe/core/common/log.py +86 -69
msprobe/core/common/utils.py +371 -616
msprobe/core/common_config.py +78 -71
msprobe/core/compare/acc_compare.py +472 -298
msprobe/core/compare/check.py +180 -95
msprobe/core/compare/compare_cli.py +69 -49
msprobe/core/compare/highlight.py +259 -222
msprobe/core/compare/multiprocessing_compute.py +174 -149
msprobe/core/compare/npy_compare.py +310 -295
msprobe/core/compare/utils.py +464 -429
msprobe/core/data_dump/data_collector.py +153 -144
msprobe/core/data_dump/data_processor/base.py +337 -293
msprobe/core/data_dump/data_processor/factory.py +76 -59
msprobe/core/data_dump/data_processor/mindspore_processor.py +192 -198
msprobe/core/data_dump/data_processor/pytorch_processor.py +383 -389
msprobe/core/data_dump/json_writer.py +117 -116
msprobe/core/data_dump/scope.py +194 -178
msprobe/core/grad_probe/constant.py +74 -70
msprobe/core/grad_probe/grad_compare.py +170 -175
msprobe/core/grad_probe/utils.py +77 -52
msprobe/docs/01.installation.md +99 -0
msprobe/docs/02.config_introduction.md +137 -0
msprobe/docs/03.config_examples.md +237 -0
msprobe/docs/04.acl_config_examples.md +78 -0
msprobe/docs/05.data_dump_PyTorch.md +326 -0
msprobe/docs/06.data_dump_MindSpore.md +285 -0
msprobe/docs/07.accuracy_checker_PyTorch.md +297 -0
msprobe/docs/08.accuracy_checker_online_PyTorch.md +238 -0
msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
msprobe/docs/10.accuracy_compare_PyTorch.md +327 -0
msprobe/docs/11.accuracy_compare_MindSpore.md +333 -0
msprobe/docs/12.overflow_check_PyTorch.md +79 -0
msprobe/docs/13.overflow_check_MindSpore.md +31 -0
msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
msprobe/docs/15.free_benchmarking_PyTorch.md +170 -0
msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
msprobe/{doc/grad_probe/grad_probe.md → docs/17.grad_probe.md} +205 -207
msprobe/{pytorch/doc/在线精度比对.md → docs/18.online_dispatch.md} +89 -90
msprobe/docs/FAQ.md +189 -0
msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
msprobe/docs/img/free_benchmark_framework.png +0 -0
msprobe/docs/img/ms_dump.png +0 -0
msprobe/docs/img/ms_layer.png +0 -0
msprobe/docs/img/pt_dump.png +0 -0
msprobe/mindspore/__init__.py +2 -1
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +278 -245
msprobe/mindspore/api_accuracy_checker/api_info.py +76 -69
msprobe/mindspore/api_accuracy_checker/api_runner.py +155 -151
msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +196 -196
msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
msprobe/mindspore/api_accuracy_checker/compute_element.py +238 -223
msprobe/mindspore/api_accuracy_checker/main.py +8 -15
msprobe/mindspore/api_accuracy_checker/type_mapping.py +113 -113
msprobe/mindspore/api_accuracy_checker/utils.py +79 -62
msprobe/mindspore/cell_processor.py +58 -34
msprobe/mindspore/common/const.py +108 -87
msprobe/mindspore/common/log.py +37 -37
msprobe/mindspore/common/utils.py +97 -57
msprobe/mindspore/compare/distributed_compare.py +62 -75
msprobe/mindspore/compare/layer_mapping.py +146 -0
msprobe/mindspore/compare/modify_mapping.py +107 -0
msprobe/mindspore/compare/ms_compare.py +357 -117
msprobe/mindspore/compare/ms_graph_compare.py +364 -317
msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -399
msprobe/mindspore/debugger/debugger_config.py +69 -74
msprobe/mindspore/debugger/precision_debugger.py +150 -107
msprobe/mindspore/dump/dump_tool_factory.py +50 -35
msprobe/mindspore/dump/hook_cell/api_registry.py +128 -104
msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -53
msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +994 -925
msprobe/mindspore/dump/hook_cell/wrap_api.py +121 -0
msprobe/mindspore/dump/jit_dump.py +96 -56
msprobe/mindspore/dump/kernel_graph_dump.py +75 -60
msprobe/mindspore/dump/kernel_kbyk_dump.py +79 -65
msprobe/mindspore/free_benchmark/api_pynative_self_check.py +131 -116
msprobe/mindspore/free_benchmark/common/config.py +27 -12
msprobe/mindspore/free_benchmark/common/handler_params.py +32 -17
msprobe/mindspore/free_benchmark/common/utils.py +85 -71
msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -842
msprobe/mindspore/free_benchmark/decorator/dec_forward.py +57 -42
msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +122 -107
msprobe/mindspore/free_benchmark/handler/base_handler.py +105 -90
msprobe/mindspore/free_benchmark/handler/check_handler.py +56 -41
msprobe/mindspore/free_benchmark/handler/fix_handler.py +51 -36
msprobe/mindspore/free_benchmark/handler/handler_factory.py +36 -21
msprobe/mindspore/free_benchmark/perturbation/add_noise.py +82 -67
msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +36 -21
msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +78 -63
msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +77 -0
msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +49 -34
msprobe/mindspore/free_benchmark/perturbation/no_change.py +27 -12
msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +44 -27
msprobe/mindspore/free_benchmark/self_check_tool_factory.py +48 -33
msprobe/mindspore/grad_probe/global_context.py +100 -91
msprobe/mindspore/grad_probe/grad_analyzer.py +231 -231
msprobe/mindspore/grad_probe/grad_monitor.py +27 -27
msprobe/mindspore/grad_probe/grad_stat_csv.py +131 -131
msprobe/mindspore/grad_probe/hook.py +94 -92
msprobe/mindspore/grad_probe/utils.py +29 -28
msprobe/mindspore/ms_config.py +128 -126
msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +60 -45
msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +49 -34
msprobe/mindspore/runtime.py +4 -4
msprobe/mindspore/service.py +297 -354
msprobe/mindspore/task_handler_factory.py +24 -24
msprobe/msprobe.py +105 -107
msprobe/pytorch/__init__.py +23 -4
msprobe/pytorch/api_accuracy_checker/common/config.py +70 -55
msprobe/pytorch/api_accuracy_checker/common/utils.py +246 -165
msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +230 -213
msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +632 -581
msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
msprobe/pytorch/api_accuracy_checker/compare/compare.py +416 -381
msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +90 -73
msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +265 -244
msprobe/pytorch/api_accuracy_checker/config.yaml +10 -10
msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +370 -332
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +221 -199
msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +150 -134
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +518 -581
msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +213 -74
msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +218 -202
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +370 -324
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +227 -204
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +244 -218
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
msprobe/pytorch/bench_functions/__init__.py +30 -15
msprobe/pytorch/bench_functions/apply_adam_w.py +43 -28
msprobe/pytorch/bench_functions/confusion_transpose.py +34 -19
msprobe/pytorch/bench_functions/fast_gelu.py +70 -55
msprobe/pytorch/bench_functions/layer_norm_eval.py +21 -6
msprobe/pytorch/bench_functions/linear.py +27 -12
msprobe/pytorch/bench_functions/matmul_backward.py +63 -48
msprobe/pytorch/bench_functions/npu_fusion_attention.py +538 -421
msprobe/pytorch/bench_functions/rms_norm.py +30 -15
msprobe/pytorch/bench_functions/rotary_mul.py +71 -52
msprobe/pytorch/bench_functions/scaled_mask_softmax.py +41 -26
msprobe/pytorch/bench_functions/swiglu.py +70 -55
msprobe/pytorch/common/__init__.py +17 -2
msprobe/pytorch/common/compare_script.template +14 -14
msprobe/pytorch/common/log.py +33 -32
msprobe/pytorch/common/parse_json.py +54 -39
msprobe/pytorch/common/utils.py +310 -300
msprobe/pytorch/compare/distributed_compare.py +66 -66
msprobe/pytorch/compare/mapping.yaml +607 -607
msprobe/pytorch/compare/match.py +49 -33
msprobe/pytorch/compare/pt_compare.py +82 -40
msprobe/pytorch/debugger/debugger_config.py +108 -95
msprobe/pytorch/debugger/precision_debugger.py +173 -125
msprobe/pytorch/free_benchmark/__init__.py +23 -8
msprobe/pytorch/free_benchmark/common/constant.py +70 -70
msprobe/pytorch/free_benchmark/common/counter.py +71 -71
msprobe/pytorch/free_benchmark/common/enums.py +65 -37
msprobe/pytorch/free_benchmark/common/params.py +144 -129
msprobe/pytorch/free_benchmark/common/utils.py +118 -102
msprobe/pytorch/free_benchmark/compare/grad_saver.py +200 -179
msprobe/pytorch/free_benchmark/compare/single_benchmark.py +119 -104
msprobe/pytorch/free_benchmark/main.py +120 -105
msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +28 -13
msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +56 -41
msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +105 -90
msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +119 -104
msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +87 -63
msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +83 -68
msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +43 -28
msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +60 -45
msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +34 -19
msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +256 -217
msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +54 -39
msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +38 -23
msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +45 -30
msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +185 -170
msprobe/pytorch/function_factory.py +91 -75
msprobe/pytorch/functional/module_dump.py +84 -0
msprobe/pytorch/grad_probe/grad_monitor.py +91 -90
msprobe/pytorch/grad_probe/grad_stat_csv.py +128 -128
msprobe/pytorch/hook_module/__init__.py +16 -1
msprobe/pytorch/hook_module/api_registry.py +166 -161
msprobe/pytorch/hook_module/hook_module.py +118 -120
msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1877
msprobe/pytorch/hook_module/utils.py +28 -29
msprobe/pytorch/hook_module/wrap_aten.py +111 -110
msprobe/pytorch/hook_module/wrap_distributed.py +77 -78
msprobe/pytorch/hook_module/wrap_functional.py +104 -105
msprobe/pytorch/hook_module/wrap_npu_custom.py +85 -84
msprobe/pytorch/hook_module/wrap_tensor.py +69 -71
msprobe/pytorch/hook_module/wrap_torch.py +84 -86
msprobe/pytorch/hook_module/wrap_vf.py +60 -62
msprobe/pytorch/module_processer.py +153 -138
msprobe/pytorch/online_dispatch/__init__.py +20 -20
msprobe/pytorch/online_dispatch/compare.py +235 -236
msprobe/pytorch/online_dispatch/dispatch.py +271 -271
msprobe/pytorch/online_dispatch/dump_compare.py +155 -156
msprobe/pytorch/online_dispatch/single_compare.py +391 -391
msprobe/pytorch/online_dispatch/torch_ops_config.yaml +57 -49
msprobe/pytorch/online_dispatch/utils.py +127 -146
msprobe/pytorch/parse.py +19 -4
msprobe/pytorch/parse_tool/cli.py +31 -32
msprobe/pytorch/parse_tool/lib/compare.py +259 -271
msprobe/pytorch/parse_tool/lib/config.py +52 -52
msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
msprobe/pytorch/parse_tool/lib/parse_tool.py +161 -158
msprobe/pytorch/parse_tool/lib/utils.py +320 -321
msprobe/pytorch/parse_tool/lib/visualization.py +85 -91
msprobe/pytorch/pt_config.py +317 -187
msprobe/pytorch/service.py +311 -252
mindstudio_probe-1.0.3.dist-info/RECORD +0 -272
msprobe/config/README.md +0 -539
msprobe/mindspore/doc/compare.md +0 -58
msprobe/mindspore/doc/dump.md +0 -217
msprobe/mindspore/dump/hook_cell/wrap_functional.py +0 -91
msprobe/mindspore/dump/hook_cell/wrap_tensor.py +0 -63
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
msprobe/pytorch/doc/FAQ.md +0 -193
msprobe/pytorch/doc/api_accuracy_checker.md +0 -313
msprobe/pytorch/doc/api_accuracy_checker_online.md +0 -187
msprobe/pytorch/doc/dump.md +0 -260
msprobe/pytorch/doc/msprobe精度工具数据dump标准性能基线报告.md +0 -182
msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -240
msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
msprobe/pytorch/doc/run_overflow_check.md +0 -25
msprobe/pytorch/doc/无标杆工具场景验证和性能基线报告.md +0 -151
msprobe/pytorch/functional/data_processor.py +0 -0
msprobe/pytorch/functional/dump_module.py +0 -39
{mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0
/msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
/msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
/msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
/msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
/msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
/msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
/msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
/msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
/msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
/msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
/msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
/msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
/msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
/msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
/msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
/msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
/msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
/msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
/msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
/msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
/msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
/msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
/msprobe/{config → docs}/img/free_benchmark.png +0 -0
/msprobe/{doc/grad_probe/img/image-1.png → docs/img/grad_probe_image-1.png} +0 -0
/msprobe/{doc/grad_probe/img/image-2.png → docs/img/grad_probe_image-2.png} +0 -0
/msprobe/{doc/grad_probe/img/image-3.png → docs/img/grad_probe_image-3.png} +0 -0
/msprobe/{doc/grad_probe/img/image-4.png → docs/img/grad_probe_image-4.png} +0 -0
/msprobe/{doc/grad_probe/img/image.png → docs/img/grad_probe_image.png} +0 -0
/msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0

msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py CHANGED Viewed

@@ -1,204 +1,227 @@
-import time
-from collections import namedtuple
-import pandas as pd
-import torch
-import torch.multiprocessing as mp
-from msprobe.core.common.const import Const
-from msprobe.pytorch.api_accuracy_checker.compare.api_precision_compare import online_api_precision_compare
-from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import DETAIL_TEST_ROWS, thousandth_standard_api, \
-    binary_standard_api, absolute_standard_api
-from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api
-from msprobe.pytorch.common.log import logger
-from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import move2target_device
-# NPU vs GPU api list
-CompareApi = set(absolute_standard_api) | set(binary_standard_api) | set(thousandth_standard_api)
-current_time = time.strftime("%Y%m%d%H%M%S")
-ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME = "api_precision_compare_result_" + current_time + "_rank*.csv"
-ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME = "api_precision_compare_details_" + current_time + "_rank*.csv"
-OnlineApiPrecisionCompareConfig = namedtuple('OnlineApiPrecisionCompareConfig',
-                                             ['npu_data', 'gpu_data', 'rank', 'result_csv_path', 'details_csv_path'])
-# namedtuple of [instance of Comparator, func of run_touch_api_online, config of run_ut_config]
-CommonCompareConfig = namedtuple('CommonCompareConfig', ['compare', 'handle_func', 'config'])
-def run_ut_process(xpu_id, consumer_queue, common_config, api_precision_csv_file):
-    """ When consumer_queue(shared with ConsumerDispatcher) is not empty, consume api data from consumer_queue.
-    :param xpu_id: int
-    :param consumer_queue: shared queues of ConsumerDispatcher
-    :param common_config: namedtuple of CommonCompareConfig
-    :param api_precision_csv_file: list, length is 2, result file name and details file name
-    :return:
-    """
-    gpu_device = torch.device(f'cuda:{xpu_id}')
-    while True:
-        if consumer_queue.empty():
-            time.sleep(0.1)
-            continue
-        api_data = consumer_queue.get()
-        if api_data == "KILL_":
-            # current consumer finish
-            return
-        _, api_name, _ = api_data.name.split(Const.SEP)
-        if api_name in CompareApi:
-            # NPU vs GPU
-            online_compare(api_data, gpu_device, common_config)
-        else:
-            # NPUvsCPU vs GPUvsCPU
-            online_precision_compare(api_data, gpu_device, common_config, api_precision_csv_file)
-def online_precision_compare(api_data, device, common_config, api_precision_csv_file):
-    """online run_ut for precision_compare: NPUvsCPU vs GPUvsCPU
-    1. get NPUvsCPU compare result
-    2. get GPUvsCPU compare result
-    3. call online_api_precision_compare
-    :param api_data
-    :param device
-    :param common_config: namedtuple of CommonCompareConfig
-    :param api_precision_csv_file: [result_file_name, details_file_name]
-    """
-    compare, func, config = common_config.compare, common_config.handle_func, common_config.config
-    api_full_name = api_data.name
-    [api_type, api_name, _] = api_full_name.split(Const.SEP)
-    npu_args, npu_kwargs, npu_out = api_data.args, api_data.kwargs, api_data.result
-    if npu_kwargs.get("device"):
-        del npu_kwargs["device"]
-    try:
-        # NPU vs CPU
-        cpu_out = exec_api(api_type, api_name, npu_args, npu_kwargs)
-        npu_data_info = UtDataInfo(None, None, npu_out, cpu_out, None, [], None, rank=api_data.rank)
-        npu_detail = compare.compare_output(api_full_name, npu_data_info, True)
-        npu_data = pd.DataFrame(npu_detail, columns=DETAIL_TEST_ROWS[-1])
-        # GPU vs CPU
-        api_data_gpu = move2target_device(api_data, device)  # args, kwargs -> gpu, result -> npu
-        data_info = func(api_full_name, api_data_gpu, config.backward_content)
-        gpu_out = data_info.bench_output
-        gpu_data_info = UtDataInfo(None, None, gpu_out, cpu_out, None, [], None, rank=api_data.rank)
-        gpu_detail = compare.compare_output(api_full_name, gpu_data_info, True)
-        gpu_data = pd.DataFrame(gpu_detail, columns=DETAIL_TEST_ROWS[-1])
-        # NPUvsCPU vs GPUvsCPU
-        result_file_name, details_file_name = api_precision_csv_file
-        precision_compare_config = OnlineApiPrecisionCompareConfig(npu_data, gpu_data, api_data.rank,
-                                                                   result_file_name, details_file_name)
-        online_api_precision_compare(precision_compare_config)
-    except Exception as err:
-        if "expected scalar type Long" in str(err):
-            logger.warning(
-                f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
-                f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
-        elif api_type in [Const.DISTRIBUTED]:
-            logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
-        else:
-            logger.error(f"Run {api_full_name} UT Error: {str(err)}")
-        compare.write_summary_csv((api_full_name, "SKIP", "SKIP", [[str(err)]], api_data.rank))
-    finally:
-        torch.cuda.empty_cache()
-def online_compare(api_data, device, common_config):
-    """online run_ut for compare：NPU vs GPU
-    """
-    compare, func, config = common_config.compare, common_config.handle_func, common_config.config
-    api_full_name = api_data.name
-    api_data = move2target_device(api_data, device)
-    try:
-        data_info = func(api_full_name, api_data, config.backward_content)
-        is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info)
-        logger.info(f"running api_full_name {api_full_name} ut, "
-                    f"is_fwd_success: {is_fwd_success}, "
-                    f"is_bwd_success: {is_bwd_success}")
-    except Exception as err:
-        [api_type, api_name, _] = api_full_name.split(Const.SEP)
-        if "expected scalar type Long" in str(err):
-            logger.warning(
-                f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
-                f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
-        elif api_type in [Const.DISTRIBUTED]:
-            logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
-        else:
-            logger.error(f"Run {api_full_name} UT Error: {str(err)}")
-        compare.write_summary_csv((api_full_name, "SKIP", "SKIP", [[str(err)]], api_data.rank))
-    finally:
-        torch.cuda.empty_cache()
-class ConsumerDispatcher:
-    def __init__(self, compare, capacity=10, num_workers=8, device: str = "gpu") -> None:
-        self.num_workers = num_workers
-        self.capacity = capacity
-        self.compare = compare
-        self.queues = []
-        self.processes = []
-        self.reverse_sort = False
-        self.pool = None
-        self.device = device
-        self.data_id = 0
-        self.lock = mp.Lock()
-        self.result_queue = mp.Queue()
-        mp.set_start_method("spawn", force=True)
-    def start(self, handle_func, config):
-        self.queues = [mp.Queue(maxsize=self.capacity) for _ in range(self.num_workers)]
-        api_precision_csv_file = [ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME, ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME]
-        common_config = CommonCompareConfig(self.compare, handle_func, config)
-        for xpu_id, q in enumerate(self.queues):
-            p = mp.Process(name="run_ut_process", target=run_ut_process,
-                           args=(xpu_id, q, common_config, api_precision_csv_file))
-            p.start()
-            self.processes.append(p)
-        logger.info(f"Api_precision_compare task result will be saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}")
-        logger.info(f"Api_precision_compare task details will be saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
-        logger.info("Successfully start unittest process.")
-    def stop(self):
-        for q in self.queues:
-            while q.full():
-                time.sleep(0.1)
-            q.put("KILL_")
-        for p in self.processes:
-            p.join()
-        logger.info("Successfully stop unittest process.")
-        logger.info(f"Api_precision_compare task result is saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}")
-        logger.info(f"Api_precision_compare task details is saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
-    def update_consume_queue(self, api_data):
-        while True:
-            index = self._choose_max_empty_site_strategy()
-            if index != -1:
-                q = self.queues[index]
-                q.put(api_data)
-                break
-            time.sleep(0.1)
-    def _choose_max_empty_site_strategy(self):
-        maximum = 0
-        index = -1
-        # 充分利用多卡资源，防止任务过多分配给前面的卡
-        _reverse = 1 if not self.reverse_sort else -1
-        for i, q in enumerate(self.queues[::_reverse]):
-            empty_site = self.capacity - q.qsize()
-            if empty_site > maximum:
-                maximum = empty_site
-                index = i
-        index = len(self.queues) - index - 1 if index != -1 and self.reverse_sort else index
-        self.reverse_sort = not self.reverse_sort
-        return index
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import time
+from collections import namedtuple
+import pandas as pd
+import torch
+import torch.multiprocessing as mp
+from msprobe.core.common.const import Const, CompareConst
+from msprobe.pytorch.api_accuracy_checker.compare.api_precision_compare import online_api_precision_compare
+from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import DETAIL_TEST_ROWS, thousandth_standard_api, \
+    binary_standard_api, absolute_standard_api
+from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api
+from msprobe.pytorch.common.log import logger
+from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import move2target_device
+from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_cpu_params
+# NPU vs GPU api list
+CompareApi = set(absolute_standard_api) | set(binary_standard_api) | set(thousandth_standard_api)
+current_time = time.strftime("%Y%m%d%H%M%S")
+ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME = "api_precision_compare_result_" + current_time + "_rank*.csv"
+ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME = "api_precision_compare_details_" + current_time + "_rank*.csv"
+OnlineApiPrecisionCompareConfig = namedtuple('OnlineApiPrecisionCompareConfig',
+                                             ['npu_data', 'gpu_data', 'rank', 'result_csv_path', 'details_csv_path'])
+# namedtuple of [instance of Comparator, func of run_touch_api_online, config of run_ut_config]
+CommonCompareConfig = namedtuple('CommonCompareConfig', ['compare', 'handle_func', 'config'])
+def run_ut_process(xpu_id, consumer_queue, common_config, api_precision_csv_file):
+    """ When consumer_queue(shared with ConsumerDispatcher) is not empty, consume api data from consumer_queue.
+    :param xpu_id: int
+    :param consumer_queue: shared queues of ConsumerDispatcher
+    :param common_config: namedtuple of CommonCompareConfig
+    :param api_precision_csv_file: list, length is 2, result file name and details file name
+    :return:
+    """
+    gpu_device = torch.device(f'cuda:{xpu_id}')
+    while True:
+        if consumer_queue.empty():
+            time.sleep(0.1)
+            continue
+        api_data = consumer_queue.get()
+        if api_data == "KILL_":
+            # current consumer finish
+            return
+        _, api_name, _ = api_data.name.split(Const.SEP)
+        if api_name in CompareApi:
+            # NPU vs GPU
+            online_compare(api_data, gpu_device, common_config)
+        else:
+            # NPUvsCPU vs GPUvsCPU
+            online_precision_compare(api_data, gpu_device, common_config, api_precision_csv_file)
def online_precision_compare(api_data, device, common_config, api_precision_csv_file):
    """Online run_ut for precision_compare: NPUvsCPU vs GPUvsCPU.

    1. Get the NPUvsCPU compare result (received NPU output against a CPU run).
    2. Get the GPUvsCPU compare result (output produced on ``device`` against
       the same CPU run).
    3. Call ``online_api_precision_compare`` with both detail tables.

    Any exception raised in these steps is logged and the API is recorded in
    the summary CSV as skipped; the CUDA cache is emptied in every case.

    :param api_data: API record exposing ``name``, ``args``, ``kwargs``,
        ``result`` and ``rank``. Its ``kwargs`` dict is modified in place
        (the ``"device"`` entry is removed).
    :param device: target device handed to ``move2target_device``
    :param common_config: namedtuple of CommonCompareConfig
    :param api_precision_csv_file: [result_file_name, details_file_name]
    """
    compare, func, config = common_config.compare, common_config.handle_func, common_config.config
    api_full_name = api_data.name
    [api_type, api_name, _] = api_full_name.split(Const.SEP)
    npu_args, npu_kwargs, npu_out = api_data.args, api_data.kwargs, api_data.result
    # Remove "device" whenever the key is present. A truthiness test would keep
    # falsy-but-valid values such as the integer ordinal 0, and the CPU re-run
    # below must not be pinned to the device the data came from.
    npu_kwargs.pop("device", None)
    try:
        # NPU vs CPU
        cpu_args, cpu_kwargs = generate_cpu_params(npu_args, npu_kwargs, False, api_name)
        cpu_out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs)
        npu_data_info = UtDataInfo(None, None, npu_out, cpu_out, None, [], None, rank=api_data.rank)
        npu_detail = compare.compare_output(api_full_name, npu_data_info, True)
        npu_data = pd.DataFrame(npu_detail, columns=DETAIL_TEST_ROWS[-1])

        # GPU vs CPU
        api_data_gpu = move2target_device(api_data, device)  # args, kwargs -> gpu, result -> npu
        data_info = func(api_full_name, api_data_gpu, config.backward_content)
        gpu_out = data_info.bench_output
        gpu_data_info = UtDataInfo(None, None, gpu_out, cpu_out, None, [], None, rank=api_data.rank)
        gpu_detail = compare.compare_output(api_full_name, gpu_data_info, True)
        gpu_data = pd.DataFrame(gpu_detail, columns=DETAIL_TEST_ROWS[-1])

        # NPUvsCPU vs GPUvsCPU
        result_file_name, details_file_name = api_precision_csv_file
        precision_compare_config = OnlineApiPrecisionCompareConfig(npu_data, gpu_data, api_data.rank,
                                                                   result_file_name, details_file_name)
        online_api_precision_compare(precision_compare_config)
    except Exception as err:
        # Worker boundary: one failing API must not stop the consumer loop, so
        # the failure is classified, logged and written out as a skipped row.
        if "expected scalar type Long" in str(err):
            logger.warning(
                f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
                f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
        elif api_type in [Const.DISTRIBUTED]:
            logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
        else:
            logger.error(f"Run {api_full_name} UT Error: {str(err)}")
        compare.write_summary_csv((api_full_name, CompareConst.SKIP, CompareConst.SKIP, [[str(err)]], api_data.rank))
    finally:
        torch.cuda.empty_cache()
def online_compare(api_data, device, common_config):
    """Online run_ut for compare: NPU vs GPU.

    Moves ``api_data`` to ``device``, runs it through
    ``common_config.handle_func`` and compares the outcome with
    ``common_config.compare``. A failure is logged and written to the summary
    CSV as a skipped API; the CUDA cache is emptied in every case.

    :param api_data: API record exposing ``name`` and ``rank``
    :param device: target device handed to ``move2target_device``
    :param common_config: namedtuple of CommonCompareConfig
    """
    compare = common_config.compare
    run_api = common_config.handle_func
    config = common_config.config
    api_full_name = api_data.name
    api_data = move2target_device(api_data, device)
    try:
        data_info = run_api(api_full_name, api_data, config.backward_content)
        fwd_ok, bwd_ok = compare.compare_output(api_full_name, data_info)
        logger.info(f"running api_full_name {api_full_name} ut, "
                    f"is_fwd_success: {fwd_ok}, "
                    f"is_bwd_success: {bwd_ok}")
    except Exception as err:
        api_type, api_name, _ = api_full_name.split(Const.SEP)
        reason = str(err)
        if "expected scalar type Long" in reason:
            logger.warning(
                f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
                f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
        elif api_type == Const.DISTRIBUTED:
            logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
        else:
            logger.error(f"Run {api_full_name} UT Error: {reason}")
        # The API is reported as skipped, with the error text as the message.
        skipped_row = (api_full_name, CompareConst.SKIP, CompareConst.SKIP, [[reason]], api_data.rank)
        compare.write_summary_csv(skipped_row)
    finally:
        torch.cuda.empty_cache()
class ConsumerDispatcher:
    """Fan incoming API data out to a pool of ``run_ut_process`` workers.

    Each worker owns one bounded queue. ``update_consume_queue`` places an item
    on the queue with the most free slots, and ``stop`` sends every worker the
    ``"KILL_"`` message and joins it.
    """

    def __init__(self, compare, capacity=10, num_workers=8, device: str = "gpu") -> None:
        """
        :param compare: comparator object forwarded to the workers
        :param capacity: maximum number of pending items per worker queue
        :param num_workers: number of worker processes (and queues) to create
        :param device: device name stored on the dispatcher
        """
        # Choose the start method before any multiprocessing primitive is
        # created, so the lock and queue below belong to the same ("spawn")
        # context as the worker processes.
        mp.set_start_method("spawn", force=True)
        self.num_workers = num_workers
        self.capacity = capacity
        self.compare = compare
        self.queues = []
        self.processes = []
        # Scan direction for the next queue selection; flipped on every pick.
        self.reverse_sort = False
        self.pool = None
        self.device = device
        self.data_id = 0
        self.lock = mp.Lock()
        self.result_queue = mp.Queue()

    def start(self, handle_func, config):
        """Create one queue and one ``run_ut_process`` worker per slot and start them.

        :param handle_func: callable packed into ``CommonCompareConfig`` for the workers
        :param config: configuration object packed into ``CommonCompareConfig``
        """
        self.queues = [mp.Queue(maxsize=self.capacity) for _ in range(self.num_workers)]
        api_precision_csv_file = [
            ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME,
            ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME
        ]
        common_config = CommonCompareConfig(self.compare, handle_func, config)
        for xpu_id, q in enumerate(self.queues):
            p = mp.Process(name="run_ut_process", target=run_ut_process,
                           args=(xpu_id, q, common_config, api_precision_csv_file))
            p.start()
            self.processes.append(p)
        logger.info(
            f'Api_precision_compare task result will be saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}')
        logger.info(
            f"Api_precision_compare task details will be saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
        logger.info("Successfully start unittest process.")

    def stop(self):
        """Send ``"KILL_"`` to every worker queue, then wait for all workers to exit."""
        for q in self.queues:
            # Wait for a free slot before queueing the stop message.
            while q.full():
                time.sleep(0.1)
            q.put("KILL_")
        for p in self.processes:
            p.join()
        logger.info("Successfully stop unittest process.")
        logger.info(f"Api_precision_compare task result is saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}")
        logger.info(f"Api_precision_compare task details is saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")

    def update_consume_queue(self, api_data):
        """Put ``api_data`` on the emptiest queue, polling every 0.1 s while all queues are full."""
        while True:
            index = self._choose_max_empty_site_strategy()
            if index != -1:
                self.queues[index].put(api_data)
                break
            time.sleep(0.1)

    def _choose_max_empty_site_strategy(self):
        """Return the index of the queue with the most free slots, or -1 if none has any.

        Ties go to the first queue met in scan order. The scan direction
        alternates between calls to make full use of all cards and to avoid
        assigning too many tasks to the leading ones.
        """
        scan_order = range(len(self.queues))
        if self.reverse_sort:
            scan_order = reversed(scan_order)
        best_index = -1
        most_free = 0
        for idx in scan_order:
            free_slots = self.capacity - self.queues[idx].qsize()
            if free_slots > most_free:
                most_free = free_slots
                best_index = idx
        self.reverse_sort = not self.reverse_sort
        return best_index

msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py ADDED Viewed

@@ -0,0 +1,110 @@
+# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0  (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from functools import wraps
+import torch
+from torch.utils._python_dispatch import TorchDispatchMode
+from msprobe.pytorch.api_accuracy_checker.common.utils import ApiData
+from msprobe.pytorch.common.utils import get_tensor_rank
+from msprobe.core.common.const import Const
+from msprobe.pytorch.common.log import logger
+from msprobe.core.common.file_utils import load_yaml
def singleton(cls):
    """Class decorator that replaces ``cls`` with a zero-argument factory.

    The factory builds ``cls()`` on its first call and returns that same
    object on every later call.
    """
    instances = {}

    @wraps(cls)
    def inner():
        try:
            return instances[cls]
        except KeyError:
            # First request: build the instance and remember it.
            created = cls()
            instances[cls] = created
            return created

    return inner
@singleton
class Counter:
    """Process-wide holder of per-API call counters."""

    def __init__(self) -> None:
        # Maps an aten API name to the number of times it has been dumped.
        self.index_dict = dict()


# Shared counter instance used by the dispatch mode below.
counter = Counter()

# Op configuration read from torch_ops_config.yaml, located next to this module.
yaml_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "torch_ops_config.yaml",
)
yaml_file = load_yaml(yaml_path)
class AccuracyCheckerDispatch(TorchDispatchMode):
    """Dispatch mode that runs each aten op and forwards its data through ``attl``.

    Ops listed under ``aten_ops_blacklist`` in the yaml configuration are
    executed but not forwarded. Ops listed under ``npu_adjust_autogard`` have
    the ``AutogradFunctionality`` dispatch key un-excluded before they run.
    """

    def __init__(self, attl):
        """
        :param attl: transport object; must provide ``nfs_path``, ``upload`` and ``send``
        """
        super().__init__()
        self.attl = attl
        self.counter = counter
        self.aten_ops_blacklist = yaml_file.get('aten_ops_blacklist', [])
        self.npu_adjust_autogard = yaml_file.get('npu_adjust_autogard', [])

    def __torch_dispatch__(self, func, types, args=None, kwargs=None):
        """Execute ``func`` and, unless it is blacklisted, dump its inputs and output.

        :param func: the aten op being dispatched
        :param types: tensor types involved in the call (unused here)
        :param args: positional arguments of the call; ``None`` is treated as ``()``
        :param kwargs: keyword arguments of the call; ``None`` is treated as ``{}``
        :return: the result of ``func(*args, **kwargs)``
        """
        # The signature defaults are None, which cannot be unpacked with * / **.
        args = () if args is None else args
        kwargs = {} if kwargs is None else kwargs

        aten_api = func.__name__.split(Const.SEP)[0]
        self.enable_autogard(aten_api)
        if aten_api in self.aten_ops_blacklist:
            return func(*args, **kwargs)

        res = func(*args, **kwargs)
        cur_rank = get_tensor_rank(args, res)
        cur_api_number = self.counter.index_dict.setdefault(aten_api, 0)
        api_name = f'{Const.ATEN}{Const.SEP}{aten_api}{Const.SEP}{cur_api_number}'
        logger.info(f"tools is dumping api: {api_name}")
        api_data = ApiData(api_name, args, kwargs, res, 0, cur_rank)
        # The dumped record is stripped of any "device" keyword argument.
        api_data.kwargs.pop("device", None)
        if self.attl.nfs_path:
            self.attl.upload(api_data)
        else:
            self.attl.send(api_data)
        # Count the call only after the data has been handed to the transport.
        self.counter.index_dict[aten_api] += 1
        return res

    def enable_autogard(self, aten_api):
        """Un-exclude the ``AutogradFunctionality`` dispatch key for configured ops."""
        if aten_api in self.npu_adjust_autogard:
            torch._C._dispatch_tls_set_dispatch_key_excluded(torch._C.DispatchKey.AutogradFunctionality, False)
def dispatch4data(func, attl, status):
    """Wrap ``func`` so that it runs inside ``AccuracyCheckerDispatch`` when ``status`` is truthy.

    :param func: callable to wrap
    :param attl: transport object handed to ``AccuracyCheckerDispatch``
    :param status: truthy to run ``func`` under the dispatch mode, falsy to call it directly
    :return: the wrapping function
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        if status:
            with AccuracyCheckerDispatch(attl):
                return func(*args, **kwargs)
        return func(*args, **kwargs)

    return wrapper
def run_ut_dispatch(attl, status):
    """
    This function called by online_run_ut.
    It is used to enable or disable dispatch for torch.autograd.backward function.

    Wrappers installed by earlier calls are removed before the new one is
    applied, so a disabling call really turns dispatch off and repeated calls
    do not stack wrappers on top of each other.

    Args:
        attl (ATTL):  online_run_ut class ATTL, which is used to upload or send api data to server.
        status (bool): True means enable dispatch, False means disable dispatch.
    """
    marker = "_msprobe_run_ut_dispatch"
    backward = torch.autograd.backward
    # functools.wraps (used by dispatch4data) stores the wrapped callable in
    # __wrapped__; follow it only through wrappers this function has tagged.
    while getattr(backward, marker, False) and hasattr(backward, "__wrapped__"):
        backward = backward.__wrapped__
    wrapped_backward = dispatch4data(backward, attl, status)
    setattr(wrapped_backward, marker, True)
    torch.autograd.backward = wrapped_backward

mindstudio-probe 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl

mindstudio-probe 1.0.3py3-none-any.whl → 1.1.0py3-none-any.whl