mindstudio-probe 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/LICENSE +201 -201
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/METADATA +36 -34
- mindstudio_probe-1.0.4.dist-info/RECORD +276 -0
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/WHEEL +1 -1
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/entry_points.txt +1 -0
- msprobe/README.md +101 -237
- msprobe/{config/config.json → config.json} +49 -49
- msprobe/core/advisor/advisor.py +124 -124
- msprobe/core/advisor/advisor_const.py +59 -59
- msprobe/core/advisor/advisor_result.py +58 -58
- msprobe/core/common/const.py +341 -318
- msprobe/core/common/exceptions.py +99 -99
- msprobe/core/common/{file_check.py → file_utils.py} +478 -283
- msprobe/core/common/log.py +76 -69
- msprobe/core/common/utils.py +385 -616
- msprobe/core/common_config.py +85 -71
- msprobe/core/compare/acc_compare.py +299 -298
- msprobe/core/compare/check.py +95 -95
- msprobe/core/compare/compare_cli.py +49 -49
- msprobe/core/compare/highlight.py +223 -222
- msprobe/core/compare/multiprocessing_compute.py +149 -149
- msprobe/core/compare/npy_compare.py +295 -295
- msprobe/core/compare/utils.py +430 -429
- msprobe/core/data_dump/data_collector.py +154 -144
- msprobe/core/data_dump/data_processor/base.py +314 -293
- msprobe/core/data_dump/data_processor/factory.py +59 -59
- msprobe/core/data_dump/data_processor/mindspore_processor.py +186 -198
- msprobe/core/data_dump/data_processor/pytorch_processor.py +366 -389
- msprobe/core/data_dump/json_writer.py +96 -116
- msprobe/core/data_dump/scope.py +178 -178
- msprobe/core/grad_probe/constant.py +70 -70
- msprobe/core/grad_probe/grad_compare.py +171 -175
- msprobe/core/grad_probe/utils.py +64 -52
- msprobe/docs/01.installation.md +89 -0
- msprobe/docs/02.config_introduction.md +165 -0
- msprobe/docs/03.config_examples.md +247 -0
- msprobe/docs/04.acl_config_examples.md +76 -0
- msprobe/docs/05.data_dump_PyTorch.md +198 -0
- msprobe/docs/06.data_dump_MindSpore.md +243 -0
- msprobe/docs/07.accuracy_checker_PyTorch.md +274 -0
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +198 -0
- msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
- msprobe/docs/10.accuracy_compare_PyTorch.md +245 -0
- msprobe/docs/11.accuracy_compare_MindSpore.md +202 -0
- msprobe/docs/12.overflow_check_PyTorch.md +79 -0
- msprobe/docs/13.overflow_check_MindSpore.md +31 -0
- msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
- msprobe/docs/15.free_benchmarking_PyTorch.md +164 -0
- msprobe/{doc/grad_probe/grad_probe.md → docs/17.grad_probe.md} +207 -207
- msprobe/docs/FAQ_PyTorch.md +177 -0
- msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
- msprobe/docs/img/free_benchmark_framework.png +0 -0
- msprobe/mindspore/__init__.py +1 -1
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +254 -245
- msprobe/mindspore/api_accuracy_checker/api_info.py +69 -69
- msprobe/mindspore/api_accuracy_checker/api_runner.py +155 -151
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +196 -196
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
- msprobe/mindspore/api_accuracy_checker/compute_element.py +238 -223
- msprobe/mindspore/api_accuracy_checker/main.py +8 -15
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +113 -113
- msprobe/mindspore/api_accuracy_checker/utils.py +79 -62
- msprobe/mindspore/cell_processor.py +34 -34
- msprobe/mindspore/common/const.py +106 -87
- msprobe/mindspore/common/log.py +37 -37
- msprobe/mindspore/common/utils.py +81 -57
- msprobe/mindspore/compare/distributed_compare.py +75 -75
- msprobe/mindspore/compare/ms_compare.py +219 -117
- msprobe/mindspore/compare/ms_graph_compare.py +348 -317
- msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -399
- msprobe/mindspore/debugger/debugger_config.py +66 -74
- msprobe/mindspore/debugger/precision_debugger.py +126 -107
- msprobe/mindspore/dump/dump_tool_factory.py +35 -35
- msprobe/mindspore/dump/hook_cell/api_registry.py +118 -104
- msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -53
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +922 -925
- msprobe/mindspore/dump/hook_cell/wrap_api.py +113 -0
- msprobe/mindspore/dump/jit_dump.py +72 -56
- msprobe/mindspore/dump/kernel_graph_dump.py +59 -60
- msprobe/mindspore/dump/kernel_kbyk_dump.py +64 -65
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +116 -116
- msprobe/mindspore/free_benchmark/common/config.py +12 -12
- msprobe/mindspore/free_benchmark/common/handler_params.py +17 -17
- msprobe/mindspore/free_benchmark/common/utils.py +71 -71
- msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -842
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +43 -42
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +107 -107
- msprobe/mindspore/free_benchmark/handler/base_handler.py +90 -90
- msprobe/mindspore/free_benchmark/handler/check_handler.py +41 -41
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +36 -36
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -21
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +67 -67
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +21 -21
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +63 -63
- msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +51 -0
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +35 -34
- msprobe/mindspore/free_benchmark/perturbation/no_change.py +12 -12
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +29 -27
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +33 -33
- msprobe/mindspore/grad_probe/global_context.py +90 -91
- msprobe/mindspore/grad_probe/grad_analyzer.py +231 -231
- msprobe/mindspore/grad_probe/grad_monitor.py +27 -27
- msprobe/mindspore/grad_probe/grad_stat_csv.py +131 -131
- msprobe/mindspore/grad_probe/hook.py +94 -92
- msprobe/mindspore/grad_probe/utils.py +29 -28
- msprobe/mindspore/ms_config.py +128 -126
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +44 -45
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +34 -34
- msprobe/mindspore/runtime.py +4 -4
- msprobe/mindspore/service.py +378 -354
- msprobe/mindspore/task_handler_factory.py +24 -24
- msprobe/msprobe.py +105 -107
- msprobe/pytorch/__init__.py +3 -3
- msprobe/pytorch/api_accuracy_checker/common/config.py +53 -55
- msprobe/pytorch/api_accuracy_checker/common/utils.py +214 -165
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +213 -213
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +606 -581
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +386 -381
- msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +73 -73
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +245 -244
- msprobe/pytorch/api_accuracy_checker/config.yaml +10 -10
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +335 -332
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +200 -199
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +133 -134
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +592 -581
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +70 -74
- msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +197 -202
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +325 -324
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +204 -204
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +219 -218
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +10 -10
- msprobe/pytorch/bench_functions/__init__.py +15 -15
- msprobe/pytorch/bench_functions/apply_adam_w.py +28 -28
- msprobe/pytorch/bench_functions/confusion_transpose.py +19 -19
- msprobe/pytorch/bench_functions/fast_gelu.py +55 -55
- msprobe/pytorch/bench_functions/layer_norm_eval.py +6 -6
- msprobe/pytorch/bench_functions/linear.py +12 -12
- msprobe/pytorch/bench_functions/matmul_backward.py +48 -48
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +509 -421
- msprobe/pytorch/bench_functions/rms_norm.py +15 -15
- msprobe/pytorch/bench_functions/rotary_mul.py +52 -52
- msprobe/pytorch/bench_functions/scaled_mask_softmax.py +26 -26
- msprobe/pytorch/bench_functions/swiglu.py +55 -55
- msprobe/pytorch/common/__init__.py +2 -2
- msprobe/pytorch/common/compare_script.template +14 -14
- msprobe/pytorch/common/log.py +20 -31
- msprobe/pytorch/common/parse_json.py +39 -39
- msprobe/pytorch/common/utils.py +305 -300
- msprobe/pytorch/compare/distributed_compare.py +66 -66
- msprobe/pytorch/compare/mapping.yaml +607 -607
- msprobe/pytorch/compare/match.py +34 -33
- msprobe/pytorch/compare/pt_compare.py +50 -40
- msprobe/pytorch/debugger/debugger_config.py +95 -95
- msprobe/pytorch/debugger/precision_debugger.py +125 -125
- msprobe/pytorch/free_benchmark/__init__.py +8 -8
- msprobe/pytorch/free_benchmark/common/constant.py +70 -70
- msprobe/pytorch/free_benchmark/common/counter.py +71 -71
- msprobe/pytorch/free_benchmark/common/enums.py +37 -37
- msprobe/pytorch/free_benchmark/common/params.py +129 -129
- msprobe/pytorch/free_benchmark/common/utils.py +102 -102
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +179 -179
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +104 -104
- msprobe/pytorch/free_benchmark/main.py +105 -105
- msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +13 -13
- msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +41 -41
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +90 -90
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +104 -104
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +63 -63
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +68 -68
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +28 -28
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +45 -45
- msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +19 -19
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +217 -217
- msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +39 -39
- msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +23 -23
- msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +30 -30
- msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +170 -170
- msprobe/pytorch/function_factory.py +76 -75
- msprobe/pytorch/functional/dump_module.py +39 -39
- msprobe/pytorch/grad_probe/grad_monitor.py +91 -90
- msprobe/pytorch/grad_probe/grad_stat_csv.py +128 -128
- msprobe/pytorch/hook_module/api_registry.py +161 -161
- msprobe/pytorch/hook_module/hook_module.py +120 -120
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1877
- msprobe/pytorch/hook_module/utils.py +30 -29
- msprobe/pytorch/hook_module/wrap_aten.py +110 -110
- msprobe/pytorch/hook_module/wrap_distributed.py +78 -78
- msprobe/pytorch/hook_module/wrap_functional.py +105 -105
- msprobe/pytorch/hook_module/wrap_npu_custom.py +93 -84
- msprobe/pytorch/hook_module/wrap_tensor.py +71 -71
- msprobe/pytorch/hook_module/wrap_torch.py +86 -86
- msprobe/pytorch/hook_module/wrap_vf.py +62 -62
- msprobe/pytorch/module_processer.py +138 -138
- msprobe/pytorch/online_dispatch/__init__.py +20 -20
- msprobe/pytorch/online_dispatch/compare.py +236 -236
- msprobe/pytorch/online_dispatch/dispatch.py +271 -271
- msprobe/pytorch/online_dispatch/dump_compare.py +155 -156
- msprobe/pytorch/online_dispatch/single_compare.py +391 -391
- msprobe/pytorch/online_dispatch/torch_ops_config.yaml +49 -49
- msprobe/pytorch/online_dispatch/utils.py +130 -146
- msprobe/pytorch/parse.py +4 -4
- msprobe/pytorch/parse_tool/cli.py +32 -32
- msprobe/pytorch/parse_tool/lib/compare.py +260 -271
- msprobe/pytorch/parse_tool/lib/config.py +52 -52
- msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
- msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
- msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
- msprobe/pytorch/parse_tool/lib/parse_tool.py +158 -158
- msprobe/pytorch/parse_tool/lib/utils.py +316 -321
- msprobe/pytorch/parse_tool/lib/visualization.py +85 -91
- msprobe/pytorch/pt_config.py +188 -187
- msprobe/pytorch/service.py +246 -252
- mindstudio_probe-1.0.3.dist-info/RECORD +0 -272
- msprobe/config/README.md +0 -539
- msprobe/mindspore/doc/compare.md +0 -58
- msprobe/mindspore/doc/dump.md +0 -217
- msprobe/mindspore/dump/hook_cell/wrap_functional.py +0 -91
- msprobe/mindspore/dump/hook_cell/wrap_tensor.py +0 -63
- msprobe/pytorch/doc/FAQ.md +0 -193
- msprobe/pytorch/doc/api_accuracy_checker.md +0 -313
- msprobe/pytorch/doc/api_accuracy_checker_online.md +0 -187
- msprobe/pytorch/doc/dump.md +0 -260
- msprobe/pytorch/doc/msprobe精度工具数据dump标准性能基线报告.md +0 -182
- msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -240
- msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
- msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
- msprobe/pytorch/doc/run_overflow_check.md +0 -25
- msprobe/pytorch/doc/在线精度比对.md +0 -90
- msprobe/pytorch/doc/无标杆工具场景验证和性能基线报告.md +0 -151
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/top_level.txt +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
- /msprobe/{config → docs}/img/free_benchmark.png +0 -0
- /msprobe/{doc/grad_probe/img/image-1.png → docs/img/grad_probe_image-1.png} +0 -0
- /msprobe/{doc/grad_probe/img/image-2.png → docs/img/grad_probe_image-2.png} +0 -0
- /msprobe/{doc/grad_probe/img/image-3.png → docs/img/grad_probe_image-3.png} +0 -0
- /msprobe/{doc/grad_probe/img/image-4.png → docs/img/grad_probe_image-4.png} +0 -0
- /msprobe/{doc/grad_probe/img/image.png → docs/img/grad_probe_image.png} +0 -0
- /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
|
@@ -1,204 +1,204 @@
|
|
|
1
|
-
import time
|
|
2
|
-
from collections import namedtuple
|
|
3
|
-
|
|
4
|
-
import pandas as pd
|
|
5
|
-
import torch
|
|
6
|
-
import torch.multiprocessing as mp
|
|
7
|
-
|
|
8
|
-
from msprobe.core.common.const import Const
|
|
9
|
-
from msprobe.pytorch.api_accuracy_checker.compare.api_precision_compare import online_api_precision_compare
|
|
10
|
-
from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import DETAIL_TEST_ROWS, thousandth_standard_api, \
|
|
11
|
-
binary_standard_api, absolute_standard_api
|
|
12
|
-
from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api
|
|
13
|
-
from msprobe.pytorch.common.log import logger
|
|
14
|
-
from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import move2target_device
|
|
15
|
-
|
|
16
|
-
# NPU vs GPU api list
|
|
17
|
-
CompareApi = set(absolute_standard_api) | set(binary_standard_api) | set(thousandth_standard_api)
|
|
18
|
-
|
|
19
|
-
current_time = time.strftime("%Y%m%d%H%M%S")
|
|
20
|
-
ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME = "api_precision_compare_result_" + current_time + "_rank*.csv"
|
|
21
|
-
ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME = "api_precision_compare_details_" + current_time + "_rank*.csv"
|
|
22
|
-
|
|
23
|
-
OnlineApiPrecisionCompareConfig = namedtuple('OnlineApiPrecisionCompareConfig',
|
|
24
|
-
['npu_data', 'gpu_data', 'rank', 'result_csv_path', 'details_csv_path'])
|
|
25
|
-
# namedtuple of [instance of Comparator, func of run_touch_api_online, config of run_ut_config]
|
|
26
|
-
CommonCompareConfig = namedtuple('CommonCompareConfig', ['compare', 'handle_func', 'config'])
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def run_ut_process(xpu_id, consumer_queue, common_config, api_precision_csv_file):
|
|
30
|
-
""" When consumer_queue(shared with ConsumerDispatcher) is not empty, consume api data from consumer_queue.
|
|
31
|
-
:param xpu_id: int
|
|
32
|
-
:param consumer_queue: shared queues of ConsumerDispatcher
|
|
33
|
-
:param common_config: namedtuple of CommonCompareConfig
|
|
34
|
-
:param api_precision_csv_file: list, length is 2, result file name and details file name
|
|
35
|
-
:return:
|
|
36
|
-
"""
|
|
37
|
-
gpu_device = torch.device(f'cuda:{xpu_id}')
|
|
38
|
-
|
|
39
|
-
while True:
|
|
40
|
-
if consumer_queue.empty():
|
|
41
|
-
time.sleep(0.1)
|
|
42
|
-
continue
|
|
43
|
-
|
|
44
|
-
api_data = consumer_queue.get()
|
|
45
|
-
if api_data == "KILL_":
|
|
46
|
-
# current consumer finish
|
|
47
|
-
return
|
|
48
|
-
|
|
49
|
-
_, api_name, _ = api_data.name.split(Const.SEP)
|
|
50
|
-
if api_name in CompareApi:
|
|
51
|
-
# NPU vs GPU
|
|
52
|
-
online_compare(api_data, gpu_device, common_config)
|
|
53
|
-
else:
|
|
54
|
-
# NPUvsCPU vs GPUvsCPU
|
|
55
|
-
online_precision_compare(api_data, gpu_device, common_config, api_precision_csv_file)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def online_precision_compare(api_data, device, common_config, api_precision_csv_file):
|
|
59
|
-
"""online run_ut for precision_compare: NPUvsCPU vs GPUvsCPU
|
|
60
|
-
1. get NPUvsCPU compare result
|
|
61
|
-
2. get GPUvsCPU compare result
|
|
62
|
-
3. call online_api_precision_compare
|
|
63
|
-
:param api_data
|
|
64
|
-
:param device
|
|
65
|
-
:param common_config: namedtuple of CommonCompareConfig
|
|
66
|
-
:param api_precision_csv_file: [result_file_name, details_file_name]
|
|
67
|
-
"""
|
|
68
|
-
compare, func, config = common_config.compare, common_config.handle_func, common_config.config
|
|
69
|
-
api_full_name = api_data.name
|
|
70
|
-
[api_type, api_name, _] = api_full_name.split(Const.SEP)
|
|
71
|
-
npu_args, npu_kwargs, npu_out = api_data.args, api_data.kwargs, api_data.result
|
|
72
|
-
|
|
73
|
-
if npu_kwargs.get("device"):
|
|
74
|
-
del npu_kwargs["device"]
|
|
75
|
-
|
|
76
|
-
try:
|
|
77
|
-
# NPU vs CPU
|
|
78
|
-
cpu_out = exec_api(api_type, api_name, npu_args, npu_kwargs)
|
|
79
|
-
npu_data_info = UtDataInfo(None, None, npu_out, cpu_out, None, [], None, rank=api_data.rank)
|
|
80
|
-
npu_detail = compare.compare_output(api_full_name, npu_data_info, True)
|
|
81
|
-
npu_data = pd.DataFrame(npu_detail, columns=DETAIL_TEST_ROWS[-1])
|
|
82
|
-
|
|
83
|
-
# GPU vs CPU
|
|
84
|
-
api_data_gpu = move2target_device(api_data, device) # args, kwargs -> gpu, result -> npu
|
|
85
|
-
data_info = func(api_full_name, api_data_gpu, config.backward_content)
|
|
86
|
-
gpu_out = data_info.bench_output
|
|
87
|
-
gpu_data_info = UtDataInfo(None, None, gpu_out, cpu_out, None, [], None, rank=api_data.rank)
|
|
88
|
-
gpu_detail = compare.compare_output(api_full_name, gpu_data_info, True)
|
|
89
|
-
gpu_data = pd.DataFrame(gpu_detail, columns=DETAIL_TEST_ROWS[-1])
|
|
90
|
-
|
|
91
|
-
# NPUvsCPU vs GPUvsCPU
|
|
92
|
-
result_file_name, details_file_name = api_precision_csv_file
|
|
93
|
-
precision_compare_config = OnlineApiPrecisionCompareConfig(npu_data, gpu_data, api_data.rank,
|
|
94
|
-
result_file_name, details_file_name)
|
|
95
|
-
online_api_precision_compare(precision_compare_config)
|
|
96
|
-
|
|
97
|
-
except Exception as err:
|
|
98
|
-
if "expected scalar type Long" in str(err):
|
|
99
|
-
logger.warning(
|
|
100
|
-
f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
|
|
101
|
-
f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
|
|
102
|
-
elif api_type in [Const.DISTRIBUTED]:
|
|
103
|
-
logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
|
|
104
|
-
else:
|
|
105
|
-
logger.error(f"Run {api_full_name} UT Error: {str(err)}")
|
|
106
|
-
|
|
107
|
-
compare.write_summary_csv((api_full_name,
|
|
108
|
-
|
|
109
|
-
finally:
|
|
110
|
-
torch.cuda.empty_cache()
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def online_compare(api_data, device, common_config):
|
|
114
|
-
"""online run_ut for compare:NPU vs GPU
|
|
115
|
-
"""
|
|
116
|
-
compare, func, config = common_config.compare, common_config.handle_func, common_config.config
|
|
117
|
-
api_full_name = api_data.name
|
|
118
|
-
api_data = move2target_device(api_data, device)
|
|
119
|
-
try:
|
|
120
|
-
data_info = func(api_full_name, api_data, config.backward_content)
|
|
121
|
-
is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info)
|
|
122
|
-
logger.info(f"running api_full_name {api_full_name} ut, "
|
|
123
|
-
f"is_fwd_success: {is_fwd_success}, "
|
|
124
|
-
f"is_bwd_success: {is_bwd_success}")
|
|
125
|
-
except Exception as err:
|
|
126
|
-
[api_type, api_name, _] = api_full_name.split(Const.SEP)
|
|
127
|
-
if "expected scalar type Long" in str(err):
|
|
128
|
-
logger.warning(
|
|
129
|
-
f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
|
|
130
|
-
f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
|
|
131
|
-
elif api_type in [Const.DISTRIBUTED]:
|
|
132
|
-
logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
|
|
133
|
-
else:
|
|
134
|
-
logger.error(f"Run {api_full_name} UT Error: {str(err)}")
|
|
135
|
-
|
|
136
|
-
compare.write_summary_csv((api_full_name,
|
|
137
|
-
|
|
138
|
-
finally:
|
|
139
|
-
torch.cuda.empty_cache()
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
class ConsumerDispatcher:
|
|
143
|
-
def __init__(self, compare, capacity=10, num_workers=8, device: str = "gpu") -> None:
|
|
144
|
-
self.num_workers = num_workers
|
|
145
|
-
self.capacity = capacity
|
|
146
|
-
self.compare = compare
|
|
147
|
-
self.queues = []
|
|
148
|
-
self.processes = []
|
|
149
|
-
self.reverse_sort = False
|
|
150
|
-
self.pool = None
|
|
151
|
-
self.device = device
|
|
152
|
-
self.data_id = 0
|
|
153
|
-
self.lock = mp.Lock()
|
|
154
|
-
self.result_queue = mp.Queue()
|
|
155
|
-
mp.set_start_method("spawn", force=True)
|
|
156
|
-
|
|
157
|
-
def start(self, handle_func, config):
|
|
158
|
-
self.queues = [mp.Queue(maxsize=self.capacity) for _ in range(self.num_workers)]
|
|
159
|
-
api_precision_csv_file = [ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME, ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME]
|
|
160
|
-
common_config = CommonCompareConfig(self.compare, handle_func, config)
|
|
161
|
-
for xpu_id, q in enumerate(self.queues):
|
|
162
|
-
p = mp.Process(name="run_ut_process", target=run_ut_process,
|
|
163
|
-
args=(xpu_id, q, common_config, api_precision_csv_file))
|
|
164
|
-
|
|
165
|
-
p.start()
|
|
166
|
-
self.processes.append(p)
|
|
167
|
-
logger.info(f"Api_precision_compare task result will be saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}")
|
|
168
|
-
logger.info(f"Api_precision_compare task details will be saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
|
|
169
|
-
logger.info("Successfully start unittest process.")
|
|
170
|
-
|
|
171
|
-
def stop(self):
|
|
172
|
-
for q in self.queues:
|
|
173
|
-
while q.full():
|
|
174
|
-
time.sleep(0.1)
|
|
175
|
-
q.put("KILL_")
|
|
176
|
-
|
|
177
|
-
for p in self.processes:
|
|
178
|
-
p.join()
|
|
179
|
-
logger.info("Successfully stop unittest process.")
|
|
180
|
-
logger.info(f"Api_precision_compare task result is saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}")
|
|
181
|
-
logger.info(f"Api_precision_compare task details is saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
|
|
182
|
-
|
|
183
|
-
def update_consume_queue(self, api_data):
|
|
184
|
-
while True:
|
|
185
|
-
index = self._choose_max_empty_site_strategy()
|
|
186
|
-
if index != -1:
|
|
187
|
-
q = self.queues[index]
|
|
188
|
-
q.put(api_data)
|
|
189
|
-
break
|
|
190
|
-
time.sleep(0.1)
|
|
191
|
-
|
|
192
|
-
def _choose_max_empty_site_strategy(self):
|
|
193
|
-
maximum = 0
|
|
194
|
-
index = -1
|
|
195
|
-
# 充分利用多卡资源,防止任务过多分配给前面的卡
|
|
196
|
-
_reverse = 1 if not self.reverse_sort else -1
|
|
197
|
-
for i, q in enumerate(self.queues[::_reverse]):
|
|
198
|
-
empty_site = self.capacity - q.qsize()
|
|
199
|
-
if empty_site > maximum:
|
|
200
|
-
maximum = empty_site
|
|
201
|
-
index = i
|
|
202
|
-
index = len(self.queues) - index - 1 if index != -1 and self.reverse_sort else index
|
|
203
|
-
self.reverse_sort = not self.reverse_sort
|
|
204
|
-
return index
|
|
1
|
+
import time
|
|
2
|
+
from collections import namedtuple
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import torch
|
|
6
|
+
import torch.multiprocessing as mp
|
|
7
|
+
|
|
8
|
+
from msprobe.core.common.const import Const, CompareConst
|
|
9
|
+
from msprobe.pytorch.api_accuracy_checker.compare.api_precision_compare import online_api_precision_compare
|
|
10
|
+
from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import DETAIL_TEST_ROWS, thousandth_standard_api, \
|
|
11
|
+
binary_standard_api, absolute_standard_api
|
|
12
|
+
from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api
|
|
13
|
+
from msprobe.pytorch.common.log import logger
|
|
14
|
+
from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import move2target_device
|
|
15
|
+
|
|
16
|
+
# NPU vs GPU api list
|
|
17
|
+
CompareApi = set(absolute_standard_api) | set(binary_standard_api) | set(thousandth_standard_api)
|
|
18
|
+
|
|
19
|
+
current_time = time.strftime("%Y%m%d%H%M%S")
|
|
20
|
+
ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME = "api_precision_compare_result_" + current_time + "_rank*.csv"
|
|
21
|
+
ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME = "api_precision_compare_details_" + current_time + "_rank*.csv"
|
|
22
|
+
|
|
23
|
+
OnlineApiPrecisionCompareConfig = namedtuple('OnlineApiPrecisionCompareConfig',
|
|
24
|
+
['npu_data', 'gpu_data', 'rank', 'result_csv_path', 'details_csv_path'])
|
|
25
|
+
# namedtuple of [instance of Comparator, func of run_touch_api_online, config of run_ut_config]
|
|
26
|
+
CommonCompareConfig = namedtuple('CommonCompareConfig', ['compare', 'handle_func', 'config'])
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def run_ut_process(xpu_id, consumer_queue, common_config, api_precision_csv_file):
|
|
30
|
+
""" When consumer_queue(shared with ConsumerDispatcher) is not empty, consume api data from consumer_queue.
|
|
31
|
+
:param xpu_id: int
|
|
32
|
+
:param consumer_queue: shared queues of ConsumerDispatcher
|
|
33
|
+
:param common_config: namedtuple of CommonCompareConfig
|
|
34
|
+
:param api_precision_csv_file: list, length is 2, result file name and details file name
|
|
35
|
+
:return:
|
|
36
|
+
"""
|
|
37
|
+
gpu_device = torch.device(f'cuda:{xpu_id}')
|
|
38
|
+
|
|
39
|
+
while True:
|
|
40
|
+
if consumer_queue.empty():
|
|
41
|
+
time.sleep(0.1)
|
|
42
|
+
continue
|
|
43
|
+
|
|
44
|
+
api_data = consumer_queue.get()
|
|
45
|
+
if api_data == "KILL_":
|
|
46
|
+
# current consumer finish
|
|
47
|
+
return
|
|
48
|
+
|
|
49
|
+
_, api_name, _ = api_data.name.split(Const.SEP)
|
|
50
|
+
if api_name in CompareApi:
|
|
51
|
+
# NPU vs GPU
|
|
52
|
+
online_compare(api_data, gpu_device, common_config)
|
|
53
|
+
else:
|
|
54
|
+
# NPUvsCPU vs GPUvsCPU
|
|
55
|
+
online_precision_compare(api_data, gpu_device, common_config, api_precision_csv_file)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def online_precision_compare(api_data, device, common_config, api_precision_csv_file):
|
|
59
|
+
"""online run_ut for precision_compare: NPUvsCPU vs GPUvsCPU
|
|
60
|
+
1. get NPUvsCPU compare result
|
|
61
|
+
2. get GPUvsCPU compare result
|
|
62
|
+
3. call online_api_precision_compare
|
|
63
|
+
:param api_data
|
|
64
|
+
:param device
|
|
65
|
+
:param common_config: namedtuple of CommonCompareConfig
|
|
66
|
+
:param api_precision_csv_file: [result_file_name, details_file_name]
|
|
67
|
+
"""
|
|
68
|
+
compare, func, config = common_config.compare, common_config.handle_func, common_config.config
|
|
69
|
+
api_full_name = api_data.name
|
|
70
|
+
[api_type, api_name, _] = api_full_name.split(Const.SEP)
|
|
71
|
+
npu_args, npu_kwargs, npu_out = api_data.args, api_data.kwargs, api_data.result
|
|
72
|
+
|
|
73
|
+
if npu_kwargs.get("device"):
|
|
74
|
+
del npu_kwargs["device"]
|
|
75
|
+
|
|
76
|
+
try:
|
|
77
|
+
# NPU vs CPU
|
|
78
|
+
cpu_out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, npu_args, npu_kwargs)
|
|
79
|
+
npu_data_info = UtDataInfo(None, None, npu_out, cpu_out, None, [], None, rank=api_data.rank)
|
|
80
|
+
npu_detail = compare.compare_output(api_full_name, npu_data_info, True)
|
|
81
|
+
npu_data = pd.DataFrame(npu_detail, columns=DETAIL_TEST_ROWS[-1])
|
|
82
|
+
|
|
83
|
+
# GPU vs CPU
|
|
84
|
+
api_data_gpu = move2target_device(api_data, device) # args, kwargs -> gpu, result -> npu
|
|
85
|
+
data_info = func(api_full_name, api_data_gpu, config.backward_content)
|
|
86
|
+
gpu_out = data_info.bench_output
|
|
87
|
+
gpu_data_info = UtDataInfo(None, None, gpu_out, cpu_out, None, [], None, rank=api_data.rank)
|
|
88
|
+
gpu_detail = compare.compare_output(api_full_name, gpu_data_info, True)
|
|
89
|
+
gpu_data = pd.DataFrame(gpu_detail, columns=DETAIL_TEST_ROWS[-1])
|
|
90
|
+
|
|
91
|
+
# NPUvsCPU vs GPUvsCPU
|
|
92
|
+
result_file_name, details_file_name = api_precision_csv_file
|
|
93
|
+
precision_compare_config = OnlineApiPrecisionCompareConfig(npu_data, gpu_data, api_data.rank,
|
|
94
|
+
result_file_name, details_file_name)
|
|
95
|
+
online_api_precision_compare(precision_compare_config)
|
|
96
|
+
|
|
97
|
+
except Exception as err:
|
|
98
|
+
if "expected scalar type Long" in str(err):
|
|
99
|
+
logger.warning(
|
|
100
|
+
f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
|
|
101
|
+
f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
|
|
102
|
+
elif api_type in [Const.DISTRIBUTED]:
|
|
103
|
+
logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
|
|
104
|
+
else:
|
|
105
|
+
logger.error(f"Run {api_full_name} UT Error: {str(err)}")
|
|
106
|
+
|
|
107
|
+
compare.write_summary_csv((api_full_name, CompareConst.SKIP, CompareConst.SKIP, [[str(err)]], api_data.rank))
|
|
108
|
+
|
|
109
|
+
finally:
|
|
110
|
+
torch.cuda.empty_cache()
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def online_compare(api_data, device, common_config):
    """Run one captured API online and compare its output (NPU vs GPU).

    The API data is moved to ``device``, executed through
    ``common_config.handle_func`` and the resulting data info is passed to
    ``common_config.compare.compare_output``. An exception raised by the
    handler or by the comparison is logged and written to the summary CSV
    with ``CompareConst.SKIP`` markers instead of propagating.

    Args:
        api_data: Captured API record; its ``name`` and ``rank`` are read here.
        device: Target device forwarded to ``move2target_device``.
        common_config: Object exposing ``compare``, ``handle_func`` and
            ``config``; ``config.backward_content`` is forwarded to the handler.
    """
    compare = common_config.compare
    func = common_config.handle_func
    config = common_config.config
    api_full_name = api_data.name
    api_data = move2target_device(api_data, device)
    try:
        data_info = func(api_full_name, api_data, config.backward_content)
        is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info)
        logger.info(f"running api_full_name {api_full_name} ut, "
                    f"is_fwd_success: {is_fwd_success}, "
                    f"is_bwd_success: {is_bwd_success}")
    except Exception as err:
        # Parse the name defensively: a strict three-way unpack would raise
        # ValueError inside this handler for any name that does not have
        # exactly three segments, hiding the real error and skipping the
        # CSV record below.
        name_parts = api_full_name.split(Const.SEP)
        api_type = name_parts[0]
        api_name = name_parts[1] if len(name_parts) > 1 else api_full_name
        if "expected scalar type Long" in str(err):
            logger.warning(
                f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
                f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
        elif api_type in [Const.DISTRIBUTED]:
            logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
        else:
            logger.error(f"Run {api_full_name} UT Error: {str(err)}")

        # Record the failed API regardless of which branch logged it.
        compare.write_summary_csv((api_full_name, CompareConst.SKIP, CompareConst.SKIP, [[str(err)]], api_data.rank))

    finally:
        # Release cached device memory after every API, success or failure.
        torch.cuda.empty_cache()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class ConsumerDispatcher:
    """Distribute captured API data across a set of ``run_ut_process`` workers.

    ``start`` creates one bounded queue and one process per worker,
    ``update_consume_queue`` places each item on the queue with the most free
    slots, and ``stop`` puts the string ``"KILL_"`` on every queue
    (presumably the shutdown marker understood by ``run_ut_process`` --
    confirm against that function) and joins the processes.
    """

    def __init__(self, compare, capacity=10, num_workers=8, device: str = "gpu") -> None:
        """Store the dispatch settings and select the "spawn" start method.

        Args:
            compare: Comparator object handed to every worker via the common config.
            capacity: Maximum number of pending items per worker queue.
            num_workers: Number of worker queues/processes created by ``start``.
            device: Device label kept on the instance.
        """
        # Choose the start method BEFORE creating any multiprocessing
        # primitive, so the lock and queue below are built for the same
        # context as the worker processes rather than the platform default.
        mp.set_start_method("spawn", force=True)
        self.num_workers = num_workers
        self.capacity = capacity
        self.compare = compare
        self.queues = []
        self.processes = []
        # Alternates the scan direction used when picking a queue.
        self.reverse_sort = False
        self.pool = None
        self.device = device
        self.data_id = 0
        self.lock = mp.Lock()
        self.result_queue = mp.Queue()

    def start(self, handle_func, config):
        """Create the worker queues and launch one process per queue.

        Args:
            handle_func: Callable packed into the common config for the workers.
            config: Run configuration packed into the common config.
        """
        self.queues = [mp.Queue(maxsize=self.capacity) for _ in range(self.num_workers)]
        api_precision_csv_file = [
            ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME,
            ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME,
        ]
        common_config = CommonCompareConfig(self.compare, handle_func, config)
        # The queue's position doubles as the worker's device index.
        for xpu_id, q in enumerate(self.queues):
            p = mp.Process(name="run_ut_process", target=run_ut_process,
                           args=(xpu_id, q, common_config, api_precision_csv_file))

            p.start()
            self.processes.append(p)
        logger.info(f"Api_precision_compare task result will be saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}")
        logger.info(f"Api_precision_compare task details will be saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
        logger.info("Successfully start unittest process.")

    def stop(self):
        """Put "KILL_" on every queue, then wait for all workers to exit."""
        for q in self.queues:
            # Wait for room so the marker is queued behind the pending items.
            while q.full():
                time.sleep(0.1)
            q.put("KILL_")

        for p in self.processes:
            p.join()
        logger.info("Successfully stop unittest process.")
        logger.info(f"Api_precision_compare task result is saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}")
        logger.info(f"Api_precision_compare task details is saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")

    def update_consume_queue(self, api_data):
        """Enqueue ``api_data`` on the emptiest queue, polling until one has room."""
        while True:
            index = self._choose_max_empty_site_strategy()
            if index != -1:
                self.queues[index].put(api_data)
                break
            # Every queue is at capacity; retry shortly.
            time.sleep(0.1)

    def _choose_max_empty_site_strategy(self):
        """Return the index of the queue with the most free slots, or -1 if all are full.

        Ties go to the first queue met in scan order, and the scan direction
        flips on every call. This makes full use of all cards and keeps too
        many tasks from being assigned to the lower-indexed ones.
        """
        scan_order = range(len(self.queues))
        if self.reverse_sort:
            scan_order = reversed(scan_order)

        best_index = -1
        most_free = 0
        for i in scan_order:
            free_slots = self.capacity - self.queues[i].qsize()
            # Strict comparison keeps the first queue seen among equals.
            if free_slots > most_free:
                most_free = free_slots
                best_index = i

        self.reverse_sort = not self.reverse_sort
        return best_index
|