mindstudio-probe 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +201 -201
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +36 -34
- mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +1 -1
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +1 -0
- msprobe/README.md +131 -237
- msprobe/__init__.py +16 -1
- msprobe/{config/config.json → config.json} +47 -49
- msprobe/core/advisor/advisor.py +124 -124
- msprobe/core/advisor/advisor_const.py +58 -59
- msprobe/core/advisor/advisor_result.py +58 -58
- msprobe/core/common/const.py +402 -318
- msprobe/core/common/exceptions.py +99 -99
- msprobe/core/common/{file_check.py → file_utils.py} +523 -283
- msprobe/core/common/inplace_op_checker.py +38 -0
- msprobe/core/common/inplace_ops.yaml +251 -0
- msprobe/core/common/log.py +86 -69
- msprobe/core/common/utils.py +371 -616
- msprobe/core/common_config.py +78 -71
- msprobe/core/compare/acc_compare.py +472 -298
- msprobe/core/compare/check.py +180 -95
- msprobe/core/compare/compare_cli.py +69 -49
- msprobe/core/compare/highlight.py +259 -222
- msprobe/core/compare/multiprocessing_compute.py +174 -149
- msprobe/core/compare/npy_compare.py +310 -295
- msprobe/core/compare/utils.py +464 -429
- msprobe/core/data_dump/data_collector.py +153 -144
- msprobe/core/data_dump/data_processor/base.py +337 -293
- msprobe/core/data_dump/data_processor/factory.py +76 -59
- msprobe/core/data_dump/data_processor/mindspore_processor.py +192 -198
- msprobe/core/data_dump/data_processor/pytorch_processor.py +383 -389
- msprobe/core/data_dump/json_writer.py +117 -116
- msprobe/core/data_dump/scope.py +194 -178
- msprobe/core/grad_probe/constant.py +74 -70
- msprobe/core/grad_probe/grad_compare.py +170 -175
- msprobe/core/grad_probe/utils.py +77 -52
- msprobe/docs/01.installation.md +99 -0
- msprobe/docs/02.config_introduction.md +137 -0
- msprobe/docs/03.config_examples.md +237 -0
- msprobe/docs/04.acl_config_examples.md +78 -0
- msprobe/docs/05.data_dump_PyTorch.md +326 -0
- msprobe/docs/06.data_dump_MindSpore.md +285 -0
- msprobe/docs/07.accuracy_checker_PyTorch.md +297 -0
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +238 -0
- msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
- msprobe/docs/10.accuracy_compare_PyTorch.md +327 -0
- msprobe/docs/11.accuracy_compare_MindSpore.md +333 -0
- msprobe/docs/12.overflow_check_PyTorch.md +79 -0
- msprobe/docs/13.overflow_check_MindSpore.md +31 -0
- msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
- msprobe/docs/15.free_benchmarking_PyTorch.md +170 -0
- msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
- msprobe/{doc/grad_probe/grad_probe.md → docs/17.grad_probe.md} +205 -207
- msprobe/{pytorch/doc//321/205/320/254/320/270/321/207/342/225/221/342/224/220/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/206/320/277/320/244/321/205/320/277/342/225/243.md → docs/18.online_dispatch.md} +89 -90
- msprobe/docs/FAQ.md +189 -0
- msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
- msprobe/docs/img/free_benchmark_framework.png +0 -0
- msprobe/docs/img/ms_dump.png +0 -0
- msprobe/docs/img/ms_layer.png +0 -0
- msprobe/docs/img/pt_dump.png +0 -0
- msprobe/mindspore/__init__.py +2 -1
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +278 -245
- msprobe/mindspore/api_accuracy_checker/api_info.py +76 -69
- msprobe/mindspore/api_accuracy_checker/api_runner.py +155 -151
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +196 -196
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
- msprobe/mindspore/api_accuracy_checker/compute_element.py +238 -223
- msprobe/mindspore/api_accuracy_checker/main.py +8 -15
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +113 -113
- msprobe/mindspore/api_accuracy_checker/utils.py +79 -62
- msprobe/mindspore/cell_processor.py +58 -34
- msprobe/mindspore/common/const.py +108 -87
- msprobe/mindspore/common/log.py +37 -37
- msprobe/mindspore/common/utils.py +97 -57
- msprobe/mindspore/compare/distributed_compare.py +62 -75
- msprobe/mindspore/compare/layer_mapping.py +146 -0
- msprobe/mindspore/compare/modify_mapping.py +107 -0
- msprobe/mindspore/compare/ms_compare.py +357 -117
- msprobe/mindspore/compare/ms_graph_compare.py +364 -317
- msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -399
- msprobe/mindspore/debugger/debugger_config.py +69 -74
- msprobe/mindspore/debugger/precision_debugger.py +150 -107
- msprobe/mindspore/dump/dump_tool_factory.py +50 -35
- msprobe/mindspore/dump/hook_cell/api_registry.py +128 -104
- msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -53
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +994 -925
- msprobe/mindspore/dump/hook_cell/wrap_api.py +121 -0
- msprobe/mindspore/dump/jit_dump.py +96 -56
- msprobe/mindspore/dump/kernel_graph_dump.py +75 -60
- msprobe/mindspore/dump/kernel_kbyk_dump.py +79 -65
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +131 -116
- msprobe/mindspore/free_benchmark/common/config.py +27 -12
- msprobe/mindspore/free_benchmark/common/handler_params.py +32 -17
- msprobe/mindspore/free_benchmark/common/utils.py +85 -71
- msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -842
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +57 -42
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +122 -107
- msprobe/mindspore/free_benchmark/handler/base_handler.py +105 -90
- msprobe/mindspore/free_benchmark/handler/check_handler.py +56 -41
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +51 -36
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +36 -21
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +82 -67
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +36 -21
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +78 -63
- msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +77 -0
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +49 -34
- msprobe/mindspore/free_benchmark/perturbation/no_change.py +27 -12
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +44 -27
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +48 -33
- msprobe/mindspore/grad_probe/global_context.py +100 -91
- msprobe/mindspore/grad_probe/grad_analyzer.py +231 -231
- msprobe/mindspore/grad_probe/grad_monitor.py +27 -27
- msprobe/mindspore/grad_probe/grad_stat_csv.py +131 -131
- msprobe/mindspore/grad_probe/hook.py +94 -92
- msprobe/mindspore/grad_probe/utils.py +29 -28
- msprobe/mindspore/ms_config.py +128 -126
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +60 -45
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +49 -34
- msprobe/mindspore/runtime.py +4 -4
- msprobe/mindspore/service.py +297 -354
- msprobe/mindspore/task_handler_factory.py +24 -24
- msprobe/msprobe.py +105 -107
- msprobe/pytorch/__init__.py +23 -4
- msprobe/pytorch/api_accuracy_checker/common/config.py +70 -55
- msprobe/pytorch/api_accuracy_checker/common/utils.py +246 -165
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +230 -213
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +632 -581
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +416 -381
- msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +90 -73
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +265 -244
- msprobe/pytorch/api_accuracy_checker/config.yaml +10 -10
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +370 -332
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +221 -199
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +150 -134
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +518 -581
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +213 -74
- msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +218 -202
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +370 -324
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +227 -204
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +244 -218
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
- msprobe/pytorch/bench_functions/__init__.py +30 -15
- msprobe/pytorch/bench_functions/apply_adam_w.py +43 -28
- msprobe/pytorch/bench_functions/confusion_transpose.py +34 -19
- msprobe/pytorch/bench_functions/fast_gelu.py +70 -55
- msprobe/pytorch/bench_functions/layer_norm_eval.py +21 -6
- msprobe/pytorch/bench_functions/linear.py +27 -12
- msprobe/pytorch/bench_functions/matmul_backward.py +63 -48
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +538 -421
- msprobe/pytorch/bench_functions/rms_norm.py +30 -15
- msprobe/pytorch/bench_functions/rotary_mul.py +71 -52
- msprobe/pytorch/bench_functions/scaled_mask_softmax.py +41 -26
- msprobe/pytorch/bench_functions/swiglu.py +70 -55
- msprobe/pytorch/common/__init__.py +17 -2
- msprobe/pytorch/common/compare_script.template +14 -14
- msprobe/pytorch/common/log.py +33 -32
- msprobe/pytorch/common/parse_json.py +54 -39
- msprobe/pytorch/common/utils.py +310 -300
- msprobe/pytorch/compare/distributed_compare.py +66 -66
- msprobe/pytorch/compare/mapping.yaml +607 -607
- msprobe/pytorch/compare/match.py +49 -33
- msprobe/pytorch/compare/pt_compare.py +82 -40
- msprobe/pytorch/debugger/debugger_config.py +108 -95
- msprobe/pytorch/debugger/precision_debugger.py +173 -125
- msprobe/pytorch/free_benchmark/__init__.py +23 -8
- msprobe/pytorch/free_benchmark/common/constant.py +70 -70
- msprobe/pytorch/free_benchmark/common/counter.py +71 -71
- msprobe/pytorch/free_benchmark/common/enums.py +65 -37
- msprobe/pytorch/free_benchmark/common/params.py +144 -129
- msprobe/pytorch/free_benchmark/common/utils.py +118 -102
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +200 -179
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +119 -104
- msprobe/pytorch/free_benchmark/main.py +120 -105
- msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +28 -13
- msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +56 -41
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +105 -90
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +119 -104
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +87 -63
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +83 -68
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +43 -28
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +60 -45
- msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +34 -19
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +256 -217
- msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +54 -39
- msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +38 -23
- msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +45 -30
- msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +185 -170
- msprobe/pytorch/function_factory.py +91 -75
- msprobe/pytorch/functional/module_dump.py +84 -0
- msprobe/pytorch/grad_probe/grad_monitor.py +91 -90
- msprobe/pytorch/grad_probe/grad_stat_csv.py +128 -128
- msprobe/pytorch/hook_module/__init__.py +16 -1
- msprobe/pytorch/hook_module/api_registry.py +166 -161
- msprobe/pytorch/hook_module/hook_module.py +118 -120
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1877
- msprobe/pytorch/hook_module/utils.py +28 -29
- msprobe/pytorch/hook_module/wrap_aten.py +111 -110
- msprobe/pytorch/hook_module/wrap_distributed.py +77 -78
- msprobe/pytorch/hook_module/wrap_functional.py +104 -105
- msprobe/pytorch/hook_module/wrap_npu_custom.py +85 -84
- msprobe/pytorch/hook_module/wrap_tensor.py +69 -71
- msprobe/pytorch/hook_module/wrap_torch.py +84 -86
- msprobe/pytorch/hook_module/wrap_vf.py +60 -62
- msprobe/pytorch/module_processer.py +153 -138
- msprobe/pytorch/online_dispatch/__init__.py +20 -20
- msprobe/pytorch/online_dispatch/compare.py +235 -236
- msprobe/pytorch/online_dispatch/dispatch.py +271 -271
- msprobe/pytorch/online_dispatch/dump_compare.py +155 -156
- msprobe/pytorch/online_dispatch/single_compare.py +391 -391
- msprobe/pytorch/online_dispatch/torch_ops_config.yaml +57 -49
- msprobe/pytorch/online_dispatch/utils.py +127 -146
- msprobe/pytorch/parse.py +19 -4
- msprobe/pytorch/parse_tool/cli.py +31 -32
- msprobe/pytorch/parse_tool/lib/compare.py +259 -271
- msprobe/pytorch/parse_tool/lib/config.py +52 -52
- msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
- msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
- msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
- msprobe/pytorch/parse_tool/lib/parse_tool.py +161 -158
- msprobe/pytorch/parse_tool/lib/utils.py +320 -321
- msprobe/pytorch/parse_tool/lib/visualization.py +85 -91
- msprobe/pytorch/pt_config.py +317 -187
- msprobe/pytorch/service.py +311 -252
- mindstudio_probe-1.0.3.dist-info/RECORD +0 -272
- msprobe/config/README.md +0 -539
- msprobe/mindspore/doc/compare.md +0 -58
- msprobe/mindspore/doc/dump.md +0 -217
- msprobe/mindspore/dump/hook_cell/wrap_functional.py +0 -91
- msprobe/mindspore/dump/hook_cell/wrap_tensor.py +0 -63
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
- msprobe/pytorch/doc/FAQ.md +0 -193
- msprobe/pytorch/doc/api_accuracy_checker.md +0 -313
- msprobe/pytorch/doc/api_accuracy_checker_online.md +0 -187
- msprobe/pytorch/doc/dump.md +0 -260
- msprobe/pytorch/doc/msprobe/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/206/320/245/342/226/221/321/206/320/235/320/276dump/321/206/320/260/320/227/321/205/320/227/320/226/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -182
- msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -240
- msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
- msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
- msprobe/pytorch/doc/run_overflow_check.md +0 -25
- msprobe/pytorch/doc//321/206/320/247/320/260/321/206/320/260/320/227/321/206/320/255/320/226/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/205/320/254/342/225/221/321/206/320/251/320/277/321/211/320/272/320/234/321/210/320/277/320/221/321/205/320/242/320/234/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -151
- msprobe/pytorch/functional/data_processor.py +0 -0
- msprobe/pytorch/functional/dump_module.py +0 -39
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
- /msprobe/{config → docs}/img/free_benchmark.png +0 -0
- /msprobe/{doc/grad_probe/img/image-1.png → docs/img/grad_probe_image-1.png} +0 -0
- /msprobe/{doc/grad_probe/img/image-2.png → docs/img/grad_probe_image-2.png} +0 -0
- /msprobe/{doc/grad_probe/img/image-3.png → docs/img/grad_probe_image-3.png} +0 -0
- /msprobe/{doc/grad_probe/img/image-4.png → docs/img/grad_probe_image-4.png} +0 -0
- /msprobe/{doc/grad_probe/img/image.png → docs/img/grad_probe_image.png} +0 -0
- /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
|
@@ -1,204 +1,227 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import time
|
|
17
|
+
from collections import namedtuple
|
|
18
|
+
|
|
19
|
+
import pandas as pd
|
|
20
|
+
import torch
|
|
21
|
+
import torch.multiprocessing as mp
|
|
22
|
+
|
|
23
|
+
from msprobe.core.common.const import Const, CompareConst
|
|
24
|
+
from msprobe.pytorch.api_accuracy_checker.compare.api_precision_compare import online_api_precision_compare
|
|
25
|
+
from msprobe.pytorch.api_accuracy_checker.compare.compare_utils import DETAIL_TEST_ROWS, thousandth_standard_api, \
|
|
26
|
+
binary_standard_api, absolute_standard_api
|
|
27
|
+
from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import UtDataInfo, exec_api
|
|
28
|
+
from msprobe.pytorch.common.log import logger
|
|
29
|
+
from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.attl import move2target_device
|
|
30
|
+
from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_cpu_params
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# APIs that are compared directly NPU vs GPU: the union of the absolute-,
# binary- and thousandth-standard API collections.
CompareApi = set().union(absolute_standard_api, binary_standard_api, thousandth_standard_api)
|
|
35
|
+
|
|
36
|
+
# Timestamp taken once at import time so both CSV names carry the same suffix.
current_time = time.strftime("%Y%m%d%H%M%S")
# NOTE(review): the "*" after "rank" looks like a placeholder filled in downstream - confirm.
ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME = f"api_precision_compare_result_{current_time}_rank*.csv"
ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME = f"api_precision_compare_details_{current_time}_rank*.csv"

# Arguments handed to online_api_precision_compare for a single API call.
OnlineApiPrecisionCompareConfig = namedtuple(
    'OnlineApiPrecisionCompareConfig',
    ['npu_data', 'gpu_data', 'rank', 'result_csv_path', 'details_csv_path'],
)
# Bundle of (Comparator instance, online run-UT handler function, run_ut config).
CommonCompareConfig = namedtuple(
    'CommonCompareConfig',
    ['compare', 'handle_func', 'config'],
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def run_ut_process(xpu_id, consumer_queue, common_config, api_precision_csv_file):
    """Consume api data from consumer_queue (shared with ConsumerDispatcher) until "KILL_" is received.

    APIs whose name is in CompareApi are compared NPU vs GPU through
    online_compare; all others go through online_precision_compare
    (NPUvsCPU vs GPUvsCPU).

    :param xpu_id: int, index used to build the ``cuda:{xpu_id}`` device
    :param consumer_queue: shared queues of ConsumerDispatcher
    :param common_config: namedtuple of CommonCompareConfig
    :param api_precision_csv_file: list, length is 2, result file name and details file name
    :return: None
    """
    gpu_device = torch.device(f'cuda:{xpu_id}')

    while True:
        # get() blocks until an item is available, so polling empty() with a
        # sleep first is unnecessary (and empty() is not reliable for
        # multiprocessing queues).
        api_data = consumer_queue.get()
        if api_data == "KILL_":
            # current consumer finish
            return

        try:
            _, api_name, _ = api_data.name.split(Const.SEP)
        except (AttributeError, ValueError) as err:
            # A malformed item must not terminate the worker: a dead consumer
            # leaves its queue undrained and the producer side waiting on it.
            logger.error(f"Skip invalid api data in run_ut_process: {str(err)}")
            continue

        if api_name in CompareApi:
            # NPU vs GPU
            online_compare(api_data, gpu_device, common_config)
        else:
            # NPUvsCPU vs GPUvsCPU
            online_precision_compare(api_data, gpu_device, common_config, api_precision_csv_file)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def online_precision_compare(api_data, device, common_config, api_precision_csv_file):
    """Online run_ut for precision compare: NPUvsCPU vs GPUvsCPU.

    Steps:
      1. build the NPUvsCPU detail table from the recorded NPU result and a CPU run
      2. build the GPUvsCPU detail table from a GPU run and the same CPU output
      3. pass both tables to online_api_precision_compare

    Any exception is logged, recorded as a SKIP row in the summary csv, and
    the CUDA cache is released in every case.

    :param api_data: api data item; its name, args, kwargs, result and rank are read here
    :param device: target device for the GPU run
    :param common_config: namedtuple of CommonCompareConfig
    :param api_precision_csv_file: [result_file_name, details_file_name]
    """
    comparator = common_config.compare
    run_api = common_config.handle_func
    ut_config = common_config.config
    api_full_name = api_data.name
    api_type, api_name, _ = api_full_name.split(Const.SEP)
    npu_args = api_data.args
    npu_kwargs = api_data.kwargs
    npu_out = api_data.result

    # A truthy "device" kwarg is removed before the parameters are regenerated for CPU.
    if npu_kwargs.get("device"):
        npu_kwargs.pop("device")

    try:
        # NPU vs CPU
        cpu_args, cpu_kwargs = generate_cpu_params(npu_args, npu_kwargs, False, api_name)
        cpu_out = exec_api(api_type, api_name, Const.CPU_LOWERCASE, cpu_args, cpu_kwargs)
        npu_info = UtDataInfo(None, None, npu_out, cpu_out, None, [], None, rank=api_data.rank)
        npu_rows = comparator.compare_output(api_full_name, npu_info, True)
        npu_data = pd.DataFrame(npu_rows, columns=DETAIL_TEST_ROWS[-1])

        # GPU vs CPU
        # args, kwargs -> gpu, result -> npu
        gpu_api_data = move2target_device(api_data, device)
        gpu_run_info = run_api(api_full_name, gpu_api_data, ut_config.backward_content)
        gpu_out = gpu_run_info.bench_output
        gpu_info = UtDataInfo(None, None, gpu_out, cpu_out, None, [], None, rank=api_data.rank)
        gpu_rows = comparator.compare_output(api_full_name, gpu_info, True)
        gpu_data = pd.DataFrame(gpu_rows, columns=DETAIL_TEST_ROWS[-1])

        # NPUvsCPU vs GPUvsCPU
        result_file_name, details_file_name = api_precision_csv_file
        online_api_precision_compare(
            OnlineApiPrecisionCompareConfig(npu_data, gpu_data, api_data.rank,
                                            result_file_name, details_file_name)
        )

    except Exception as err:
        err_msg = str(err)
        if "expected scalar type Long" in err_msg:
            logger.warning(
                f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
                f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
        elif api_type == Const.DISTRIBUTED:
            logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
        else:
            logger.error(f"Run {api_full_name} UT Error: {err_msg}")

        # Record the failed API as skipped, with the error text as its message.
        skip_row = (api_full_name, CompareConst.SKIP, CompareConst.SKIP, [[err_msg]], api_data.rank)
        comparator.write_summary_csv(skip_row)

    finally:
        torch.cuda.empty_cache()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def online_compare(api_data, device, common_config):
    """Online run_ut for compare: NPU vs GPU.

    Moves the api data to ``device``, runs the handler function on it and
    compares the outputs. Any exception is logged and recorded as a SKIP row
    in the summary csv; the CUDA cache is released in every case.

    :param api_data: api data item; its name and rank are read here
    :param device: target device the api data is moved to
    :param common_config: namedtuple of CommonCompareConfig
    """
    compare, func, config = common_config.compare, common_config.handle_func, common_config.config
    api_full_name = api_data.name
    try:
        # The device transfer is inside the try (as in online_precision_compare)
        # so that a failure here is reported as a skipped API and still reaches
        # the cache cleanup, instead of terminating the worker process.
        api_data = move2target_device(api_data, device)
        data_info = func(api_full_name, api_data, config.backward_content)
        is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info)
        logger.info(f"running api_full_name {api_full_name} ut, "
                    f"is_fwd_success: {is_fwd_success}, "
                    f"is_bwd_success: {is_bwd_success}")
    except Exception as err:
        [api_type, api_name, _] = api_full_name.split(Const.SEP)
        if "expected scalar type Long" in str(err):
            logger.warning(
                f"API {api_name} not support int32 tensor in CPU, please add {api_name} to CONVERT_API "
                f"'int32_to_int64' list in accuracy_tools/msprobe/core/common/const.py file.")
        elif api_type in [Const.DISTRIBUTED]:
            logger.info(f"{api_full_name} is not supported for run ut. SKIP.")
        else:
            logger.error(f"Run {api_full_name} UT Error: {str(err)}")

        # Record the failed API as skipped, with the error text as its message.
        compare.write_summary_csv((api_full_name, CompareConst.SKIP, CompareConst.SKIP, [[str(err)]], api_data.rank))

    finally:
        torch.cuda.empty_cache()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class ConsumerDispatcher:
    """Distribute api data over worker processes that execute run_ut_process.

    start() creates one bounded queue and one process per worker,
    update_consume_queue() feeds an item to the queue with the most free
    slots, and stop() sends the "KILL_" sentinel to every queue and joins
    the workers.
    """

    def __init__(self, compare, capacity=10, num_workers=8, device: str = "gpu") -> None:
        """
        :param compare: Comparator instance passed on to every worker
        :param capacity: maximum number of pending items per worker queue
        :param num_workers: number of worker processes; worker i receives xpu_id i
        :param device: device label, only stored on the instance by this class
        """
        # Choose the start method before any multiprocessing primitive is
        # created, so the lock and queue below are built for the same "spawn"
        # context that the workers are started with.
        mp.set_start_method("spawn", force=True)
        self.num_workers = num_workers
        self.capacity = capacity
        self.compare = compare
        self.queues = []
        self.processes = []
        # Scan direction flag, toggled on every queue selection.
        self.reverse_sort = False
        self.pool = None
        self.device = device
        self.data_id = 0
        self.lock = mp.Lock()
        self.result_queue = mp.Queue()

    def start(self, handle_func, config):
        """Create the worker queues and launch one run_ut_process per queue.

        :param handle_func: function stored as CommonCompareConfig.handle_func
        :param config: run_ut config stored as CommonCompareConfig.config
        """
        self.queues = [mp.Queue(maxsize=self.capacity) for _ in range(self.num_workers)]
        api_precision_csv_file = [
            ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME,
            ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME
        ]
        common_config = CommonCompareConfig(self.compare, handle_func, config)
        for xpu_id, q in enumerate(self.queues):
            p = mp.Process(name="run_ut_process", target=run_ut_process,
                           args=(xpu_id, q, common_config, api_precision_csv_file))

            p.start()
            self.processes.append(p)
        logger.info(
            f'Api_precision_compare task result will be saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}')
        logger.info(
            f"Api_precision_compare task details will be saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")
        logger.info("Successfully start unittest process.")

    def stop(self):
        """Send the "KILL_" sentinel to every worker queue and wait for all workers to exit."""
        for q in self.queues:
            # Wait for room in the queue before enqueueing the sentinel.
            while q.full():
                time.sleep(0.1)
            q.put("KILL_")

        for p in self.processes:
            p.join()
        logger.info("Successfully stop unittest process.")
        logger.info(f"Api_precision_compare task result is saved in {ONLINE_API_PRECISION_COMPARE_RESULT_FILE_NAME}")
        logger.info(f"Api_precision_compare task details is saved in {ONLINE_API_PRECISION_COMPARE_DETAILS_FILE_NAME}")

    def update_consume_queue(self, api_data):
        """Put ``api_data`` on the queue with the most free slots, retrying every 0.1s while all are full."""
        while True:
            index = self._choose_max_empty_site_strategy()
            if index != -1:
                q = self.queues[index]
                q.put(api_data)
                break
            time.sleep(0.1)

    def _choose_max_empty_site_strategy(self):
        """Return the index of the queue with the most free slots, or -1 when no queue has room.

        The scan direction alternates on every call, so ties between equally
        empty queues are not always resolved in favour of the first ones.
        """
        maximum = 0
        index = -1
        # Make full use of multi-card resources and avoid assigning too many
        # tasks to the leading cards.
        _reverse = 1 if not self.reverse_sort else -1
        for i, q in enumerate(self.queues[::_reverse]):
            empty_site = self.capacity - q.qsize()
            if empty_site > maximum:
                maximum = empty_site
                index = i
        # Map a position found in the reversed view back to the real queue index.
        index = len(self.queues) - index - 1 if index != -1 and self.reverse_sort else index
        self.reverse_sort = not self.reverse_sort
        return index
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
from functools import wraps
|
|
18
|
+
|
|
19
|
+
import torch
|
|
20
|
+
from torch.utils._python_dispatch import TorchDispatchMode
|
|
21
|
+
from msprobe.pytorch.api_accuracy_checker.common.utils import ApiData
|
|
22
|
+
from msprobe.pytorch.common.utils import get_tensor_rank
|
|
23
|
+
from msprobe.core.common.const import Const
|
|
24
|
+
from msprobe.pytorch.common.log import logger
|
|
25
|
+
from msprobe.core.common.file_utils import load_yaml
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def singleton(cls):
    """
    Class decorator that makes ``cls`` behave as a singleton.

    The decorated name becomes a factory function: the first call constructs
    ``cls`` and every later call returns that same object.

    Constructor arguments are forwarded on the first call only. Arguments
    passed on later calls are ignored, because the existing instance is
    returned as-is.

    Args:
        cls: the class to wrap.

    Returns:
        A function that returns the single shared instance of ``cls``.
    """
    _instance = {}

    @wraps(cls)
    def inner(*args, **kwargs):
        if cls not in _instance:
            # First request: build the one and only instance.
            _instance[cls] = cls(*args, **kwargs)
        return _instance[cls]

    return inner
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@singleton
class Counter:
    """Shared holder for per-API call indices (one instance, via ``singleton``)."""

    def __init__(self) -> None:
        # Starts empty; entries are added by the code that uses the counter.
        self.index_dict = dict()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Module-wide Counter instance.
counter = Counter()
# torch_ops_config.yaml is looked up in the directory that holds this module
# (the module path is resolved with os.path.realpath first).
yaml_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "torch_ops_config.yaml",
)
yaml_file = load_yaml(yaml_path)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class AccuracyCheckerDispatch(TorchDispatchMode):
    """
    Dispatch mode that runs each intercepted op and reports the call to ``attl``.

    Ops whose name is listed under ``aten_ops_blacklist`` in the loaded yaml
    config are executed but not reported. Every other op is executed, wrapped
    into an ``ApiData`` together with its arguments and result, and handed to
    ``attl.upload`` (when ``attl.nfs_path`` is truthy) or ``attl.send``.
    """

    def __init__(self, attl):
        """
        Args:
            attl: object providing ``nfs_path``, ``upload`` and ``send``.
        """
        super(AccuracyCheckerDispatch, self).__init__()
        self.attl = attl
        self.counter = counter
        self.aten_ops_blacklist = yaml_file.get('aten_ops_blacklist', [])
        self.npu_adjust_autogard = yaml_file.get('npu_adjust_autogard', [])

    def __torch_dispatch__(self, func, types, args=None, kwargs=None):
        """
        Execute ``func`` and, unless it is blacklisted, report the call.

        Args:
            func: the op being dispatched; the part of its ``__name__`` before
                the first ``Const.SEP`` is used as the api name.
            types: unused here.
            args: positional arguments for ``func`` (``None`` means none).
            kwargs: keyword arguments for ``func`` (``None`` means none).

        Returns:
            Whatever ``func(*args, **kwargs)`` returns.
        """
        # The signature lets both be omitted; unpacking None would raise TypeError.
        if args is None:
            args = ()
        if kwargs is None:
            kwargs = {}

        aten_api = func.__name__.split(Const.SEP)[0]
        self.enable_autogard(aten_api)
        if aten_api in self.aten_ops_blacklist:
            # Blacklisted ops are only executed, never reported.
            return func(*args, **kwargs)

        res = func(*args, **kwargs)
        cur_rank = get_tensor_rank(args, res)
        # Index of this call among the reported calls of the same api (starts at 0).
        cur_api_number = self.counter.index_dict.setdefault(aten_api, 0)
        api_name = f'{Const.ATEN}{Const.SEP}{aten_api}{Const.SEP}{cur_api_number}'
        logger.info(f"tools is dumping api: {api_name}")
        api_data = ApiData(api_name, args, kwargs, res, 0, cur_rank)
        # The "device" keyword is dropped from the reported data.
        if "device" in api_data.kwargs:
            api_data.kwargs.pop("device")
        if self.attl.nfs_path:
            self.attl.upload(api_data)
        else:
            self.attl.send(api_data)
        # Only advance the index once the data has been handed over.
        self.counter.index_dict[aten_api] += 1

        return res

    def enable_autogard(self, aten_api):
        """
        For ops listed under ``npu_adjust_autogard``, mark the
        ``AutogradFunctionality`` dispatch key as not excluded.

        Args:
            aten_api: api name to look up in ``self.npu_adjust_autogard``.
        """
        if aten_api in self.npu_adjust_autogard:
            torch._C._dispatch_tls_set_dispatch_key_excluded(torch._C.DispatchKey.AutogradFunctionality, False)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def dispatch4data(func, attl, status):
    """
    Wrap ``func`` so that it optionally runs under ``AccuracyCheckerDispatch``.

    Args:
        func: the callable to wrap.
        attl: passed to ``AccuracyCheckerDispatch`` when dispatch is enabled.
        status: truthy to run ``func`` inside ``AccuracyCheckerDispatch(attl)``,
            falsy to call ``func`` directly.

    Returns:
        A wrapper with the metadata of ``func`` that returns what ``func`` returns.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        if status:
            with AccuracyCheckerDispatch(attl):
                outcome = func(*args, **kwargs)
            return outcome
        # Dispatch disabled: plain pass-through.
        return func(*args, **kwargs)

    return wrapper
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def run_ut_dispatch(attl, status):
    """
    This function is called by online_run_ut.
    It is used to enable or disable dispatch for the torch.autograd.backward function
    by replacing it with a wrapper built by dispatch4data.

    The wrapper is always built around the un-patched backward function. Calling
    this function again (for example with status=True and later status=False)
    therefore replaces the previously installed wrapper instead of stacking a new
    wrapper on top of it, which would leave the earlier status in effect.

    Args:
        attl (ATTL): online_run_ut class ATTL, which is used to upload or send api data to server.
        status (bool): True means enable dispatch, False means disable dispatch.
    """
    current_backward = torch.autograd.backward
    # A wrapper installed by an earlier call remembers the function it wrapped;
    # fall back to the current function when no such wrapper is installed.
    original_backward = getattr(current_backward, "_msprobe_original_backward", current_backward)
    patched_backward = dispatch4data(original_backward, attl, status)
    patched_backward._msprobe_original_backward = original_backward
    torch.autograd.backward = patched_backward
|