mindstudio-probe 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +201 -201
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +36 -34
- mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +1 -1
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +1 -0
- msprobe/README.md +131 -237
- msprobe/__init__.py +16 -1
- msprobe/{config/config.json → config.json} +47 -49
- msprobe/core/advisor/advisor.py +124 -124
- msprobe/core/advisor/advisor_const.py +58 -59
- msprobe/core/advisor/advisor_result.py +58 -58
- msprobe/core/common/const.py +402 -318
- msprobe/core/common/exceptions.py +99 -99
- msprobe/core/common/{file_check.py → file_utils.py} +523 -283
- msprobe/core/common/inplace_op_checker.py +38 -0
- msprobe/core/common/inplace_ops.yaml +251 -0
- msprobe/core/common/log.py +86 -69
- msprobe/core/common/utils.py +371 -616
- msprobe/core/common_config.py +78 -71
- msprobe/core/compare/acc_compare.py +472 -298
- msprobe/core/compare/check.py +180 -95
- msprobe/core/compare/compare_cli.py +69 -49
- msprobe/core/compare/highlight.py +259 -222
- msprobe/core/compare/multiprocessing_compute.py +174 -149
- msprobe/core/compare/npy_compare.py +310 -295
- msprobe/core/compare/utils.py +464 -429
- msprobe/core/data_dump/data_collector.py +153 -144
- msprobe/core/data_dump/data_processor/base.py +337 -293
- msprobe/core/data_dump/data_processor/factory.py +76 -59
- msprobe/core/data_dump/data_processor/mindspore_processor.py +192 -198
- msprobe/core/data_dump/data_processor/pytorch_processor.py +383 -389
- msprobe/core/data_dump/json_writer.py +117 -116
- msprobe/core/data_dump/scope.py +194 -178
- msprobe/core/grad_probe/constant.py +74 -70
- msprobe/core/grad_probe/grad_compare.py +170 -175
- msprobe/core/grad_probe/utils.py +77 -52
- msprobe/docs/01.installation.md +99 -0
- msprobe/docs/02.config_introduction.md +137 -0
- msprobe/docs/03.config_examples.md +237 -0
- msprobe/docs/04.acl_config_examples.md +78 -0
- msprobe/docs/05.data_dump_PyTorch.md +326 -0
- msprobe/docs/06.data_dump_MindSpore.md +285 -0
- msprobe/docs/07.accuracy_checker_PyTorch.md +297 -0
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +238 -0
- msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
- msprobe/docs/10.accuracy_compare_PyTorch.md +327 -0
- msprobe/docs/11.accuracy_compare_MindSpore.md +333 -0
- msprobe/docs/12.overflow_check_PyTorch.md +79 -0
- msprobe/docs/13.overflow_check_MindSpore.md +31 -0
- msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
- msprobe/docs/15.free_benchmarking_PyTorch.md +170 -0
- msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
- msprobe/{doc/grad_probe/grad_probe.md → docs/17.grad_probe.md} +205 -207
- msprobe/{pytorch/doc//321/205/320/254/320/270/321/207/342/225/221/342/224/220/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/206/320/277/320/244/321/205/320/277/342/225/243.md → docs/18.online_dispatch.md} +89 -90
- msprobe/docs/FAQ.md +189 -0
- msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
- msprobe/docs/img/free_benchmark_framework.png +0 -0
- msprobe/docs/img/ms_dump.png +0 -0
- msprobe/docs/img/ms_layer.png +0 -0
- msprobe/docs/img/pt_dump.png +0 -0
- msprobe/mindspore/__init__.py +2 -1
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +278 -245
- msprobe/mindspore/api_accuracy_checker/api_info.py +76 -69
- msprobe/mindspore/api_accuracy_checker/api_runner.py +155 -151
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +196 -196
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
- msprobe/mindspore/api_accuracy_checker/compute_element.py +238 -223
- msprobe/mindspore/api_accuracy_checker/main.py +8 -15
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +113 -113
- msprobe/mindspore/api_accuracy_checker/utils.py +79 -62
- msprobe/mindspore/cell_processor.py +58 -34
- msprobe/mindspore/common/const.py +108 -87
- msprobe/mindspore/common/log.py +37 -37
- msprobe/mindspore/common/utils.py +97 -57
- msprobe/mindspore/compare/distributed_compare.py +62 -75
- msprobe/mindspore/compare/layer_mapping.py +146 -0
- msprobe/mindspore/compare/modify_mapping.py +107 -0
- msprobe/mindspore/compare/ms_compare.py +357 -117
- msprobe/mindspore/compare/ms_graph_compare.py +364 -317
- msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -399
- msprobe/mindspore/debugger/debugger_config.py +69 -74
- msprobe/mindspore/debugger/precision_debugger.py +150 -107
- msprobe/mindspore/dump/dump_tool_factory.py +50 -35
- msprobe/mindspore/dump/hook_cell/api_registry.py +128 -104
- msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -53
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +994 -925
- msprobe/mindspore/dump/hook_cell/wrap_api.py +121 -0
- msprobe/mindspore/dump/jit_dump.py +96 -56
- msprobe/mindspore/dump/kernel_graph_dump.py +75 -60
- msprobe/mindspore/dump/kernel_kbyk_dump.py +79 -65
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +131 -116
- msprobe/mindspore/free_benchmark/common/config.py +27 -12
- msprobe/mindspore/free_benchmark/common/handler_params.py +32 -17
- msprobe/mindspore/free_benchmark/common/utils.py +85 -71
- msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -842
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +57 -42
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +122 -107
- msprobe/mindspore/free_benchmark/handler/base_handler.py +105 -90
- msprobe/mindspore/free_benchmark/handler/check_handler.py +56 -41
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +51 -36
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +36 -21
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +82 -67
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +36 -21
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +78 -63
- msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +77 -0
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +49 -34
- msprobe/mindspore/free_benchmark/perturbation/no_change.py +27 -12
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +44 -27
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +48 -33
- msprobe/mindspore/grad_probe/global_context.py +100 -91
- msprobe/mindspore/grad_probe/grad_analyzer.py +231 -231
- msprobe/mindspore/grad_probe/grad_monitor.py +27 -27
- msprobe/mindspore/grad_probe/grad_stat_csv.py +131 -131
- msprobe/mindspore/grad_probe/hook.py +94 -92
- msprobe/mindspore/grad_probe/utils.py +29 -28
- msprobe/mindspore/ms_config.py +128 -126
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +60 -45
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +49 -34
- msprobe/mindspore/runtime.py +4 -4
- msprobe/mindspore/service.py +297 -354
- msprobe/mindspore/task_handler_factory.py +24 -24
- msprobe/msprobe.py +105 -107
- msprobe/pytorch/__init__.py +23 -4
- msprobe/pytorch/api_accuracy_checker/common/config.py +70 -55
- msprobe/pytorch/api_accuracy_checker/common/utils.py +246 -165
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +230 -213
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +632 -581
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +416 -381
- msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +90 -73
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +265 -244
- msprobe/pytorch/api_accuracy_checker/config.yaml +10 -10
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +370 -332
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +221 -199
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +150 -134
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +518 -581
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +213 -74
- msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +218 -202
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +370 -324
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +227 -204
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +244 -218
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
- msprobe/pytorch/bench_functions/__init__.py +30 -15
- msprobe/pytorch/bench_functions/apply_adam_w.py +43 -28
- msprobe/pytorch/bench_functions/confusion_transpose.py +34 -19
- msprobe/pytorch/bench_functions/fast_gelu.py +70 -55
- msprobe/pytorch/bench_functions/layer_norm_eval.py +21 -6
- msprobe/pytorch/bench_functions/linear.py +27 -12
- msprobe/pytorch/bench_functions/matmul_backward.py +63 -48
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +538 -421
- msprobe/pytorch/bench_functions/rms_norm.py +30 -15
- msprobe/pytorch/bench_functions/rotary_mul.py +71 -52
- msprobe/pytorch/bench_functions/scaled_mask_softmax.py +41 -26
- msprobe/pytorch/bench_functions/swiglu.py +70 -55
- msprobe/pytorch/common/__init__.py +17 -2
- msprobe/pytorch/common/compare_script.template +14 -14
- msprobe/pytorch/common/log.py +33 -32
- msprobe/pytorch/common/parse_json.py +54 -39
- msprobe/pytorch/common/utils.py +310 -300
- msprobe/pytorch/compare/distributed_compare.py +66 -66
- msprobe/pytorch/compare/mapping.yaml +607 -607
- msprobe/pytorch/compare/match.py +49 -33
- msprobe/pytorch/compare/pt_compare.py +82 -40
- msprobe/pytorch/debugger/debugger_config.py +108 -95
- msprobe/pytorch/debugger/precision_debugger.py +173 -125
- msprobe/pytorch/free_benchmark/__init__.py +23 -8
- msprobe/pytorch/free_benchmark/common/constant.py +70 -70
- msprobe/pytorch/free_benchmark/common/counter.py +71 -71
- msprobe/pytorch/free_benchmark/common/enums.py +65 -37
- msprobe/pytorch/free_benchmark/common/params.py +144 -129
- msprobe/pytorch/free_benchmark/common/utils.py +118 -102
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +200 -179
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +119 -104
- msprobe/pytorch/free_benchmark/main.py +120 -105
- msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +28 -13
- msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +56 -41
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +105 -90
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +119 -104
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +87 -63
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +83 -68
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +43 -28
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +60 -45
- msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +34 -19
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +256 -217
- msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +54 -39
- msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +38 -23
- msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +45 -30
- msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +185 -170
- msprobe/pytorch/function_factory.py +91 -75
- msprobe/pytorch/functional/module_dump.py +84 -0
- msprobe/pytorch/grad_probe/grad_monitor.py +91 -90
- msprobe/pytorch/grad_probe/grad_stat_csv.py +128 -128
- msprobe/pytorch/hook_module/__init__.py +16 -1
- msprobe/pytorch/hook_module/api_registry.py +166 -161
- msprobe/pytorch/hook_module/hook_module.py +118 -120
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1877
- msprobe/pytorch/hook_module/utils.py +28 -29
- msprobe/pytorch/hook_module/wrap_aten.py +111 -110
- msprobe/pytorch/hook_module/wrap_distributed.py +77 -78
- msprobe/pytorch/hook_module/wrap_functional.py +104 -105
- msprobe/pytorch/hook_module/wrap_npu_custom.py +85 -84
- msprobe/pytorch/hook_module/wrap_tensor.py +69 -71
- msprobe/pytorch/hook_module/wrap_torch.py +84 -86
- msprobe/pytorch/hook_module/wrap_vf.py +60 -62
- msprobe/pytorch/module_processer.py +153 -138
- msprobe/pytorch/online_dispatch/__init__.py +20 -20
- msprobe/pytorch/online_dispatch/compare.py +235 -236
- msprobe/pytorch/online_dispatch/dispatch.py +271 -271
- msprobe/pytorch/online_dispatch/dump_compare.py +155 -156
- msprobe/pytorch/online_dispatch/single_compare.py +391 -391
- msprobe/pytorch/online_dispatch/torch_ops_config.yaml +57 -49
- msprobe/pytorch/online_dispatch/utils.py +127 -146
- msprobe/pytorch/parse.py +19 -4
- msprobe/pytorch/parse_tool/cli.py +31 -32
- msprobe/pytorch/parse_tool/lib/compare.py +259 -271
- msprobe/pytorch/parse_tool/lib/config.py +52 -52
- msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
- msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
- msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
- msprobe/pytorch/parse_tool/lib/parse_tool.py +161 -158
- msprobe/pytorch/parse_tool/lib/utils.py +320 -321
- msprobe/pytorch/parse_tool/lib/visualization.py +85 -91
- msprobe/pytorch/pt_config.py +317 -187
- msprobe/pytorch/service.py +311 -252
- mindstudio_probe-1.0.3.dist-info/RECORD +0 -272
- msprobe/config/README.md +0 -539
- msprobe/mindspore/doc/compare.md +0 -58
- msprobe/mindspore/doc/dump.md +0 -217
- msprobe/mindspore/dump/hook_cell/wrap_functional.py +0 -91
- msprobe/mindspore/dump/hook_cell/wrap_tensor.py +0 -63
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
- msprobe/pytorch/doc/FAQ.md +0 -193
- msprobe/pytorch/doc/api_accuracy_checker.md +0 -313
- msprobe/pytorch/doc/api_accuracy_checker_online.md +0 -187
- msprobe/pytorch/doc/dump.md +0 -260
- msprobe/pytorch/doc/msprobe/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/206/320/245/342/226/221/321/206/320/235/320/276dump/321/206/320/260/320/227/321/205/320/227/320/226/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -182
- msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -240
- msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
- msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
- msprobe/pytorch/doc/run_overflow_check.md +0 -25
- msprobe/pytorch/doc//321/206/320/247/320/260/321/206/320/260/320/227/321/206/320/255/320/226/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/205/320/254/342/225/221/321/206/320/251/320/277/321/211/320/272/320/234/321/210/320/277/320/221/321/205/320/242/320/234/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -151
- msprobe/pytorch/functional/data_processor.py +0 -0
- msprobe/pytorch/functional/dump_module.py +0 -39
- {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
- /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
- /msprobe/{config → docs}/img/free_benchmark.png +0 -0
- /msprobe/{doc/grad_probe/img/image-1.png → docs/img/grad_probe_image-1.png} +0 -0
- /msprobe/{doc/grad_probe/img/image-2.png → docs/img/grad_probe_image-2.png} +0 -0
- /msprobe/{doc/grad_probe/img/image-3.png → docs/img/grad_probe_image-3.png} +0 -0
- /msprobe/{doc/grad_probe/img/image-4.png → docs/img/grad_probe_image-4.png} +0 -0
- /msprobe/{doc/grad_probe/img/image.png → docs/img/grad_probe_image.png} +0 -0
- /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
msprobe/core/advisor/advisor.py
CHANGED
|
@@ -1,124 +1,124 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
import
|
|
19
|
-
|
|
20
|
-
from msprobe.core.
|
|
21
|
-
from msprobe.core.
|
|
22
|
-
from msprobe.core.common.
|
|
23
|
-
from msprobe.core.common.
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
self.
|
|
34
|
-
self.
|
|
35
|
-
self.
|
|
36
|
-
|
|
37
|
-
@staticmethod
|
|
38
|
-
def deterministic_advisor(message, node_name):
|
|
39
|
-
for api_name in AdvisorConst.NEED_DETERMINISTIC_API:
|
|
40
|
-
if api_name in node_name:
|
|
41
|
-
return AdvisorConst.DETERMINISTIC_SUGGEST
|
|
42
|
-
return message
|
|
43
|
-
|
|
44
|
-
@staticmethod
|
|
45
|
-
def batch_norm_advisor(message, node_name):
|
|
46
|
-
if AdvisorConst.FUNC_BATCH_NORM in node_name and AdvisorConst.FORWARD_INPUT_1 in node_name:
|
|
47
|
-
message = AdvisorConst.BATCH_NORM_SUGGEST
|
|
48
|
-
return message
|
|
49
|
-
|
|
50
|
-
def analyze_unmatched(self, analyze_data):
|
|
51
|
-
if self.file_type == Const.ALL:
|
|
52
|
-
accuracy_unmatched = analyze_data[
|
|
53
|
-
analyze_data[CompareConst.ACCURACY] == CompareConst.ACCURACY_CHECK_UNMATCH]
|
|
54
|
-
else:
|
|
55
|
-
accuracy_unmatched = analyze_data[(analyze_data[CompareConst.NPU_SHAPE] == CompareConst.NAN) |
|
|
56
|
-
(analyze_data[CompareConst.BENCH_SHAPE] == CompareConst.NAN)]
|
|
57
|
-
num_unmatch = len(accuracy_unmatched)
|
|
58
|
-
if num_unmatch != 0:
|
|
59
|
-
for i in range(len(accuracy_unmatched)):
|
|
60
|
-
item = accuracy_unmatched.iloc[i]
|
|
61
|
-
logger.warning("The tensor name matches but the shape or dtype does not match: {}"
|
|
62
|
-
.format(item[CompareConst.NPU_NAME]))
|
|
63
|
-
|
|
64
|
-
def gen_advisor_result(self, pd_data):
|
|
65
|
-
first_failing_data = pd_data.iloc[0]
|
|
66
|
-
node_name = first_failing_data[CompareConst.NPU_NAME]
|
|
67
|
-
index = first_failing_data['index']
|
|
68
|
-
message = self.gen_advisor_message(node_name)
|
|
69
|
-
logger.warning("Find %s accuracy not reached, the line is %s" % (node_name, index))
|
|
70
|
-
result = AdvisorResult(node_name, index, message)
|
|
71
|
-
return result
|
|
72
|
-
|
|
73
|
-
def gen_advisor_message(self, node_name):
|
|
74
|
-
if AdvisorConst.FORWARD in node_name:
|
|
75
|
-
if AdvisorConst.INPUT in node_name:
|
|
76
|
-
message = AdvisorConst.FORWARD_INPUT_SUGGEST
|
|
77
|
-
else:
|
|
78
|
-
message = AdvisorConst.FORWARD_OUTPUT_SUGGEST
|
|
79
|
-
message = self.deterministic_advisor(message, node_name)
|
|
80
|
-
else:
|
|
81
|
-
if AdvisorConst.INPUT in node_name:
|
|
82
|
-
message = AdvisorConst.BACKWARD_INPUT_SUGGEST
|
|
83
|
-
else:
|
|
84
|
-
message = AdvisorConst.BACKWARD_OUTPUT_SUGGEST
|
|
85
|
-
message = self.deterministic_advisor(message, node_name)
|
|
86
|
-
message = self.batch_norm_advisor(message, node_name)
|
|
87
|
-
return message
|
|
88
|
-
|
|
89
|
-
def analysis(self):
|
|
90
|
-
self._check_path_vaild()
|
|
91
|
-
analyze_data = self._parse_input_data()
|
|
92
|
-
logger.info("Start analyzing the comparison result: %s" % self.file_type)
|
|
93
|
-
self.analyze_unmatched(analyze_data)
|
|
94
|
-
if self.file_type == Const.ALL:
|
|
95
|
-
failing_data = analyze_data[analyze_data[CompareConst.ACCURACY] == CompareConst.ACCURACY_CHECK_NO]
|
|
96
|
-
elif self.file_type == Const.MD5:
|
|
97
|
-
failing_data = analyze_data[analyze_data[CompareConst.RESULT] == CompareConst.DIFF]
|
|
98
|
-
elif self.file_type == Const.SUMMARY:
|
|
99
|
-
failing_data = analyze_data[analyze_data[CompareConst.RESULT] == CompareConst.WARNING]
|
|
100
|
-
if failing_data.empty:
|
|
101
|
-
logger.info("All data from api input/output accuracy reached")
|
|
102
|
-
result = AdvisorResult(AdvisorConst.NO_ERROR_API, AdvisorConst.NO_ERROR_API, AdvisorConst.NO_ERR_SUGGEST)
|
|
103
|
-
else:
|
|
104
|
-
result = self.gen_advisor_result(failing_data)
|
|
105
|
-
message_list = result.print_advisor_log()
|
|
106
|
-
result.gen_summary_file(self.out_path, message_list)
|
|
107
|
-
|
|
108
|
-
def _parse_input_data(self):
|
|
109
|
-
data_columns = self.input_data.columns.values
|
|
110
|
-
if {CompareConst.ACCURACY, CompareConst.NPU_NAME}.issubset(data_columns):
|
|
111
|
-
self.file_type = Const.ALL
|
|
112
|
-
elif {CompareConst.RESULT, CompareConst.NPU_MD5}.issubset(data_columns):
|
|
113
|
-
self.file_type = Const.MD5
|
|
114
|
-
elif {CompareConst.MAX_DIFF, CompareConst.RESULT}.issubset(data_columns):
|
|
115
|
-
self.file_type = Const.SUMMARY
|
|
116
|
-
else:
|
|
117
|
-
logger.error('Compare result does not meet the required conditions.')
|
|
118
|
-
raise CompareException(CompareException.INVALID_DATA_ERROR)
|
|
119
|
-
df = self.input_data.reset_index()
|
|
120
|
-
return df
|
|
121
|
-
|
|
122
|
-
def _check_path_vaild(self):
|
|
123
|
-
out_path_checker = FileChecker(self.out_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE)
|
|
124
|
-
out_path_checker.common_check()
|
|
1
|
+
# Copyright (c) 2022-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
|
|
18
|
+
from msprobe.core.advisor.advisor_result import AdvisorResult
|
|
19
|
+
from msprobe.core.advisor.advisor_const import AdvisorConst
|
|
20
|
+
from msprobe.core.common.log import logger
|
|
21
|
+
from msprobe.core.common.utils import CompareException
|
|
22
|
+
from msprobe.core.common.file_utils import FileChecker
|
|
23
|
+
from msprobe.core.common.const import Const, CompareConst, FileCheckConst
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Advisor:
|
|
27
|
+
"""
|
|
28
|
+
Class for generate advisor
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
def __init__(self, input_data, out_path="", suffix=""):
|
|
32
|
+
self.input_data = input_data
|
|
33
|
+
self.out_path = os.path.realpath(out_path)
|
|
34
|
+
self.file_type = None
|
|
35
|
+
self.suffix = suffix
|
|
36
|
+
|
|
37
|
+
@staticmethod
|
|
38
|
+
def deterministic_advisor(message, node_name):
|
|
39
|
+
for api_name in AdvisorConst.NEED_DETERMINISTIC_API:
|
|
40
|
+
if api_name in node_name:
|
|
41
|
+
return AdvisorConst.DETERMINISTIC_SUGGEST
|
|
42
|
+
return message
|
|
43
|
+
|
|
44
|
+
@staticmethod
|
|
45
|
+
def batch_norm_advisor(message, node_name):
|
|
46
|
+
if AdvisorConst.FUNC_BATCH_NORM in node_name and AdvisorConst.FORWARD_INPUT_1 in node_name:
|
|
47
|
+
message = AdvisorConst.BATCH_NORM_SUGGEST
|
|
48
|
+
return message
|
|
49
|
+
|
|
50
|
+
def analyze_unmatched(self, analyze_data):
|
|
51
|
+
if self.file_type == Const.ALL:
|
|
52
|
+
accuracy_unmatched = analyze_data[
|
|
53
|
+
analyze_data[CompareConst.ACCURACY] == CompareConst.ACCURACY_CHECK_UNMATCH]
|
|
54
|
+
else:
|
|
55
|
+
accuracy_unmatched = analyze_data[(analyze_data[CompareConst.NPU_SHAPE] == CompareConst.NAN) |
|
|
56
|
+
(analyze_data[CompareConst.BENCH_SHAPE] == CompareConst.NAN)]
|
|
57
|
+
num_unmatch = len(accuracy_unmatched)
|
|
58
|
+
if num_unmatch != 0:
|
|
59
|
+
for i in range(len(accuracy_unmatched)):
|
|
60
|
+
item = accuracy_unmatched.iloc[i]
|
|
61
|
+
logger.warning("The tensor name matches but the shape or dtype does not match: {}"
|
|
62
|
+
.format(item[CompareConst.NPU_NAME]))
|
|
63
|
+
|
|
64
|
+
def gen_advisor_result(self, pd_data):
|
|
65
|
+
first_failing_data = pd_data.iloc[0]
|
|
66
|
+
node_name = first_failing_data[CompareConst.NPU_NAME]
|
|
67
|
+
index = first_failing_data['index']
|
|
68
|
+
message = self.gen_advisor_message(node_name)
|
|
69
|
+
logger.warning("Find %s accuracy not reached, the line is %s" % (node_name, index))
|
|
70
|
+
result = AdvisorResult(node_name, index, message)
|
|
71
|
+
return result
|
|
72
|
+
|
|
73
|
+
def gen_advisor_message(self, node_name):
|
|
74
|
+
if AdvisorConst.FORWARD in node_name:
|
|
75
|
+
if AdvisorConst.INPUT in node_name:
|
|
76
|
+
message = AdvisorConst.FORWARD_INPUT_SUGGEST
|
|
77
|
+
else:
|
|
78
|
+
message = AdvisorConst.FORWARD_OUTPUT_SUGGEST
|
|
79
|
+
message = self.deterministic_advisor(message, node_name)
|
|
80
|
+
else:
|
|
81
|
+
if AdvisorConst.INPUT in node_name:
|
|
82
|
+
message = AdvisorConst.BACKWARD_INPUT_SUGGEST
|
|
83
|
+
else:
|
|
84
|
+
message = AdvisorConst.BACKWARD_OUTPUT_SUGGEST
|
|
85
|
+
message = self.deterministic_advisor(message, node_name)
|
|
86
|
+
message = self.batch_norm_advisor(message, node_name)
|
|
87
|
+
return message
|
|
88
|
+
|
|
89
|
+
def analysis(self):
|
|
90
|
+
self._check_path_vaild()
|
|
91
|
+
analyze_data = self._parse_input_data()
|
|
92
|
+
logger.info("Start analyzing the comparison result: %s" % self.file_type)
|
|
93
|
+
self.analyze_unmatched(analyze_data)
|
|
94
|
+
if self.file_type == Const.ALL:
|
|
95
|
+
failing_data = analyze_data[analyze_data[CompareConst.ACCURACY] == CompareConst.ACCURACY_CHECK_NO]
|
|
96
|
+
elif self.file_type == Const.MD5:
|
|
97
|
+
failing_data = analyze_data[analyze_data[CompareConst.RESULT] == CompareConst.DIFF]
|
|
98
|
+
elif self.file_type == Const.SUMMARY:
|
|
99
|
+
failing_data = analyze_data[analyze_data[CompareConst.RESULT] == CompareConst.WARNING]
|
|
100
|
+
if failing_data.empty:
|
|
101
|
+
logger.info("All data from api input/output accuracy reached")
|
|
102
|
+
result = AdvisorResult(AdvisorConst.NO_ERROR_API, AdvisorConst.NO_ERROR_API, AdvisorConst.NO_ERR_SUGGEST)
|
|
103
|
+
else:
|
|
104
|
+
result = self.gen_advisor_result(failing_data)
|
|
105
|
+
message_list = result.print_advisor_log()
|
|
106
|
+
result.gen_summary_file(self.out_path, message_list, suffix=self.suffix)
|
|
107
|
+
|
|
108
|
+
def _parse_input_data(self):
|
|
109
|
+
data_columns = self.input_data.columns.values
|
|
110
|
+
if {CompareConst.ACCURACY, CompareConst.NPU_NAME}.issubset(data_columns):
|
|
111
|
+
self.file_type = Const.ALL
|
|
112
|
+
elif {CompareConst.RESULT, CompareConst.NPU_MD5}.issubset(data_columns):
|
|
113
|
+
self.file_type = Const.MD5
|
|
114
|
+
elif {CompareConst.MAX_DIFF, CompareConst.RESULT}.issubset(data_columns):
|
|
115
|
+
self.file_type = Const.SUMMARY
|
|
116
|
+
else:
|
|
117
|
+
logger.error('Compare result does not meet the required conditions.')
|
|
118
|
+
raise CompareException(CompareException.INVALID_DATA_ERROR)
|
|
119
|
+
df = self.input_data.reset_index()
|
|
120
|
+
return df
|
|
121
|
+
|
|
122
|
+
def _check_path_vaild(self):
|
|
123
|
+
out_path_checker = FileChecker(self.out_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE)
|
|
124
|
+
out_path_checker.common_check()
|
|
@@ -1,59 +1,58 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"""
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
"
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
"
|
|
45
|
-
"
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
BACKWARD = "backward"
|
|
1
|
+
# Copyright (c) 2022-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AdvisorConst:
|
|
18
|
+
"""
|
|
19
|
+
Class for advisor const
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
# text symbol
|
|
23
|
+
NEW_LINE = "\n"
|
|
24
|
+
COLON = ": "
|
|
25
|
+
|
|
26
|
+
# advisor summary key
|
|
27
|
+
SUSPECT_NODES = "Suspect Nodes"
|
|
28
|
+
LINE = "Line"
|
|
29
|
+
ADVISOR_SUGGEST = "Expert Advice"
|
|
30
|
+
|
|
31
|
+
NO_ERROR_API = "NA"
|
|
32
|
+
|
|
33
|
+
# advisor message
|
|
34
|
+
NO_ERR_SUGGEST = "All data in comparison result meets the accuracy requirements."
|
|
35
|
+
FORWARD_INPUT_SUGGEST = "1. Analyze the model to view the input source.\n" \
|
|
36
|
+
"2. Check whether an inplace API causes the output result to overwrite the input result. "\
|
|
37
|
+
"That is, the fault is actually caused by a computation error.\n" \
|
|
38
|
+
"3. The fault may be caused by memory corruption and further analysis is required."
|
|
39
|
+
FORWARD_OUTPUT_SUGGEST = "This is a forward API computation error. Check the computation implementation."
|
|
40
|
+
BACKWARD_INPUT_SUGGEST = "Check whether the forward computation result is affected."
|
|
41
|
+
BACKWARD_OUTPUT_SUGGEST = "This is a backward API computation error. Check the computation implementation."
|
|
42
|
+
BATCH_NORM_SUGGEST = "Torch API batch_norm input not fixed, the following suggestions may fix it:\n" \
|
|
43
|
+
"1. If use torch.nn.functional.batch_norm, you can set parameter training=False.\n" \
|
|
44
|
+
"2. If use torch.nn.BatchNormXXX, you can set parameter affine=False.\n" \
|
|
45
|
+
"3. Use seed_all(mode=True) to enable deterministic computing."
|
|
46
|
+
DETERMINISTIC_SUGGEST = "This torch api may be uncertainty in the calculation, " \
|
|
47
|
+
"can seed_all(mode=True) to enable deterministic computing."
|
|
48
|
+
|
|
49
|
+
FUNC_BATCH_NORM = "Functional_batch_norm"
|
|
50
|
+
FORWARD_INPUT_1 = "forward_input.1"
|
|
51
|
+
NEED_DETERMINISTIC_API = ["conv2d", "conv3d", "matmul", "nll_loss", "layer_norm", "lstm"]
|
|
52
|
+
BATCH_NORM = "batch_norm"
|
|
53
|
+
|
|
54
|
+
# name keyword
|
|
55
|
+
INPUT = "input"
|
|
56
|
+
OUTPUT = "output"
|
|
57
|
+
FORWARD = "forward"
|
|
58
|
+
BACKWARD = "backward"
|
|
@@ -1,58 +1,58 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
import
|
|
19
|
-
|
|
20
|
-
from msprobe.core.
|
|
21
|
-
from msprobe.core.common.
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
self.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
output_file.
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
logger.
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
for message in message_list:
|
|
57
|
-
logger.info(message)
|
|
58
|
-
return message_list
|
|
1
|
+
# Copyright (c) 2022-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
import os
|
|
16
|
+
import time
|
|
17
|
+
|
|
18
|
+
from msprobe.core.advisor.advisor_const import AdvisorConst
|
|
19
|
+
from msprobe.core.common.log import logger
|
|
20
|
+
from msprobe.core.common.const import FileCheckConst
|
|
21
|
+
from msprobe.core.common.file_utils import change_mode, FileOpen
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AdvisorResult:
    """
    Container for one piece of expert advice produced by the advisor.

    The constructor stores three fields: ``suspect_node``, ``line`` and
    ``advisor_message``. The class can log them (``print_advisor_log``) and
    write a list of messages to a summary file (``gen_summary_file``).
    """

    def __init__(self, node, line, message):
        """
        Store the advice fields unchanged.

        :param node: value kept as ``suspect_node``
        :param line: value kept as ``line``
        :param message: value kept as ``advisor_message``
        """
        self.suspect_node = node
        self.line = line
        self.advisor_message = message

    @staticmethod
    def gen_summary_file(out_path, message_list, suffix):
        """
        Write the given messages to a timestamped text file under ``out_path``.

        The file is named ``advisor<suffix>_<YYYYmmddHHMMSS>.txt`` using the
        local time. After writing, its mode is set through ``change_mode`` to
        ``FileCheckConst.DATA_FILE_AUTHORITY``. An ``IOError`` raised while
        saving is logged and not re-raised.

        :param out_path: directory in which the summary file is created
        :param message_list: iterable of message strings, one per output line
        :param suffix: text inserted right after ``advisor`` in the file name
        :return: None
        """
        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
        file_name = 'advisor{}_{}.txt'.format(suffix, timestamp)
        result_file = os.path.join(out_path, file_name)
        # Build the output lines before touching the file system so that a bad
        # entry in message_list cannot leave an empty summary file behind, and
        # keep the caller's argument name un-rebound.
        output_lines = [message + AdvisorConst.NEW_LINE for message in message_list]
        try:
            with FileOpen(result_file, 'w+') as output_file:
                output_file.truncate(0)
                output_file.writelines(output_lines)
            change_mode(result_file, FileCheckConst.DATA_FILE_AUTHORITY)
        except IOError as io_error:
            logger.error("Failed to save %s, the reason is %s." % (result_file, io_error))
        else:
            logger.info("The advisor summary is saved in: %s" % result_file)

    def print_advisor_log(self):
        """
        Log the advice through ``logger.info`` and return the logged lines.

        Three ``"<label>: <value>"`` lines are produced, in this order: line,
        suspect nodes, expert advice (labels come from ``AdvisorConst``).

        :return: list with the three message strings that were logged
        """
        logger.info("The summary of the expert advice is as follows: ")
        # Every field goes through str() so that a non-string node or message
        # cannot raise TypeError during concatenation.
        message_list = [
            AdvisorConst.LINE + AdvisorConst.COLON + str(self.line),
            AdvisorConst.SUSPECT_NODES + AdvisorConst.COLON + str(self.suspect_node),
            AdvisorConst.ADVISOR_SUGGEST + AdvisorConst.COLON + str(self.advisor_message),
        ]
        for message in message_list:
            logger.info(message)
        return message_list
|