mindstudio-probe 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/LICENSE +201 -201
  2. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/METADATA +36 -34
  3. mindstudio_probe-1.1.0.dist-info/RECORD +287 -0
  4. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/WHEEL +1 -1
  5. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/entry_points.txt +1 -0
  6. msprobe/README.md +131 -237
  7. msprobe/__init__.py +16 -1
  8. msprobe/{config/config.json → config.json} +47 -49
  9. msprobe/core/advisor/advisor.py +124 -124
  10. msprobe/core/advisor/advisor_const.py +58 -59
  11. msprobe/core/advisor/advisor_result.py +58 -58
  12. msprobe/core/common/const.py +402 -318
  13. msprobe/core/common/exceptions.py +99 -99
  14. msprobe/core/common/{file_check.py → file_utils.py} +523 -283
  15. msprobe/core/common/inplace_op_checker.py +38 -0
  16. msprobe/core/common/inplace_ops.yaml +251 -0
  17. msprobe/core/common/log.py +86 -69
  18. msprobe/core/common/utils.py +371 -616
  19. msprobe/core/common_config.py +78 -71
  20. msprobe/core/compare/acc_compare.py +472 -298
  21. msprobe/core/compare/check.py +180 -95
  22. msprobe/core/compare/compare_cli.py +69 -49
  23. msprobe/core/compare/highlight.py +259 -222
  24. msprobe/core/compare/multiprocessing_compute.py +174 -149
  25. msprobe/core/compare/npy_compare.py +310 -295
  26. msprobe/core/compare/utils.py +464 -429
  27. msprobe/core/data_dump/data_collector.py +153 -144
  28. msprobe/core/data_dump/data_processor/base.py +337 -293
  29. msprobe/core/data_dump/data_processor/factory.py +76 -59
  30. msprobe/core/data_dump/data_processor/mindspore_processor.py +192 -198
  31. msprobe/core/data_dump/data_processor/pytorch_processor.py +383 -389
  32. msprobe/core/data_dump/json_writer.py +117 -116
  33. msprobe/core/data_dump/scope.py +194 -178
  34. msprobe/core/grad_probe/constant.py +74 -70
  35. msprobe/core/grad_probe/grad_compare.py +170 -175
  36. msprobe/core/grad_probe/utils.py +77 -52
  37. msprobe/docs/01.installation.md +99 -0
  38. msprobe/docs/02.config_introduction.md +137 -0
  39. msprobe/docs/03.config_examples.md +237 -0
  40. msprobe/docs/04.acl_config_examples.md +78 -0
  41. msprobe/docs/05.data_dump_PyTorch.md +326 -0
  42. msprobe/docs/06.data_dump_MindSpore.md +285 -0
  43. msprobe/docs/07.accuracy_checker_PyTorch.md +297 -0
  44. msprobe/docs/08.accuracy_checker_online_PyTorch.md +238 -0
  45. msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
  46. msprobe/docs/10.accuracy_compare_PyTorch.md +327 -0
  47. msprobe/docs/11.accuracy_compare_MindSpore.md +333 -0
  48. msprobe/docs/12.overflow_check_PyTorch.md +79 -0
  49. msprobe/docs/13.overflow_check_MindSpore.md +31 -0
  50. msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
  51. msprobe/docs/15.free_benchmarking_PyTorch.md +170 -0
  52. msprobe/docs/16.free_benchmarking_MindSpore.md +140 -0
  53. msprobe/{doc/grad_probe/grad_probe.md → docs/17.grad_probe.md} +205 -207
  54. msprobe/{pytorch/doc//321/205/320/254/320/270/321/207/342/225/221/342/224/220/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/206/320/277/320/244/321/205/320/277/342/225/243.md → docs/18.online_dispatch.md} +89 -90
  55. msprobe/docs/FAQ.md +189 -0
  56. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
  57. msprobe/docs/img/free_benchmark_framework.png +0 -0
  58. msprobe/docs/img/ms_dump.png +0 -0
  59. msprobe/docs/img/ms_layer.png +0 -0
  60. msprobe/docs/img/pt_dump.png +0 -0
  61. msprobe/mindspore/__init__.py +2 -1
  62. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +278 -245
  63. msprobe/mindspore/api_accuracy_checker/api_info.py +76 -69
  64. msprobe/mindspore/api_accuracy_checker/api_runner.py +155 -151
  65. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +196 -196
  66. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
  67. msprobe/mindspore/api_accuracy_checker/compute_element.py +238 -223
  68. msprobe/mindspore/api_accuracy_checker/main.py +8 -15
  69. msprobe/mindspore/api_accuracy_checker/type_mapping.py +113 -113
  70. msprobe/mindspore/api_accuracy_checker/utils.py +79 -62
  71. msprobe/mindspore/cell_processor.py +58 -34
  72. msprobe/mindspore/common/const.py +108 -87
  73. msprobe/mindspore/common/log.py +37 -37
  74. msprobe/mindspore/common/utils.py +97 -57
  75. msprobe/mindspore/compare/distributed_compare.py +62 -75
  76. msprobe/mindspore/compare/layer_mapping.py +146 -0
  77. msprobe/mindspore/compare/modify_mapping.py +107 -0
  78. msprobe/mindspore/compare/ms_compare.py +357 -117
  79. msprobe/mindspore/compare/ms_graph_compare.py +364 -317
  80. msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -399
  81. msprobe/mindspore/debugger/debugger_config.py +69 -74
  82. msprobe/mindspore/debugger/precision_debugger.py +150 -107
  83. msprobe/mindspore/dump/dump_tool_factory.py +50 -35
  84. msprobe/mindspore/dump/hook_cell/api_registry.py +128 -104
  85. msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -53
  86. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +206 -0
  87. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +994 -925
  88. msprobe/mindspore/dump/hook_cell/wrap_api.py +121 -0
  89. msprobe/mindspore/dump/jit_dump.py +96 -56
  90. msprobe/mindspore/dump/kernel_graph_dump.py +75 -60
  91. msprobe/mindspore/dump/kernel_kbyk_dump.py +79 -65
  92. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +131 -116
  93. msprobe/mindspore/free_benchmark/common/config.py +27 -12
  94. msprobe/mindspore/free_benchmark/common/handler_params.py +32 -17
  95. msprobe/mindspore/free_benchmark/common/utils.py +85 -71
  96. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -842
  97. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +57 -42
  98. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +122 -107
  99. msprobe/mindspore/free_benchmark/handler/base_handler.py +105 -90
  100. msprobe/mindspore/free_benchmark/handler/check_handler.py +56 -41
  101. msprobe/mindspore/free_benchmark/handler/fix_handler.py +51 -36
  102. msprobe/mindspore/free_benchmark/handler/handler_factory.py +36 -21
  103. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +82 -67
  104. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +36 -21
  105. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +78 -63
  106. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +77 -0
  107. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +49 -34
  108. msprobe/mindspore/free_benchmark/perturbation/no_change.py +27 -12
  109. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +44 -27
  110. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +48 -33
  111. msprobe/mindspore/grad_probe/global_context.py +100 -91
  112. msprobe/mindspore/grad_probe/grad_analyzer.py +231 -231
  113. msprobe/mindspore/grad_probe/grad_monitor.py +27 -27
  114. msprobe/mindspore/grad_probe/grad_stat_csv.py +131 -131
  115. msprobe/mindspore/grad_probe/hook.py +94 -92
  116. msprobe/mindspore/grad_probe/utils.py +29 -28
  117. msprobe/mindspore/ms_config.py +128 -126
  118. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +60 -45
  119. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +49 -34
  120. msprobe/mindspore/runtime.py +4 -4
  121. msprobe/mindspore/service.py +297 -354
  122. msprobe/mindspore/task_handler_factory.py +24 -24
  123. msprobe/msprobe.py +105 -107
  124. msprobe/pytorch/__init__.py +23 -4
  125. msprobe/pytorch/api_accuracy_checker/common/config.py +70 -55
  126. msprobe/pytorch/api_accuracy_checker/common/utils.py +246 -165
  127. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +230 -213
  128. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +632 -581
  129. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
  130. msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
  131. msprobe/pytorch/api_accuracy_checker/compare/compare.py +416 -381
  132. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +90 -73
  133. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +265 -244
  134. msprobe/pytorch/api_accuracy_checker/config.yaml +10 -10
  135. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +370 -332
  136. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +221 -199
  137. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +150 -134
  138. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +518 -581
  139. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +213 -74
  140. msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
  141. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +218 -202
  142. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +370 -324
  143. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +227 -204
  144. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +110 -0
  145. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +244 -218
  146. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
  147. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
  148. msprobe/pytorch/bench_functions/__init__.py +30 -15
  149. msprobe/pytorch/bench_functions/apply_adam_w.py +43 -28
  150. msprobe/pytorch/bench_functions/confusion_transpose.py +34 -19
  151. msprobe/pytorch/bench_functions/fast_gelu.py +70 -55
  152. msprobe/pytorch/bench_functions/layer_norm_eval.py +21 -6
  153. msprobe/pytorch/bench_functions/linear.py +27 -12
  154. msprobe/pytorch/bench_functions/matmul_backward.py +63 -48
  155. msprobe/pytorch/bench_functions/npu_fusion_attention.py +538 -421
  156. msprobe/pytorch/bench_functions/rms_norm.py +30 -15
  157. msprobe/pytorch/bench_functions/rotary_mul.py +71 -52
  158. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +41 -26
  159. msprobe/pytorch/bench_functions/swiglu.py +70 -55
  160. msprobe/pytorch/common/__init__.py +17 -2
  161. msprobe/pytorch/common/compare_script.template +14 -14
  162. msprobe/pytorch/common/log.py +33 -32
  163. msprobe/pytorch/common/parse_json.py +54 -39
  164. msprobe/pytorch/common/utils.py +310 -300
  165. msprobe/pytorch/compare/distributed_compare.py +66 -66
  166. msprobe/pytorch/compare/mapping.yaml +607 -607
  167. msprobe/pytorch/compare/match.py +49 -33
  168. msprobe/pytorch/compare/pt_compare.py +82 -40
  169. msprobe/pytorch/debugger/debugger_config.py +108 -95
  170. msprobe/pytorch/debugger/precision_debugger.py +173 -125
  171. msprobe/pytorch/free_benchmark/__init__.py +23 -8
  172. msprobe/pytorch/free_benchmark/common/constant.py +70 -70
  173. msprobe/pytorch/free_benchmark/common/counter.py +71 -71
  174. msprobe/pytorch/free_benchmark/common/enums.py +65 -37
  175. msprobe/pytorch/free_benchmark/common/params.py +144 -129
  176. msprobe/pytorch/free_benchmark/common/utils.py +118 -102
  177. msprobe/pytorch/free_benchmark/compare/grad_saver.py +200 -179
  178. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +119 -104
  179. msprobe/pytorch/free_benchmark/main.py +120 -105
  180. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +28 -13
  181. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +56 -41
  182. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +105 -90
  183. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +119 -104
  184. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +87 -63
  185. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +83 -68
  186. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +43 -28
  187. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +60 -45
  188. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +34 -19
  189. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +256 -217
  190. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +54 -39
  191. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +38 -23
  192. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +45 -30
  193. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +185 -170
  194. msprobe/pytorch/function_factory.py +91 -75
  195. msprobe/pytorch/functional/module_dump.py +84 -0
  196. msprobe/pytorch/grad_probe/grad_monitor.py +91 -90
  197. msprobe/pytorch/grad_probe/grad_stat_csv.py +128 -128
  198. msprobe/pytorch/hook_module/__init__.py +16 -1
  199. msprobe/pytorch/hook_module/api_registry.py +166 -161
  200. msprobe/pytorch/hook_module/hook_module.py +118 -120
  201. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1877
  202. msprobe/pytorch/hook_module/utils.py +28 -29
  203. msprobe/pytorch/hook_module/wrap_aten.py +111 -110
  204. msprobe/pytorch/hook_module/wrap_distributed.py +77 -78
  205. msprobe/pytorch/hook_module/wrap_functional.py +104 -105
  206. msprobe/pytorch/hook_module/wrap_npu_custom.py +85 -84
  207. msprobe/pytorch/hook_module/wrap_tensor.py +69 -71
  208. msprobe/pytorch/hook_module/wrap_torch.py +84 -86
  209. msprobe/pytorch/hook_module/wrap_vf.py +60 -62
  210. msprobe/pytorch/module_processer.py +153 -138
  211. msprobe/pytorch/online_dispatch/__init__.py +20 -20
  212. msprobe/pytorch/online_dispatch/compare.py +235 -236
  213. msprobe/pytorch/online_dispatch/dispatch.py +271 -271
  214. msprobe/pytorch/online_dispatch/dump_compare.py +155 -156
  215. msprobe/pytorch/online_dispatch/single_compare.py +391 -391
  216. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +57 -49
  217. msprobe/pytorch/online_dispatch/utils.py +127 -146
  218. msprobe/pytorch/parse.py +19 -4
  219. msprobe/pytorch/parse_tool/cli.py +31 -32
  220. msprobe/pytorch/parse_tool/lib/compare.py +259 -271
  221. msprobe/pytorch/parse_tool/lib/config.py +52 -52
  222. msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
  223. msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
  224. msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
  225. msprobe/pytorch/parse_tool/lib/parse_tool.py +161 -158
  226. msprobe/pytorch/parse_tool/lib/utils.py +320 -321
  227. msprobe/pytorch/parse_tool/lib/visualization.py +85 -91
  228. msprobe/pytorch/pt_config.py +317 -187
  229. msprobe/pytorch/service.py +311 -252
  230. mindstudio_probe-1.0.3.dist-info/RECORD +0 -272
  231. msprobe/config/README.md +0 -539
  232. msprobe/mindspore/doc/compare.md +0 -58
  233. msprobe/mindspore/doc/dump.md +0 -217
  234. msprobe/mindspore/dump/hook_cell/wrap_functional.py +0 -91
  235. msprobe/mindspore/dump/hook_cell/wrap_tensor.py +0 -63
  236. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
  237. msprobe/pytorch/doc/FAQ.md +0 -193
  238. msprobe/pytorch/doc/api_accuracy_checker.md +0 -313
  239. msprobe/pytorch/doc/api_accuracy_checker_online.md +0 -187
  240. msprobe/pytorch/doc/dump.md +0 -260
  241. msprobe/pytorch/doc/msprobe/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/206/320/245/342/226/221/321/206/320/235/320/276dump/321/206/320/260/320/227/321/205/320/227/320/226/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -182
  242. msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -240
  243. msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
  244. msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
  245. msprobe/pytorch/doc/run_overflow_check.md +0 -25
  246. msprobe/pytorch/doc//321/206/320/247/320/260/321/206/320/260/320/227/321/206/320/255/320/226/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/205/320/254/342/225/221/321/206/320/251/320/277/321/211/320/272/320/234/321/210/320/277/320/221/321/205/320/242/320/234/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -151
  247. msprobe/pytorch/functional/data_processor.py +0 -0
  248. msprobe/pytorch/functional/dump_module.py +0 -39
  249. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.1.0.dist-info}/top_level.txt +0 -0
  250. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
  251. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
  252. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
  253. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
  254. /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
  255. /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
  256. /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
  257. /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
  258. /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
  259. /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
  260. /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
  261. /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
  262. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
  263. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
  264. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
  265. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
  266. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
  267. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
  268. /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
  269. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
  270. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
  271. /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
  272. /msprobe/{config → docs}/img/free_benchmark.png +0 -0
  273. /msprobe/{doc/grad_probe/img/image-1.png → docs/img/grad_probe_image-1.png} +0 -0
  274. /msprobe/{doc/grad_probe/img/image-2.png → docs/img/grad_probe_image-2.png} +0 -0
  275. /msprobe/{doc/grad_probe/img/image-3.png → docs/img/grad_probe_image-3.png} +0 -0
  276. /msprobe/{doc/grad_probe/img/image-4.png → docs/img/grad_probe_image-4.png} +0 -0
  277. /msprobe/{doc/grad_probe/img/image.png → docs/img/grad_probe_image.png} +0 -0
  278. /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
@@ -1,124 +1,124 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- # Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved.
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- """
17
-
18
- import os
19
-
20
- from msprobe.core.advisor.advisor_result import AdvisorResult
21
- from msprobe.core.advisor.advisor_const import AdvisorConst
22
- from msprobe.core.common.log import logger
23
- from msprobe.core.common.utils import CompareException
24
- from msprobe.core.common.file_check import FileChecker
25
- from msprobe.core.common.const import Const, CompareConst, FileCheckConst
26
-
27
- class Advisor:
28
- """
29
- Class for generate advisor
30
- """
31
-
32
- def __init__(self, input_data, out_path=""):
33
- self.input_data = input_data
34
- self.out_path = os.path.realpath(out_path)
35
- self.file_type = None
36
-
37
- @staticmethod
38
- def deterministic_advisor(message, node_name):
39
- for api_name in AdvisorConst.NEED_DETERMINISTIC_API:
40
- if api_name in node_name:
41
- return AdvisorConst.DETERMINISTIC_SUGGEST
42
- return message
43
-
44
- @staticmethod
45
- def batch_norm_advisor(message, node_name):
46
- if AdvisorConst.FUNC_BATCH_NORM in node_name and AdvisorConst.FORWARD_INPUT_1 in node_name:
47
- message = AdvisorConst.BATCH_NORM_SUGGEST
48
- return message
49
-
50
- def analyze_unmatched(self, analyze_data):
51
- if self.file_type == Const.ALL:
52
- accuracy_unmatched = analyze_data[
53
- analyze_data[CompareConst.ACCURACY] == CompareConst.ACCURACY_CHECK_UNMATCH]
54
- else:
55
- accuracy_unmatched = analyze_data[(analyze_data[CompareConst.NPU_SHAPE] == CompareConst.NAN) |
56
- (analyze_data[CompareConst.BENCH_SHAPE] == CompareConst.NAN)]
57
- num_unmatch = len(accuracy_unmatched)
58
- if num_unmatch != 0:
59
- for i in range(len(accuracy_unmatched)):
60
- item = accuracy_unmatched.iloc[i]
61
- logger.warning("The tensor name matches but the shape or dtype does not match: {}"
62
- .format(item[CompareConst.NPU_NAME]))
63
-
64
- def gen_advisor_result(self, pd_data):
65
- first_failing_data = pd_data.iloc[0]
66
- node_name = first_failing_data[CompareConst.NPU_NAME]
67
- index = first_failing_data['index']
68
- message = self.gen_advisor_message(node_name)
69
- logger.warning("Find %s accuracy not reached, the line is %s" % (node_name, index))
70
- result = AdvisorResult(node_name, index, message)
71
- return result
72
-
73
- def gen_advisor_message(self, node_name):
74
- if AdvisorConst.FORWARD in node_name:
75
- if AdvisorConst.INPUT in node_name:
76
- message = AdvisorConst.FORWARD_INPUT_SUGGEST
77
- else:
78
- message = AdvisorConst.FORWARD_OUTPUT_SUGGEST
79
- message = self.deterministic_advisor(message, node_name)
80
- else:
81
- if AdvisorConst.INPUT in node_name:
82
- message = AdvisorConst.BACKWARD_INPUT_SUGGEST
83
- else:
84
- message = AdvisorConst.BACKWARD_OUTPUT_SUGGEST
85
- message = self.deterministic_advisor(message, node_name)
86
- message = self.batch_norm_advisor(message, node_name)
87
- return message
88
-
89
- def analysis(self):
90
- self._check_path_vaild()
91
- analyze_data = self._parse_input_data()
92
- logger.info("Start analyzing the comparison result: %s" % self.file_type)
93
- self.analyze_unmatched(analyze_data)
94
- if self.file_type == Const.ALL:
95
- failing_data = analyze_data[analyze_data[CompareConst.ACCURACY] == CompareConst.ACCURACY_CHECK_NO]
96
- elif self.file_type == Const.MD5:
97
- failing_data = analyze_data[analyze_data[CompareConst.RESULT] == CompareConst.DIFF]
98
- elif self.file_type == Const.SUMMARY:
99
- failing_data = analyze_data[analyze_data[CompareConst.RESULT] == CompareConst.WARNING]
100
- if failing_data.empty:
101
- logger.info("All data from api input/output accuracy reached")
102
- result = AdvisorResult(AdvisorConst.NO_ERROR_API, AdvisorConst.NO_ERROR_API, AdvisorConst.NO_ERR_SUGGEST)
103
- else:
104
- result = self.gen_advisor_result(failing_data)
105
- message_list = result.print_advisor_log()
106
- result.gen_summary_file(self.out_path, message_list)
107
-
108
- def _parse_input_data(self):
109
- data_columns = self.input_data.columns.values
110
- if {CompareConst.ACCURACY, CompareConst.NPU_NAME}.issubset(data_columns):
111
- self.file_type = Const.ALL
112
- elif {CompareConst.RESULT, CompareConst.NPU_MD5}.issubset(data_columns):
113
- self.file_type = Const.MD5
114
- elif {CompareConst.MAX_DIFF, CompareConst.RESULT}.issubset(data_columns):
115
- self.file_type = Const.SUMMARY
116
- else:
117
- logger.error('Compare result does not meet the required conditions.')
118
- raise CompareException(CompareException.INVALID_DATA_ERROR)
119
- df = self.input_data.reset_index()
120
- return df
121
-
122
- def _check_path_vaild(self):
123
- out_path_checker = FileChecker(self.out_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE)
124
- out_path_checker.common_check()
1
+ # Copyright (c) 2022-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import os
17
+
18
+ from msprobe.core.advisor.advisor_result import AdvisorResult
19
+ from msprobe.core.advisor.advisor_const import AdvisorConst
20
+ from msprobe.core.common.log import logger
21
+ from msprobe.core.common.utils import CompareException
22
+ from msprobe.core.common.file_utils import FileChecker
23
+ from msprobe.core.common.const import Const, CompareConst, FileCheckConst
24
+
25
+
26
+ class Advisor:
27
+ """
28
+ Class for generate advisor
29
+ """
30
+
31
+ def __init__(self, input_data, out_path="", suffix=""):
32
+ self.input_data = input_data
33
+ self.out_path = os.path.realpath(out_path)
34
+ self.file_type = None
35
+ self.suffix = suffix
36
+
37
+ @staticmethod
38
+ def deterministic_advisor(message, node_name):
39
+ for api_name in AdvisorConst.NEED_DETERMINISTIC_API:
40
+ if api_name in node_name:
41
+ return AdvisorConst.DETERMINISTIC_SUGGEST
42
+ return message
43
+
44
+ @staticmethod
45
+ def batch_norm_advisor(message, node_name):
46
+ if AdvisorConst.FUNC_BATCH_NORM in node_name and AdvisorConst.FORWARD_INPUT_1 in node_name:
47
+ message = AdvisorConst.BATCH_NORM_SUGGEST
48
+ return message
49
+
50
+ def analyze_unmatched(self, analyze_data):
51
+ if self.file_type == Const.ALL:
52
+ accuracy_unmatched = analyze_data[
53
+ analyze_data[CompareConst.ACCURACY] == CompareConst.ACCURACY_CHECK_UNMATCH]
54
+ else:
55
+ accuracy_unmatched = analyze_data[(analyze_data[CompareConst.NPU_SHAPE] == CompareConst.NAN) |
56
+ (analyze_data[CompareConst.BENCH_SHAPE] == CompareConst.NAN)]
57
+ num_unmatch = len(accuracy_unmatched)
58
+ if num_unmatch != 0:
59
+ for i in range(len(accuracy_unmatched)):
60
+ item = accuracy_unmatched.iloc[i]
61
+ logger.warning("The tensor name matches but the shape or dtype does not match: {}"
62
+ .format(item[CompareConst.NPU_NAME]))
63
+
64
+ def gen_advisor_result(self, pd_data):
65
+ first_failing_data = pd_data.iloc[0]
66
+ node_name = first_failing_data[CompareConst.NPU_NAME]
67
+ index = first_failing_data['index']
68
+ message = self.gen_advisor_message(node_name)
69
+ logger.warning("Find %s accuracy not reached, the line is %s" % (node_name, index))
70
+ result = AdvisorResult(node_name, index, message)
71
+ return result
72
+
73
+ def gen_advisor_message(self, node_name):
74
+ if AdvisorConst.FORWARD in node_name:
75
+ if AdvisorConst.INPUT in node_name:
76
+ message = AdvisorConst.FORWARD_INPUT_SUGGEST
77
+ else:
78
+ message = AdvisorConst.FORWARD_OUTPUT_SUGGEST
79
+ message = self.deterministic_advisor(message, node_name)
80
+ else:
81
+ if AdvisorConst.INPUT in node_name:
82
+ message = AdvisorConst.BACKWARD_INPUT_SUGGEST
83
+ else:
84
+ message = AdvisorConst.BACKWARD_OUTPUT_SUGGEST
85
+ message = self.deterministic_advisor(message, node_name)
86
+ message = self.batch_norm_advisor(message, node_name)
87
+ return message
88
+
89
+ def analysis(self):
90
+ self._check_path_vaild()
91
+ analyze_data = self._parse_input_data()
92
+ logger.info("Start analyzing the comparison result: %s" % self.file_type)
93
+ self.analyze_unmatched(analyze_data)
94
+ if self.file_type == Const.ALL:
95
+ failing_data = analyze_data[analyze_data[CompareConst.ACCURACY] == CompareConst.ACCURACY_CHECK_NO]
96
+ elif self.file_type == Const.MD5:
97
+ failing_data = analyze_data[analyze_data[CompareConst.RESULT] == CompareConst.DIFF]
98
+ elif self.file_type == Const.SUMMARY:
99
+ failing_data = analyze_data[analyze_data[CompareConst.RESULT] == CompareConst.WARNING]
100
+ if failing_data.empty:
101
+ logger.info("All data from api input/output accuracy reached")
102
+ result = AdvisorResult(AdvisorConst.NO_ERROR_API, AdvisorConst.NO_ERROR_API, AdvisorConst.NO_ERR_SUGGEST)
103
+ else:
104
+ result = self.gen_advisor_result(failing_data)
105
+ message_list = result.print_advisor_log()
106
+ result.gen_summary_file(self.out_path, message_list, suffix=self.suffix)
107
+
108
+ def _parse_input_data(self):
109
+ data_columns = self.input_data.columns.values
110
+ if {CompareConst.ACCURACY, CompareConst.NPU_NAME}.issubset(data_columns):
111
+ self.file_type = Const.ALL
112
+ elif {CompareConst.RESULT, CompareConst.NPU_MD5}.issubset(data_columns):
113
+ self.file_type = Const.MD5
114
+ elif {CompareConst.MAX_DIFF, CompareConst.RESULT}.issubset(data_columns):
115
+ self.file_type = Const.SUMMARY
116
+ else:
117
+ logger.error('Compare result does not meet the required conditions.')
118
+ raise CompareException(CompareException.INVALID_DATA_ERROR)
119
+ df = self.input_data.reset_index()
120
+ return df
121
+
122
+ def _check_path_vaild(self):
123
+ out_path_checker = FileChecker(self.out_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE)
124
+ out_path_checker.common_check()
@@ -1,59 +1,58 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- # Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved.
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- """
17
-
18
-
19
- class AdvisorConst:
20
- """
21
- Class for advisor const
22
- """
23
-
24
- # text symbol
25
- NEW_LINE = "\n"
26
- COLON = ": "
27
-
28
- # advisor summary key
29
- SUSPECT_NODES = "Suspect Nodes"
30
- LINE = "Line"
31
- ADVISOR_SUGGEST = "Expert Advice"
32
-
33
- NO_ERROR_API = "NA"
34
-
35
- # advisor message
36
- NO_ERR_SUGGEST = "All data in comparison result meets the accuracy requirements."
37
- FORWARD_INPUT_SUGGEST = "1. Analyze the model to view the input source.\n" \
38
- "2. Check whether an inplace API causes the output result to overwrite the input result. That is, the fault is actually caused by a computation error.\n" \
39
- "3. The fault may be caused by memory corruption and further analysis is required."
40
- FORWARD_OUTPUT_SUGGEST = "This is a forward API computation error. Check the computation implementation."
41
- BACKWARD_INPUT_SUGGEST = "Check whether the forward computation result is affected."
42
- BACKWARD_OUTPUT_SUGGEST = "This is a backward API computation error. Check the computation implementation."
43
- BATCH_NORM_SUGGEST = "Torch API batch_norm input not fixed, the following suggestions may fix it:\n" \
44
- "1. If use torch.nn.functional.batch_norm, you can set parameter training=False.\n" \
45
- "2. If use torch.nn.BatchNormXXX, you can set parameter affine=False.\n" \
46
- "3. Use seed_all(mode=True) to enable deterministic computing."
47
- DETERMINISTIC_SUGGEST = "This torch api may be uncertainty in the calculation, " \
48
- "can seed_all(mode=True) to enable deterministic computing."
49
-
50
- FUNC_BATCH_NORM = "Functional_batch_norm"
51
- FORWARD_INPUT_1 = "forward_input.1"
52
- NEED_DETERMINISTIC_API = ["conv2d", "conv3d", "matmul", "nll_loss", "layer_norm", "lstm"]
53
- BATCH_NORM = "batch_norm"
54
-
55
- # name keyword
56
- INPUT = "input"
57
- OUTPUT = "output"
58
- FORWARD = "forward"
59
- BACKWARD = "backward"
1
+ # Copyright (c) 2022-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+
17
+ class AdvisorConst:
18
+ """
19
+ Class for advisor const
20
+ """
21
+
22
+ # text symbol
23
+ NEW_LINE = "\n"
24
+ COLON = ": "
25
+
26
+ # advisor summary key
27
+ SUSPECT_NODES = "Suspect Nodes"
28
+ LINE = "Line"
29
+ ADVISOR_SUGGEST = "Expert Advice"
30
+
31
+ NO_ERROR_API = "NA"
32
+
33
+ # advisor message
34
+ NO_ERR_SUGGEST = "All data in comparison result meets the accuracy requirements."
35
+ FORWARD_INPUT_SUGGEST = "1. Analyze the model to view the input source.\n" \
36
+ "2. Check whether an inplace API causes the output result to overwrite the input result. "\
37
+ "That is, the fault is actually caused by a computation error.\n" \
38
+ "3. The fault may be caused by memory corruption and further analysis is required."
39
+ FORWARD_OUTPUT_SUGGEST = "This is a forward API computation error. Check the computation implementation."
40
+ BACKWARD_INPUT_SUGGEST = "Check whether the forward computation result is affected."
41
+ BACKWARD_OUTPUT_SUGGEST = "This is a backward API computation error. Check the computation implementation."
42
+ BATCH_NORM_SUGGEST = "Torch API batch_norm input not fixed, the following suggestions may fix it:\n" \
43
+ "1. If use torch.nn.functional.batch_norm, you can set parameter training=False.\n" \
44
+ "2. If use torch.nn.BatchNormXXX, you can set parameter affine=False.\n" \
45
+ "3. Use seed_all(mode=True) to enable deterministic computing."
46
+ DETERMINISTIC_SUGGEST = "This torch api may be uncertainty in the calculation, " \
47
+ "can seed_all(mode=True) to enable deterministic computing."
48
+
49
+ FUNC_BATCH_NORM = "Functional_batch_norm"
50
+ FORWARD_INPUT_1 = "forward_input.1"
51
+ NEED_DETERMINISTIC_API = ["conv2d", "conv3d", "matmul", "nll_loss", "layer_norm", "lstm"]
52
+ BATCH_NORM = "batch_norm"
53
+
54
+ # name keyword
55
+ INPUT = "input"
56
+ OUTPUT = "output"
57
+ FORWARD = "forward"
58
+ BACKWARD = "backward"
@@ -1,58 +1,58 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- # Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved.
5
- # Licensed under the Apache License, Version 2.0 (the "License");
6
- # you may not use this file except in compliance with the License.
7
- # You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- """
17
- import os
18
- import time
19
-
20
- from msprobe.core.advisor.advisor_const import AdvisorConst
21
- from msprobe.core.common.log import logger
22
- from msprobe.core.common.const import Const, FileCheckConst
23
- from msprobe.core.common.file_check import change_mode
24
-
25
-
26
class AdvisorResult:
    """
    One advisor conclusion (suspect node, line and advice text) that can be
    logged and written to a summary file.
    """

    def __init__(self, node, line, message):
        """Store the suspect node, its line and the advice message."""
        self.suspect_node, self.line, self.advisor_message = node, line, message

    @staticmethod
    def gen_summary_file(out_path, message_list):
        """
        Write the messages, one per line, to a timestamped
        ``advisor_<YYYYmmddHHMMSS>.txt`` file under ``out_path``.

        An IOError while saving is logged, not raised.
        """
        # strftime without a time tuple formats the current local time.
        timestamp = time.strftime("%Y%m%d%H%M%S")
        result_file = os.path.join(out_path, 'advisor_{}.txt'.format(timestamp))
        try:
            file_descriptor = os.open(result_file, Const.WRITE_FLAGS, Const.WRITE_MODES)
            with os.fdopen(file_descriptor, 'w+') as output_file:
                output_file.truncate(0)
                lines = [message + AdvisorConst.NEW_LINE for message in message_list]
                output_file.writelines(lines)
            change_mode(result_file, FileCheckConst.DATA_FILE_AUTHORITY)
        except IOError as io_error:
            logger.error("Failed to save %s, the reason is %s." % (result_file, io_error))
        else:
            logger.info("The advisor summary is saved in: %s" % result_file)

    def print_advisor_log(self):
        """
        Log the summary lines (line, suspect node, advice) and return them
        as a list of strings.
        """
        logger.info("The summary of the expert advice is as follows: ")
        labelled_values = (
            (AdvisorConst.LINE, str(self.line)),
            (AdvisorConst.SUSPECT_NODES, self.suspect_node),
            (AdvisorConst.ADVISOR_SUGGEST, self.advisor_message),
        )
        message_list = [label + AdvisorConst.COLON + value for label, value in labelled_values]
        for summary_line in message_list:
            logger.info(summary_line)
        return message_list
1
+ # Copyright (c) 2022-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import os
16
+ import time
17
+
18
+ from msprobe.core.advisor.advisor_const import AdvisorConst
19
+ from msprobe.core.common.log import logger
20
+ from msprobe.core.common.const import FileCheckConst
21
+ from msprobe.core.common.file_utils import change_mode, FileOpen
22
+
23
+
24
class AdvisorResult:
    """
    One advisor conclusion (suspect node, line and advice text) that can be
    logged and written to a summary file.
    """

    def __init__(self, node, line, message):
        """
        Args:
            node: suspect node, rendered after the "Suspect Nodes" label.
            line: value rendered after the "Line" label.
            message: advice text, rendered after the "Expert Advice" label.
        """
        self.suspect_node = node
        self.line = line
        self.advisor_message = message

    @staticmethod
    def gen_summary_file(out_path, message_list, suffix=""):
        """
        Write the messages, one per line, to a timestamped summary file.

        The file is created under ``out_path`` and named
        ``advisor<suffix>_<YYYYmmddHHMMSS>.txt``.

        Args:
            out_path: directory in which the summary file is created.
            message_list: iterable of messages to write.
            suffix: text inserted right after ``advisor`` in the file name.
                Defaults to an empty string so that callers passing only
                ``out_path`` and ``message_list`` keep working and get the
                plain ``advisor_<timestamp>.txt`` name.

        An IOError while saving is logged, not raised.
        """
        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
        file_name = 'advisor{}_{}.txt'.format(suffix, timestamp)
        result_file = os.path.join(out_path, file_name)
        # Built up front, without rebinding the caller-visible parameter.
        lines = [str(message) + AdvisorConst.NEW_LINE for message in message_list]
        try:
            with FileOpen(result_file, 'w+') as output_file:
                output_file.truncate(0)
                output_file.writelines(lines)
            # Permissions are adjusted once the file has been written and closed.
            change_mode(result_file, FileCheckConst.DATA_FILE_AUTHORITY)
        except IOError as io_error:
            logger.error("Failed to save %s, the reason is %s." % (result_file, io_error))
        else:
            logger.info("The advisor summary is saved in: %s" % result_file)

    def print_advisor_log(self):
        """
        Log the summary lines and return them.

        Returns:
            list of three strings: the line, the suspect node and the advice,
            each prefixed with its label and a colon separator.
        """
        logger.info("The summary of the expert advice is as follows: ")
        # Every value is coerced with str() so a non-string node or message
        # cannot break the concatenation (previously only `line` was coerced).
        message_list = [
            AdvisorConst.LINE + AdvisorConst.COLON + str(self.line),
            AdvisorConst.SUSPECT_NODES + AdvisorConst.COLON + str(self.suspect_node),
            AdvisorConst.ADVISOR_SUGGEST + AdvisorConst.COLON + str(self.advisor_message),
        ]
        for message in message_list:
            logger.info(message)
        return message_list