mindstudio-probe 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/LICENSE +201 -201
  2. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/METADATA +36 -34
  3. mindstudio_probe-1.0.4.dist-info/RECORD +276 -0
  4. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/WHEEL +1 -1
  5. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/entry_points.txt +1 -0
  6. msprobe/README.md +101 -237
  7. msprobe/{config/config.json → config.json} +49 -49
  8. msprobe/core/advisor/advisor.py +124 -124
  9. msprobe/core/advisor/advisor_const.py +59 -59
  10. msprobe/core/advisor/advisor_result.py +58 -58
  11. msprobe/core/common/const.py +341 -318
  12. msprobe/core/common/exceptions.py +99 -99
  13. msprobe/core/common/{file_check.py → file_utils.py} +478 -283
  14. msprobe/core/common/log.py +76 -69
  15. msprobe/core/common/utils.py +385 -616
  16. msprobe/core/common_config.py +85 -71
  17. msprobe/core/compare/acc_compare.py +299 -298
  18. msprobe/core/compare/check.py +95 -95
  19. msprobe/core/compare/compare_cli.py +49 -49
  20. msprobe/core/compare/highlight.py +223 -222
  21. msprobe/core/compare/multiprocessing_compute.py +149 -149
  22. msprobe/core/compare/npy_compare.py +295 -295
  23. msprobe/core/compare/utils.py +430 -429
  24. msprobe/core/data_dump/data_collector.py +154 -144
  25. msprobe/core/data_dump/data_processor/base.py +314 -293
  26. msprobe/core/data_dump/data_processor/factory.py +59 -59
  27. msprobe/core/data_dump/data_processor/mindspore_processor.py +186 -198
  28. msprobe/core/data_dump/data_processor/pytorch_processor.py +366 -389
  29. msprobe/core/data_dump/json_writer.py +96 -116
  30. msprobe/core/data_dump/scope.py +178 -178
  31. msprobe/core/grad_probe/constant.py +70 -70
  32. msprobe/core/grad_probe/grad_compare.py +171 -175
  33. msprobe/core/grad_probe/utils.py +64 -52
  34. msprobe/docs/01.installation.md +89 -0
  35. msprobe/docs/02.config_introduction.md +165 -0
  36. msprobe/docs/03.config_examples.md +247 -0
  37. msprobe/docs/04.acl_config_examples.md +76 -0
  38. msprobe/docs/05.data_dump_PyTorch.md +198 -0
  39. msprobe/docs/06.data_dump_MindSpore.md +243 -0
  40. msprobe/docs/07.accuracy_checker_PyTorch.md +274 -0
  41. msprobe/docs/08.accuracy_checker_online_PyTorch.md +198 -0
  42. msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
  43. msprobe/docs/10.accuracy_compare_PyTorch.md +245 -0
  44. msprobe/docs/11.accuracy_compare_MindSpore.md +202 -0
  45. msprobe/docs/12.overflow_check_PyTorch.md +79 -0
  46. msprobe/docs/13.overflow_check_MindSpore.md +31 -0
  47. msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
  48. msprobe/docs/15.free_benchmarking_PyTorch.md +164 -0
  49. msprobe/{doc/grad_probe/grad_probe.md → docs/17.grad_probe.md} +207 -207
  50. msprobe/docs/FAQ_PyTorch.md +177 -0
  51. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
  52. msprobe/docs/img/free_benchmark_framework.png +0 -0
  53. msprobe/mindspore/__init__.py +1 -1
  54. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +254 -245
  55. msprobe/mindspore/api_accuracy_checker/api_info.py +69 -69
  56. msprobe/mindspore/api_accuracy_checker/api_runner.py +155 -151
  57. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +196 -196
  58. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
  59. msprobe/mindspore/api_accuracy_checker/compute_element.py +238 -223
  60. msprobe/mindspore/api_accuracy_checker/main.py +8 -15
  61. msprobe/mindspore/api_accuracy_checker/type_mapping.py +113 -113
  62. msprobe/mindspore/api_accuracy_checker/utils.py +79 -62
  63. msprobe/mindspore/cell_processor.py +34 -34
  64. msprobe/mindspore/common/const.py +106 -87
  65. msprobe/mindspore/common/log.py +37 -37
  66. msprobe/mindspore/common/utils.py +81 -57
  67. msprobe/mindspore/compare/distributed_compare.py +75 -75
  68. msprobe/mindspore/compare/ms_compare.py +219 -117
  69. msprobe/mindspore/compare/ms_graph_compare.py +348 -317
  70. msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -399
  71. msprobe/mindspore/debugger/debugger_config.py +66 -74
  72. msprobe/mindspore/debugger/precision_debugger.py +126 -107
  73. msprobe/mindspore/dump/dump_tool_factory.py +35 -35
  74. msprobe/mindspore/dump/hook_cell/api_registry.py +118 -104
  75. msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -53
  76. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +922 -925
  77. msprobe/mindspore/dump/hook_cell/wrap_api.py +113 -0
  78. msprobe/mindspore/dump/jit_dump.py +72 -56
  79. msprobe/mindspore/dump/kernel_graph_dump.py +59 -60
  80. msprobe/mindspore/dump/kernel_kbyk_dump.py +64 -65
  81. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +116 -116
  82. msprobe/mindspore/free_benchmark/common/config.py +12 -12
  83. msprobe/mindspore/free_benchmark/common/handler_params.py +17 -17
  84. msprobe/mindspore/free_benchmark/common/utils.py +71 -71
  85. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -842
  86. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +43 -42
  87. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +107 -107
  88. msprobe/mindspore/free_benchmark/handler/base_handler.py +90 -90
  89. msprobe/mindspore/free_benchmark/handler/check_handler.py +41 -41
  90. msprobe/mindspore/free_benchmark/handler/fix_handler.py +36 -36
  91. msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -21
  92. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +67 -67
  93. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +21 -21
  94. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +63 -63
  95. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +51 -0
  96. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +35 -34
  97. msprobe/mindspore/free_benchmark/perturbation/no_change.py +12 -12
  98. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +29 -27
  99. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +33 -33
  100. msprobe/mindspore/grad_probe/global_context.py +90 -91
  101. msprobe/mindspore/grad_probe/grad_analyzer.py +231 -231
  102. msprobe/mindspore/grad_probe/grad_monitor.py +27 -27
  103. msprobe/mindspore/grad_probe/grad_stat_csv.py +131 -131
  104. msprobe/mindspore/grad_probe/hook.py +94 -92
  105. msprobe/mindspore/grad_probe/utils.py +29 -28
  106. msprobe/mindspore/ms_config.py +128 -126
  107. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +44 -45
  108. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +34 -34
  109. msprobe/mindspore/runtime.py +4 -4
  110. msprobe/mindspore/service.py +378 -354
  111. msprobe/mindspore/task_handler_factory.py +24 -24
  112. msprobe/msprobe.py +105 -107
  113. msprobe/pytorch/__init__.py +3 -3
  114. msprobe/pytorch/api_accuracy_checker/common/config.py +53 -55
  115. msprobe/pytorch/api_accuracy_checker/common/utils.py +214 -165
  116. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +213 -213
  117. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +606 -581
  118. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
  119. msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
  120. msprobe/pytorch/api_accuracy_checker/compare/compare.py +386 -381
  121. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +73 -73
  122. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +245 -244
  123. msprobe/pytorch/api_accuracy_checker/config.yaml +10 -10
  124. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +335 -332
  125. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +200 -199
  126. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +133 -134
  127. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +592 -581
  128. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +70 -74
  129. msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
  130. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +197 -202
  131. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +325 -324
  132. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +204 -204
  133. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +219 -218
  134. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +10 -10
  135. msprobe/pytorch/bench_functions/__init__.py +15 -15
  136. msprobe/pytorch/bench_functions/apply_adam_w.py +28 -28
  137. msprobe/pytorch/bench_functions/confusion_transpose.py +19 -19
  138. msprobe/pytorch/bench_functions/fast_gelu.py +55 -55
  139. msprobe/pytorch/bench_functions/layer_norm_eval.py +6 -6
  140. msprobe/pytorch/bench_functions/linear.py +12 -12
  141. msprobe/pytorch/bench_functions/matmul_backward.py +48 -48
  142. msprobe/pytorch/bench_functions/npu_fusion_attention.py +509 -421
  143. msprobe/pytorch/bench_functions/rms_norm.py +15 -15
  144. msprobe/pytorch/bench_functions/rotary_mul.py +52 -52
  145. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +26 -26
  146. msprobe/pytorch/bench_functions/swiglu.py +55 -55
  147. msprobe/pytorch/common/__init__.py +2 -2
  148. msprobe/pytorch/common/compare_script.template +14 -14
  149. msprobe/pytorch/common/log.py +20 -31
  150. msprobe/pytorch/common/parse_json.py +39 -39
  151. msprobe/pytorch/common/utils.py +305 -300
  152. msprobe/pytorch/compare/distributed_compare.py +66 -66
  153. msprobe/pytorch/compare/mapping.yaml +607 -607
  154. msprobe/pytorch/compare/match.py +34 -33
  155. msprobe/pytorch/compare/pt_compare.py +50 -40
  156. msprobe/pytorch/debugger/debugger_config.py +95 -95
  157. msprobe/pytorch/debugger/precision_debugger.py +125 -125
  158. msprobe/pytorch/free_benchmark/__init__.py +8 -8
  159. msprobe/pytorch/free_benchmark/common/constant.py +70 -70
  160. msprobe/pytorch/free_benchmark/common/counter.py +71 -71
  161. msprobe/pytorch/free_benchmark/common/enums.py +37 -37
  162. msprobe/pytorch/free_benchmark/common/params.py +129 -129
  163. msprobe/pytorch/free_benchmark/common/utils.py +102 -102
  164. msprobe/pytorch/free_benchmark/compare/grad_saver.py +179 -179
  165. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +104 -104
  166. msprobe/pytorch/free_benchmark/main.py +105 -105
  167. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +13 -13
  168. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +41 -41
  169. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +90 -90
  170. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +104 -104
  171. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +63 -63
  172. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +68 -68
  173. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +28 -28
  174. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +45 -45
  175. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +19 -19
  176. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +217 -217
  177. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +39 -39
  178. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +23 -23
  179. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +30 -30
  180. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +170 -170
  181. msprobe/pytorch/function_factory.py +76 -75
  182. msprobe/pytorch/functional/dump_module.py +39 -39
  183. msprobe/pytorch/grad_probe/grad_monitor.py +91 -90
  184. msprobe/pytorch/grad_probe/grad_stat_csv.py +128 -128
  185. msprobe/pytorch/hook_module/api_registry.py +161 -161
  186. msprobe/pytorch/hook_module/hook_module.py +120 -120
  187. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1877
  188. msprobe/pytorch/hook_module/utils.py +30 -29
  189. msprobe/pytorch/hook_module/wrap_aten.py +110 -110
  190. msprobe/pytorch/hook_module/wrap_distributed.py +78 -78
  191. msprobe/pytorch/hook_module/wrap_functional.py +105 -105
  192. msprobe/pytorch/hook_module/wrap_npu_custom.py +93 -84
  193. msprobe/pytorch/hook_module/wrap_tensor.py +71 -71
  194. msprobe/pytorch/hook_module/wrap_torch.py +86 -86
  195. msprobe/pytorch/hook_module/wrap_vf.py +62 -62
  196. msprobe/pytorch/module_processer.py +138 -138
  197. msprobe/pytorch/online_dispatch/__init__.py +20 -20
  198. msprobe/pytorch/online_dispatch/compare.py +236 -236
  199. msprobe/pytorch/online_dispatch/dispatch.py +271 -271
  200. msprobe/pytorch/online_dispatch/dump_compare.py +155 -156
  201. msprobe/pytorch/online_dispatch/single_compare.py +391 -391
  202. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +49 -49
  203. msprobe/pytorch/online_dispatch/utils.py +130 -146
  204. msprobe/pytorch/parse.py +4 -4
  205. msprobe/pytorch/parse_tool/cli.py +32 -32
  206. msprobe/pytorch/parse_tool/lib/compare.py +260 -271
  207. msprobe/pytorch/parse_tool/lib/config.py +52 -52
  208. msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
  209. msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
  210. msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
  211. msprobe/pytorch/parse_tool/lib/parse_tool.py +158 -158
  212. msprobe/pytorch/parse_tool/lib/utils.py +316 -321
  213. msprobe/pytorch/parse_tool/lib/visualization.py +85 -91
  214. msprobe/pytorch/pt_config.py +188 -187
  215. msprobe/pytorch/service.py +246 -252
  216. mindstudio_probe-1.0.3.dist-info/RECORD +0 -272
  217. msprobe/config/README.md +0 -539
  218. msprobe/mindspore/doc/compare.md +0 -58
  219. msprobe/mindspore/doc/dump.md +0 -217
  220. msprobe/mindspore/dump/hook_cell/wrap_functional.py +0 -91
  221. msprobe/mindspore/dump/hook_cell/wrap_tensor.py +0 -63
  222. msprobe/pytorch/doc/FAQ.md +0 -193
  223. msprobe/pytorch/doc/api_accuracy_checker.md +0 -313
  224. msprobe/pytorch/doc/api_accuracy_checker_online.md +0 -187
  225. msprobe/pytorch/doc/dump.md +0 -260
  226. msprobe/pytorch/doc/msprobe/321/207/342/226/223/342/225/233/321/205/342/225/221/320/266/321/205/342/225/226/320/265/321/205/320/225/342/225/226/321/206/320/245/342/226/221/321/206/320/235/320/276dump/321/206/320/260/320/227/321/205/320/227/320/226/321/206/320/220/320/267/321/210/320/223/342/225/234/321/205/320/257/342/225/221/321/207/342/225/221/342/224/220/321/206/320/232/320/265/321/205/320/241/320/232.md +0 -182
  227. msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -240
  228. msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
  229. msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
  230. msprobe/pytorch/doc/run_overflow_check.md +0 -25
  231. msprobe/pytorch/doc/在线精度比对.md +0 -90
  232. msprobe/pytorch/doc/无标杆工具场景验证和性能基线报告.md +0 -151
  233. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/top_level.txt +0 -0
  234. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
  235. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
  236. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
  237. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
  238. /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
  239. /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
  240. /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
  241. /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
  242. /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
  243. /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
  244. /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
  245. /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
  246. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
  247. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
  248. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
  249. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
  250. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
  251. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
  252. /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
  253. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
  254. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
  255. /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
  256. /msprobe/{config → docs}/img/free_benchmark.png +0 -0
  257. /msprobe/{doc/grad_probe/img/image-1.png → docs/img/grad_probe_image-1.png} +0 -0
  258. /msprobe/{doc/grad_probe/img/image-2.png → docs/img/grad_probe_image-2.png} +0 -0
  259. /msprobe/{doc/grad_probe/img/image-3.png → docs/img/grad_probe_image-3.png} +0 -0
  260. /msprobe/{doc/grad_probe/img/image-4.png → docs/img/grad_probe_image-4.png} +0 -0
  261. /msprobe/{doc/grad_probe/img/image.png → docs/img/grad_probe_image.png} +0 -0
  262. /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
@@ -1,126 +1,128 @@
1
- import json
2
-
3
- from msprobe.core.common_config import CommonConfig, BaseConfig
4
- from msprobe.core.common.file_check import FileOpen
5
- from msprobe.core.common.const import Const
6
- from msprobe.mindspore.common.const import FreeBenchmarkConst
7
- from msprobe.mindspore.common.log import logger
8
- from msprobe.core.grad_probe.constant import level_adp
9
- from msprobe.core.grad_probe.utils import check_numeral_list_ascend
10
-
11
-
12
- class TensorConfig(BaseConfig):
13
- def __init__(self, json_config):
14
- super().__init__(json_config)
15
- self.check_mode = None
16
- self.file_format = json_config.get("file_format")
17
- self.check_config()
18
- self._check_config()
19
-
20
- def _check_config(self):
21
- if self.data_mode is not None and len(self.data_mode) > 0:
22
- if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
23
- raise Exception("data_mode must be all, input or output")
24
- if self.file_format and self.file_format not in ["npy", "bin"]:
25
- raise Exception("file_format is invalid")
26
-
27
-
28
- class StatisticsConfig(BaseConfig):
29
- def __init__(self, json_config):
30
- super().__init__(json_config)
31
- self.file_format = None
32
- self.check_mode = None
33
- self.check_config()
34
- self._check_config()
35
-
36
- def _check_config(self):
37
- if self.data_mode is not None and len(self.data_mode) > 0:
38
- if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
39
- raise Exception("data_mode must be all, input or output")
40
- if self.summary_mode and self.summary_mode not in ["statistics", "md5"]:
41
- raise Exception("summary_mode is invalid")
42
-
43
-
44
- class OverflowCheckConfig(BaseConfig):
45
- def __init__(self, json_config):
46
- super().__init__(json_config)
47
- self.data_mode = ["all"]
48
- self._check_config()
49
-
50
- def _check_config(self):
51
- if self.overflow_nums is not None and not isinstance(self.overflow_nums, int):
52
- raise Exception("overflow_nums is invalid, it should be an integer")
53
- if self.overflow_nums is not None and self.overflow_nums != -1 and self.overflow_nums <= 0:
54
- raise Exception("overflow_nums should be -1 or positive integer")
55
- if self.check_mode and self.check_mode not in ["all", "aicore", "atomic"]:
56
- raise Exception("check_mode is invalid")
57
-
58
-
59
- class FreeBenchmarkConfig(BaseConfig):
60
- def __init__(self, task_config):
61
- super().__init__(task_config)
62
- self._check_config()
63
-
64
- def _check_config(self):
65
- if self.fuzz_device and self.fuzz_device not in FreeBenchmarkConst.DEVICE_LIST:
66
- raise Exception("fuzz_device must be npu or empty")
67
- if self.pert_mode and self.pert_mode not in FreeBenchmarkConst.PERT_TYPE_LIST:
68
- raise Exception("pert_mode must be improve_precision, add_noise, bit_noise, no_change or empty")
69
- if self.handler_type and self.handler_type not in FreeBenchmarkConst.HANDLER_TYPE_LIST:
70
- raise Exception("handler_type must be check, fix or empty")
71
- if self.fuzz_level and self.fuzz_level not in FreeBenchmarkConst.DUMP_LEVEL_LIST:
72
- raise Exception("fuzz_level must be L1 or empty")
73
- if self.fuzz_stage and self.fuzz_stage not in FreeBenchmarkConst.STAGE_LIST:
74
- raise Exception("fuzz_stage must be forward or empty")
75
- if self.if_preheat or self.preheat_step or self.max_sample:
76
- logger.warning("'if_preheat', 'preheat_step' and 'max_sample' settings "
77
- "are not supported for mindspore free benchmark task.")
78
-
79
-
80
- class GradProbeConfig(BaseConfig):
81
- def __init__(self, json_config):
82
- super().__init__(json_config)
83
- self.grad_level = json_config.get("grad_level", "L1")
84
- self.param_list = json_config.get("param_list", [])
85
- self.bounds = json_config.get("bounds", [])
86
-
87
- def _check_config(self):
88
- if self.grad_level not in level_adp.keys():
89
- raise Exception(f"grad_level must be one of {level_adp.keys()}")
90
- if not isinstance(self.param_list, list):
91
- raise Exception(f"param_list must be a list")
92
- check_numeral_list_ascend(self.bounds)
93
-
94
-
95
- TaskDict = {
96
- Const.TENSOR: TensorConfig,
97
- Const.STATISTICS: StatisticsConfig,
98
- Const.OVERFLOW_CHECK: OverflowCheckConfig,
99
- Const.FREE_BENCHMARK: FreeBenchmarkConfig,
100
- Const.GRAD_PROBE: GradProbeConfig,
101
- }
102
-
103
-
104
- def parse_common_config(json_config):
105
- return CommonConfig(json_config)
106
-
107
-
108
- def parse_task_config(task, json_config):
109
- task_map = json_config.get(task)
110
- if not task_map:
111
- task_map = dict()
112
- if task not in TaskDict:
113
- raise Exception("task is invalid.")
114
- return TaskDict.get(task)(task_map)
115
-
116
-
117
- def parse_json_config(json_file_path):
118
- if not json_file_path:
119
- raise Exception("json file path is None")
120
- with FileOpen(json_file_path, 'r') as file:
121
- json_config = json.load(file)
122
- common_config = parse_common_config(json_config)
123
- if not common_config.task:
124
- common_config.task = Const.STATISTICS
125
- task_config = parse_task_config(common_config.task, json_config)
126
- return common_config, task_config
1
+ import json
2
+
3
+ from msprobe.core.common_config import CommonConfig, BaseConfig
4
+ from msprobe.core.common.file_utils import FileOpen
5
+ from msprobe.core.common.const import Const
6
+ from msprobe.mindspore.common.const import FreeBenchmarkConst
7
+ from msprobe.mindspore.common.log import logger
8
+ from msprobe.core.grad_probe.constant import level_adp
9
+ from msprobe.core.grad_probe.utils import check_numeral_list_ascend
10
+
11
+
12
+ class TensorConfig(BaseConfig):
13
+ def __init__(self, json_config):
14
+ super().__init__(json_config)
15
+ self.check_mode = None
16
+ self.file_format = json_config.get("file_format")
17
+ self.check_config()
18
+ self._check_config()
19
+
20
+ def _check_config(self):
21
+ if self.data_mode is not None and len(self.data_mode) > 0:
22
+ if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
23
+ raise Exception("data_mode must be all, input or output")
24
+ if self.file_format and self.file_format not in ["npy", "bin"]:
25
+ raise Exception("file_format is invalid")
26
+
27
+
28
+ class StatisticsConfig(BaseConfig):
29
+ def __init__(self, json_config):
30
+ super().__init__(json_config)
31
+ self.file_format = None
32
+ self.check_mode = None
33
+ self.check_config()
34
+ self._check_config()
35
+
36
+ def _check_config(self):
37
+ if self.data_mode is not None and len(self.data_mode) > 0:
38
+ if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
39
+ raise Exception("data_mode must be all, input or output")
40
+ if self.summary_mode and self.summary_mode not in ["statistics", "md5"]:
41
+ raise Exception("summary_mode is invalid")
42
+
43
+
44
+ class OverflowCheckConfig(BaseConfig):
45
+ def __init__(self, json_config):
46
+ super().__init__(json_config)
47
+ self.data_mode = ["all"]
48
+ self._check_config()
49
+
50
+ def _check_config(self):
51
+ if self.overflow_nums is not None and not isinstance(self.overflow_nums, int):
52
+ raise Exception("overflow_nums is invalid, it should be an integer")
53
+ if self.overflow_nums is not None and self.overflow_nums != -1 and self.overflow_nums <= 0:
54
+ raise Exception("overflow_nums should be -1 or positive integer")
55
+ if self.check_mode and self.check_mode not in ["all", "aicore", "atomic"]:
56
+ raise Exception("check_mode is invalid")
57
+
58
+
59
+ class FreeBenchmarkConfig(BaseConfig):
60
+ def __init__(self, task_config):
61
+ super().__init__(task_config)
62
+ self._check_config()
63
+
64
+ def _check_config(self):
65
+ if self.fuzz_device and self.fuzz_device not in FreeBenchmarkConst.DEVICE_LIST:
66
+ raise Exception("fuzz_device must be npu or empty")
67
+ if self.pert_mode and self.pert_mode not in FreeBenchmarkConst.PERT_TYPE_LIST:
68
+ raise Exception("pert_mode must be improve_precision, add_noise, bit_noise, "
69
+ "no_change, change_value or empty")
70
+ if self.handler_type and self.handler_type not in FreeBenchmarkConst.HANDLER_TYPE_LIST:
71
+ raise Exception("handler_type must be check, fix or empty")
72
+ if self.fuzz_level and self.fuzz_level not in FreeBenchmarkConst.DUMP_LEVEL_LIST:
73
+ raise Exception("fuzz_level must be L1 or empty")
74
+ if self.fuzz_stage and self.fuzz_stage not in FreeBenchmarkConst.STAGE_LIST:
75
+ raise Exception("fuzz_stage must be forward or empty")
76
+ if self.if_preheat or self.preheat_step or self.max_sample:
77
+ logger.warning("'if_preheat', 'preheat_step' and 'max_sample' settings "
78
+ "are not supported for mindspore free benchmark task.")
79
+
80
+
81
+ class GradProbeConfig(BaseConfig):
82
+ def __init__(self, json_config):
83
+ super().__init__(json_config)
84
+ self.grad_level = json_config.get("grad_level", "L1")
85
+ self.param_list = json_config.get("param_list", [])
86
+ self.bounds = json_config.get("bounds", [-1, 0, 1])
87
+ self._check_config()
88
+
89
+ def _check_config(self):
90
+ if self.grad_level not in level_adp.keys():
91
+ raise Exception(f"grad_level must be one of {level_adp.keys()}")
92
+ if not isinstance(self.param_list, list):
93
+ raise Exception("param_list must be a list")
94
+ check_numeral_list_ascend(self.bounds)
95
+
96
+
97
+ TaskDict = {
98
+ Const.TENSOR: TensorConfig,
99
+ Const.STATISTICS: StatisticsConfig,
100
+ Const.OVERFLOW_CHECK: OverflowCheckConfig,
101
+ Const.FREE_BENCHMARK: FreeBenchmarkConfig,
102
+ Const.GRAD_PROBE: GradProbeConfig
103
+ }
104
+
105
+
106
+ def parse_common_config(json_config):
107
+ return CommonConfig(json_config)
108
+
109
+
110
+ def parse_task_config(task, json_config):
111
+ task_map = json_config.get(task)
112
+ if not task_map:
113
+ task_map = dict()
114
+ if task not in TaskDict:
115
+ raise Exception("task is invalid.")
116
+ return TaskDict.get(task)(task_map)
117
+
118
+
119
+ def parse_json_config(json_file_path):
120
+ if not json_file_path:
121
+ raise Exception("json file path is None")
122
+ with FileOpen(json_file_path, 'r') as file:
123
+ json_config = json.load(file)
124
+ common_config = parse_common_config(json_config)
125
+ if not common_config.task:
126
+ common_config.task = Const.STATISTICS
127
+ task_config = parse_task_config(common_config.task, json_config)
128
+ return common_config, task_config
@@ -1,45 +1,44 @@
1
- import os
2
- import json
3
- from msprobe.core.common.utils import make_dump_path_if_not_exists
4
- from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
5
- from msprobe.core.common.log import logger
6
- from msprobe.core.common.file_check import FileOpen
7
-
8
-
9
- class KernelGraphOverflowCheck:
10
- def __init__(self, config: DebuggerConfig):
11
- self.dump_json = dict()
12
- self.dump_json["common_dump_settings"] = dict()
13
- self.dump_json["common_dump_settings"]["dump_mode"] = 0
14
- self.dump_json["common_dump_settings"]["path"] = ""
15
- self.dump_json["common_dump_settings"]["net_name"] = "Net"
16
- self.dump_json["common_dump_settings"]["iteration"] = "all"
17
- self.dump_json["common_dump_settings"]["saved_data"] = "full"
18
- self.dump_json["common_dump_settings"]["input_output"] = 0
19
- self.dump_json["common_dump_settings"]["kernels"] = []
20
- self.dump_json["common_dump_settings"]["support_device"] = [0,1,2,3,4,5,6,7]
21
- self.dump_json["common_dump_settings"]["op_debug_mode"] = 3
22
- self.dump_json["common_dump_settings"]["file_format"] = "npy"
23
-
24
- self.dump_json["common_dump_settings"]["path"] = config.dump_path
25
- if len(config.step) > 0:
26
- logger.warning("Step would change to all in this task.")
27
- if len(config.rank) > 0:
28
- self.dump_json["common_dump_settings"]["support_device"] = config.rank
29
- if config.check_mode == "aicore":
30
- self.dump_json["common_dump_settings"]["op_debug_mode"] = 1
31
- elif config.check_mode == "atomic":
32
- self.dump_json["common_dump_settings"]["op_debug_mode"] = 2
33
-
34
- def handle(self):
35
- if os.getenv("GRAPH_OP_RUN") == "1":
36
- raise Exception("Must run in graph mode, not kbk mode")
37
- json_path = self.dump_json["common_dump_settings"]["path"]
38
- make_dump_path_if_not_exists(json_path)
39
- json_path = os.path.join(json_path, "kernel_graph_overflow_check.json")
40
- with FileOpen(json_path, 'w') as f:
41
- json.dump(self.dump_json, f)
42
- logger.info(json_path + " has been created.")
43
- os.environ["MINDSPORE_DUMP_CONFIG"] = json_path
44
- if "MS_ACL_DUMP_CFG_PATH" in os.environ:
45
- del os.environ["MS_ACL_DUMP_CFG_PATH"]
1
+ import os
2
+ import json
3
+ from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
4
+ from msprobe.mindspore.common.log import logger
5
+ from msprobe.core.common.file_utils import FileOpen, create_directory
6
+
7
+
8
+ class KernelGraphOverflowCheck:
9
+ def __init__(self, config: DebuggerConfig):
10
+ self.dump_json = dict()
11
+ self.dump_json["common_dump_settings"] = dict()
12
+ self.dump_json["common_dump_settings"]["dump_mode"] = 0
13
+ self.dump_json["common_dump_settings"]["path"] = ""
14
+ self.dump_json["common_dump_settings"]["net_name"] = "Net"
15
+ self.dump_json["common_dump_settings"]["iteration"] = "all"
16
+ self.dump_json["common_dump_settings"]["saved_data"] = "full"
17
+ self.dump_json["common_dump_settings"]["input_output"] = 0
18
+ self.dump_json["common_dump_settings"]["kernels"] = []
19
+ self.dump_json["common_dump_settings"]["support_device"] = [0,1,2,3,4,5,6,7]
20
+ self.dump_json["common_dump_settings"]["op_debug_mode"] = 3
21
+ self.dump_json["common_dump_settings"]["file_format"] = "npy"
22
+
23
+ self.dump_json["common_dump_settings"]["path"] = config.dump_path
24
+ if len(config.step) > 0:
25
+ logger.warning("Step would change to all in this task.")
26
+ if len(config.rank) > 0:
27
+ self.dump_json["common_dump_settings"]["support_device"] = config.rank
28
+ if config.check_mode == "aicore":
29
+ self.dump_json["common_dump_settings"]["op_debug_mode"] = 1
30
+ elif config.check_mode == "atomic":
31
+ self.dump_json["common_dump_settings"]["op_debug_mode"] = 2
32
+
33
+ def handle(self):
34
+ if os.getenv("GRAPH_OP_RUN") == "1":
35
+ raise Exception("Must run in graph mode, not kbk mode")
36
+ json_path = self.dump_json["common_dump_settings"]["path"]
37
+ create_directory(json_path)
38
+ json_path = os.path.join(json_path, "kernel_graph_overflow_check.json")
39
+ with FileOpen(json_path, 'w') as f:
40
+ json.dump(self.dump_json, f)
41
+ logger.info(json_path + " has been created.")
42
+ os.environ["MINDSPORE_DUMP_CONFIG"] = json_path
43
+ if "MS_ACL_DUMP_CFG_PATH" in os.environ:
44
+ del os.environ["MS_ACL_DUMP_CFG_PATH"]
@@ -1,34 +1,34 @@
1
- from msprobe.mindspore.common.const import Const
2
- from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
3
- from msprobe.mindspore.overflow_check.kernel_graph_overflow_check import KernelGraphOverflowCheck
4
-
5
-
6
- class OverflowCheckToolFactory:
7
- tools = {
8
- Const.CELL: {
9
- Const.GRAPH_KBYK_MODE: None,
10
- Const.GRAPH_GE_MODE: None,
11
- Const.PYNATIVE_MODE: None
12
- },
13
- Const.API: {
14
- Const.GRAPH_KBYK_MODE: None,
15
- Const.GRAPH_GE_MODE: None,
16
- Const.PYNATIVE_MODE: None
17
- },
18
- Const.KERNEL: {
19
- Const.GRAPH_KBYK_MODE: None,
20
- Const.GRAPH_GE_MODE: KernelGraphOverflowCheck,
21
- Const.PYNATIVE_MODE: None
22
- }
23
- }
24
-
25
- @staticmethod
26
- def create(config: DebuggerConfig):
27
- tool = OverflowCheckToolFactory.tools.get(config.level)
28
- if not tool:
29
- raise Exception("Valid level is needed.")
30
- tool = tool.get(config.execution_mode)
31
- if not tool:
32
- raise Exception(f"Overflow check is not supported in {config.execution_mode} mode "
33
- f"when level is {config.level}.")
34
- return tool(config)
1
+ from msprobe.mindspore.common.const import Const
2
+ from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
3
+ from msprobe.mindspore.overflow_check.kernel_graph_overflow_check import KernelGraphOverflowCheck
4
+
5
+
6
+ class OverflowCheckToolFactory:
7
+ tools = {
8
+ Const.CELL: {
9
+ Const.GRAPH_KBYK_MODE: None,
10
+ Const.GRAPH_GE_MODE: None,
11
+ Const.PYNATIVE_MODE: None
12
+ },
13
+ Const.API: {
14
+ Const.GRAPH_KBYK_MODE: None,
15
+ Const.GRAPH_GE_MODE: None,
16
+ Const.PYNATIVE_MODE: None
17
+ },
18
+ Const.KERNEL: {
19
+ Const.GRAPH_KBYK_MODE: None,
20
+ Const.GRAPH_GE_MODE: KernelGraphOverflowCheck,
21
+ Const.PYNATIVE_MODE: None
22
+ }
23
+ }
24
+
25
+ @staticmethod
26
+ def create(config: DebuggerConfig):
27
+ tool = OverflowCheckToolFactory.tools.get(config.level)
28
+ if not tool:
29
+ raise Exception("Valid level is needed.")
30
+ tool = tool.get(config.execution_mode)
31
+ if not tool:
32
+ raise Exception(f"Overflow check is not supported in {config.execution_mode} mode "
33
+ f"when level is {config.level}.")
34
+ return tool(config)
@@ -1,4 +1,4 @@
1
- class Runtime:
2
- step_count: int = 0
3
- rank_id: int = -1
4
- is_running: bool = False
1
+ class Runtime:
2
+ step_count: int = 0
3
+ rank_id: int = -1
4
+ is_running: bool = False