mindstudio-probe 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/METADATA +7 -6
  2. mindstudio_probe-1.2.1.dist-info/RECORD +396 -0
  3. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/WHEEL +1 -1
  4. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/entry_points.txt +0 -1
  5. msprobe/CMakeLists.txt +5 -0
  6. msprobe/README.md +51 -20
  7. msprobe/config.json +2 -3
  8. msprobe/core/advisor/advisor.py +8 -3
  9. msprobe/core/common/const.py +264 -15
  10. msprobe/core/common/exceptions.py +27 -3
  11. msprobe/core/common/file_utils.py +176 -26
  12. msprobe/core/common/inplace_op_checker.py +15 -0
  13. msprobe/core/common/inplace_ops.yaml +3 -0
  14. msprobe/core/common/log.py +27 -9
  15. msprobe/core/common/utils.py +204 -77
  16. msprobe/core/common_config.py +49 -14
  17. msprobe/core/compare/acc_compare.py +274 -198
  18. msprobe/core/compare/check.py +32 -33
  19. msprobe/core/compare/compare_cli.py +32 -14
  20. msprobe/core/compare/highlight.py +283 -127
  21. msprobe/core/compare/layer_mapping/__init__.py +19 -0
  22. msprobe/core/compare/layer_mapping/data_scope_parser.py +246 -0
  23. msprobe/core/compare/layer_mapping/layer_mapping.py +249 -0
  24. msprobe/core/compare/layer_mapping/postprocess_pass.py +95 -0
  25. msprobe/core/compare/merge_result/merge_result.py +380 -0
  26. msprobe/core/compare/merge_result/merge_result_cli.py +31 -0
  27. msprobe/core/compare/multiprocessing_compute.py +2 -2
  28. msprobe/core/compare/npy_compare.py +135 -144
  29. msprobe/core/compare/utils.py +419 -274
  30. msprobe/core/data_dump/data_collector.py +60 -28
  31. msprobe/core/data_dump/data_processor/base.py +84 -36
  32. msprobe/core/data_dump/data_processor/factory.py +5 -3
  33. msprobe/core/data_dump/data_processor/mindspore_processor.py +152 -18
  34. msprobe/core/data_dump/data_processor/pytorch_processor.py +267 -110
  35. msprobe/core/data_dump/json_writer.py +29 -1
  36. msprobe/core/data_dump/scope.py +119 -39
  37. msprobe/core/grad_probe/constant.py +27 -13
  38. msprobe/core/grad_probe/grad_compare.py +18 -1
  39. msprobe/core/grad_probe/utils.py +30 -2
  40. msprobe/core/overflow_check/abnormal_scene.py +189 -0
  41. msprobe/core/overflow_check/api_info.py +55 -0
  42. msprobe/core/overflow_check/checker.py +138 -0
  43. msprobe/core/overflow_check/filter.py +157 -0
  44. msprobe/core/overflow_check/ignore_rules.yaml +55 -0
  45. msprobe/core/overflow_check/level.py +22 -0
  46. msprobe/core/overflow_check/utils.py +28 -0
  47. msprobe/docs/01.installation.md +96 -7
  48. msprobe/docs/02.config_introduction.md +50 -23
  49. msprobe/docs/03.config_examples.md +2 -9
  50. msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
  51. msprobe/docs/05.data_dump_PyTorch.md +93 -61
  52. msprobe/docs/06.data_dump_MindSpore.md +200 -95
  53. msprobe/docs/07.accuracy_checker_PyTorch.md +28 -28
  54. msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
  55. msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
  56. msprobe/docs/10.accuracy_compare_PyTorch.md +114 -50
  57. msprobe/docs/11.accuracy_compare_MindSpore.md +340 -48
  58. msprobe/docs/12.overflow_check_PyTorch.md +2 -2
  59. msprobe/docs/13.overflow_check_MindSpore.md +6 -6
  60. msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
  61. msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
  62. msprobe/docs/17.grad_probe.md +5 -6
  63. msprobe/docs/19.monitor.md +561 -0
  64. msprobe/docs/20.monitor_performance_baseline.md +52 -0
  65. msprobe/docs/21.visualization_PyTorch.md +466 -0
  66. msprobe/docs/22.visualization_MindSpore.md +481 -0
  67. msprobe/docs/23.generate_operator_PyTorch.md +107 -0
  68. msprobe/docs/24.code_mapping_Mindspore.md +28 -0
  69. msprobe/docs/25.tool_function_introduction.md +29 -0
  70. msprobe/docs/26.data_dump_PyTorch_baseline.md +37 -0
  71. msprobe/docs/27.dump_json_instruction.md +521 -0
  72. msprobe/docs/FAQ.md +29 -2
  73. msprobe/docs/accuracy_checker_MindSpore/accuracy_checker_MindSpore_baseline.md +14 -0
  74. msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +22 -0
  75. msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +211 -0
  76. msprobe/docs/img/compare_result.png +0 -0
  77. msprobe/docs/img/merge_result.png +0 -0
  78. msprobe/docs/img/monitor/cpu_info.png +0 -0
  79. msprobe/docs/img/visualization/fuzzy_match_ms.png +0 -0
  80. msprobe/docs/img/visualization/fuzzy_match_pt.png +0 -0
  81. msprobe/docs/img/visualization/tensorboard_1.png +0 -0
  82. msprobe/docs/img/visualization/tensorboard_2.png +0 -0
  83. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  84. msprobe/docs/img/visualization/vis_browser_2.png +0 -0
  85. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  86. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  87. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  88. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  89. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  90. msprobe/docs/visualization/GPTModel.png +0 -0
  91. msprobe/docs/visualization/ParallelMLP.png +0 -0
  92. msprobe/docs/visualization/layer_mapping_example.md +132 -0
  93. msprobe/docs/visualization/mapping.png +0 -0
  94. msprobe/docs/visualization/mapping1.png +0 -0
  95. msprobe/docs/visualization/module_name.png +0 -0
  96. msprobe/docs/visualization/module_name1.png +0 -0
  97. msprobe/docs/visualization/no_mapping.png +0 -0
  98. msprobe/docs/visualization/no_mapping1.png +0 -0
  99. msprobe/docs/visualization/no_mapping_analyze.png +0 -0
  100. msprobe/docs/visualization/top_layer.png +0 -0
  101. msprobe/mindspore/__init__.py +25 -0
  102. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -151
  103. msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
  104. msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
  105. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
  106. msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
  107. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +64 -1
  108. msprobe/mindspore/api_accuracy_checker/compute_element.py +64 -31
  109. msprobe/mindspore/api_accuracy_checker/data_manager.py +301 -0
  110. msprobe/mindspore/api_accuracy_checker/main.py +28 -3
  111. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +212 -0
  112. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +60 -0
  113. msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
  114. msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
  115. msprobe/mindspore/cell_processor.py +33 -12
  116. msprobe/mindspore/code_mapping/bind.py +264 -0
  117. msprobe/mindspore/code_mapping/cmd_parser.py +40 -0
  118. msprobe/mindspore/code_mapping/graph.py +49 -0
  119. msprobe/mindspore/code_mapping/graph_parser.py +226 -0
  120. msprobe/mindspore/code_mapping/main.py +24 -0
  121. msprobe/mindspore/code_mapping/processor.py +34 -0
  122. msprobe/mindspore/common/const.py +35 -13
  123. msprobe/mindspore/common/log.py +5 -9
  124. msprobe/mindspore/common/utils.py +88 -4
  125. msprobe/mindspore/compare/distributed_compare.py +22 -24
  126. msprobe/mindspore/compare/ms_compare.py +333 -268
  127. msprobe/mindspore/compare/ms_graph_compare.py +95 -52
  128. msprobe/mindspore/debugger/debugger_config.py +7 -1
  129. msprobe/mindspore/debugger/precision_debugger.py +87 -12
  130. msprobe/mindspore/dump/dump_tool_factory.py +3 -1
  131. msprobe/mindspore/dump/hook_cell/api_registry.py +95 -18
  132. msprobe/mindspore/dump/hook_cell/hook_cell.py +60 -38
  133. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +45 -30
  134. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +36 -1
  135. msprobe/mindspore/dump/hook_cell/wrap_api.py +92 -1
  136. msprobe/mindspore/dump/jit_dump.py +17 -5
  137. msprobe/mindspore/dump/kernel_dump/kernel_config.py +33 -0
  138. msprobe/mindspore/dump/kernel_graph_dump.py +9 -4
  139. msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
  140. msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
  141. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
  142. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +156 -41
  143. msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
  144. msprobe/mindspore/free_benchmark/common/utils.py +19 -4
  145. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
  146. msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
  147. msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
  148. msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
  149. msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
  150. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
  151. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
  152. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +2 -2
  153. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
  154. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
  155. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
  156. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
  157. msprobe/mindspore/grad_probe/global_context.py +28 -8
  158. msprobe/mindspore/grad_probe/grad_analyzer.py +50 -24
  159. msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
  160. msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
  161. msprobe/mindspore/grad_probe/hook.py +35 -12
  162. msprobe/mindspore/grad_probe/utils.py +18 -5
  163. msprobe/mindspore/mindtorch/__init__.py +18 -0
  164. msprobe/mindspore/mindtorch/mindtorch_adaptor.py +255 -0
  165. msprobe/mindspore/ms_config.py +27 -16
  166. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +9 -4
  167. msprobe/mindspore/runtime.py +15 -0
  168. msprobe/mindspore/service.py +285 -113
  169. msprobe/mindspore/task_handler_factory.py +15 -0
  170. msprobe/msprobe.py +48 -10
  171. msprobe/pytorch/__init__.py +8 -6
  172. msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
  173. msprobe/pytorch/api_accuracy_checker/common/utils.py +31 -16
  174. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +41 -8
  175. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +103 -271
  176. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +4 -1
  177. msprobe/pytorch/api_accuracy_checker/compare/compare.py +69 -68
  178. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +54 -0
  179. msprobe/pytorch/api_accuracy_checker/compare/compare_input.py +51 -0
  180. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +2 -4
  181. msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
  182. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +478 -0
  183. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
  184. msprobe/pytorch/api_accuracy_checker/precision_standard/absolute_threshold.py +106 -0
  185. msprobe/pytorch/api_accuracy_checker/precision_standard/accumulative_error_compare.py +107 -0
  186. msprobe/pytorch/api_accuracy_checker/precision_standard/base_standard.py +151 -0
  187. msprobe/pytorch/api_accuracy_checker/precision_standard/benchmark_compare.py +226 -0
  188. msprobe/pytorch/api_accuracy_checker/precision_standard/binary_consistency.py +68 -0
  189. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_config.py +218 -0
  190. msprobe/pytorch/api_accuracy_checker/precision_standard/standard_register.py +104 -0
  191. msprobe/pytorch/api_accuracy_checker/precision_standard/thousandth_standard.py +63 -0
  192. msprobe/pytorch/api_accuracy_checker/precision_standard/ulp_compare.py +200 -0
  193. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +63 -2
  194. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +21 -15
  195. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +54 -22
  196. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +140 -71
  197. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +49 -8
  198. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
  199. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
  200. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +5 -3
  201. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
  202. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
  203. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
  204. msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
  205. msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
  206. msprobe/pytorch/bench_functions/npu_fusion_attention.py +142 -16
  207. msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
  208. msprobe/pytorch/bench_functions/swiglu.py +10 -2
  209. msprobe/pytorch/common/parse_json.py +7 -6
  210. msprobe/pytorch/common/utils.py +101 -7
  211. msprobe/pytorch/compare/distributed_compare.py +17 -30
  212. msprobe/pytorch/compare/pt_compare.py +44 -22
  213. msprobe/pytorch/debugger/debugger_config.py +46 -27
  214. msprobe/pytorch/debugger/precision_debugger.py +42 -12
  215. msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
  216. msprobe/pytorch/dump/module_dump/module_dump.py +86 -0
  217. msprobe/pytorch/{module_processer.py → dump/module_dump/module_processer.py} +81 -10
  218. msprobe/pytorch/free_benchmark/common/constant.py +15 -0
  219. msprobe/pytorch/free_benchmark/common/counter.py +15 -0
  220. msprobe/pytorch/free_benchmark/common/enums.py +15 -0
  221. msprobe/pytorch/free_benchmark/common/params.py +10 -2
  222. msprobe/pytorch/free_benchmark/common/utils.py +29 -4
  223. msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -5
  224. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
  225. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
  226. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
  227. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
  228. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
  229. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +41 -47
  230. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
  231. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +0 -4
  232. msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
  233. msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
  234. msprobe/pytorch/hook_module/__init__.py +1 -1
  235. msprobe/pytorch/hook_module/hook_module.py +14 -11
  236. msprobe/pytorch/hook_module/register_optimizer_hook.py +59 -0
  237. msprobe/pytorch/hook_module/support_wrap_ops.yaml +35 -0
  238. msprobe/pytorch/hook_module/wrap_distributed.py +6 -8
  239. msprobe/pytorch/hook_module/wrap_functional.py +0 -38
  240. msprobe/pytorch/monitor/__init__.py +0 -0
  241. msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
  242. msprobe/pytorch/monitor/anomaly_detect.py +425 -0
  243. msprobe/pytorch/monitor/csv2tb.py +166 -0
  244. msprobe/pytorch/monitor/distributed/__init__.py +0 -0
  245. msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
  246. msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
  247. msprobe/pytorch/monitor/distributed/wrap_distributed.py +283 -0
  248. msprobe/pytorch/monitor/features.py +108 -0
  249. msprobe/pytorch/monitor/module_hook.py +1076 -0
  250. msprobe/pytorch/monitor/module_metric.py +172 -0
  251. msprobe/pytorch/monitor/module_spec_verifier.py +95 -0
  252. msprobe/pytorch/monitor/optimizer_collect.py +333 -0
  253. msprobe/pytorch/monitor/unittest/__init__.py +0 -0
  254. msprobe/pytorch/monitor/unittest/test_monitor.py +160 -0
  255. msprobe/pytorch/monitor/utils.py +321 -0
  256. msprobe/pytorch/monitor/visualizer.py +59 -0
  257. msprobe/pytorch/online_dispatch/__init__.py +2 -3
  258. msprobe/pytorch/online_dispatch/compare.py +29 -38
  259. msprobe/pytorch/online_dispatch/dispatch.py +58 -27
  260. msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
  261. msprobe/pytorch/online_dispatch/single_compare.py +53 -32
  262. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
  263. msprobe/pytorch/online_dispatch/utils.py +49 -21
  264. msprobe/pytorch/parse_tool/lib/compare.py +21 -27
  265. msprobe/pytorch/parse_tool/lib/config.py +6 -8
  266. msprobe/pytorch/parse_tool/lib/file_desc.py +15 -1
  267. msprobe/pytorch/parse_tool/lib/interactive_cli.py +10 -10
  268. msprobe/pytorch/parse_tool/lib/parse_exception.py +7 -7
  269. msprobe/pytorch/parse_tool/lib/parse_tool.py +12 -12
  270. msprobe/pytorch/parse_tool/lib/utils.py +33 -53
  271. msprobe/pytorch/parse_tool/lib/visualization.py +11 -10
  272. msprobe/pytorch/pt_config.py +31 -8
  273. msprobe/pytorch/service.py +188 -108
  274. msprobe/visualization/__init__.py +14 -0
  275. msprobe/visualization/builder/__init__.py +14 -0
  276. msprobe/visualization/builder/graph_builder.py +222 -0
  277. msprobe/visualization/builder/msprobe_adapter.py +227 -0
  278. msprobe/visualization/compare/__init__.py +14 -0
  279. msprobe/visualization/compare/graph_comparator.py +180 -0
  280. msprobe/visualization/compare/mode_adapter.py +197 -0
  281. msprobe/visualization/graph/__init__.py +14 -0
  282. msprobe/visualization/graph/base_node.py +119 -0
  283. msprobe/visualization/graph/distributed_analyzer.py +318 -0
  284. msprobe/visualization/graph/graph.py +209 -0
  285. msprobe/visualization/graph/node_colors.py +95 -0
  286. msprobe/visualization/graph/node_op.py +39 -0
  287. msprobe/visualization/graph_service.py +288 -0
  288. msprobe/visualization/utils.py +217 -0
  289. mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
  290. msprobe/docs/04.acl_config_examples.md +0 -78
  291. msprobe/mindspore/compare/layer_mapping.py +0 -146
  292. msprobe/mindspore/compare/modify_mapping.py +0 -107
  293. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
  294. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
  295. msprobe/pytorch/functional/module_dump.py +0 -84
  296. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/LICENSE +0 -0
  297. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.2.1.dist-info}/top_level.txt +0 -0
  298. /msprobe/mindspore/{free_benchmark/decorator → code_mapping}/__init__.py +0 -0
  299. /msprobe/pytorch/{functional → dump/module_dump}/__init__.py +0 -0
@@ -0,0 +1,22 @@
1
+ # MindSpore 场景的精度数据采集基线
2
+
3
+ ## "tensor"模式采集数据量参考基线
4
+
5
+ 该基线为MindSpore框架下,使用"tensor"模式采集数据量参考基线。本基线测试了38B语言大模型在不同采集模式下,不同global_batch_size下,单卡和8卡下,数据量的变化。
6
+
7
+ ### 38B语言大模型
8
+
9
+ <table>
10
+ <tr><th>采集模式</th><th>global_batch_size</th><th>单卡</th><th>8卡</th></tr>
11
+ <tr><td rowspan="3">L0</td><td>1</td><td>262GB</td><td>2.1T</td></tr>
12
+ <tr><td>2</td><td>480GB</td><td>3.8T</td></tr>
13
+ <tr><td>3</td><td>928GB</td><td>7.4T</td></tr>
14
+ <tr><td rowspan="3">L1</td><td>1</td><td>2.1TB</td><td>17.1TB</td></tr>
15
+ <tr><td>2</td><td>2.8T</td><td>22.7TB</td></tr>
16
+ <tr><td>3</td><td>4.2T</td><td>34.3TB</td></tr>
17
+ <tr><td rowspan="3">mix</td><td>1</td><td>2.4T</td><td>19.2TB</td></tr>
18
+ <tr><td>2</td><td>3.3TB</td><td>26.6TB</td></tr>
19
+ <tr><td>3</td><td>5.1TB</td><td>41.4TB</td></tr>
20
+
21
+ </table>
22
+
@@ -0,0 +1,211 @@
1
+ # 动态图精度数据采集快速入门示例
2
+
3
+ 本示例将展示如何在 MindSpore 动态图模式下使用 msprobe 工具进行精度数据采集。
4
+
5
+ ## 1. 配置文件
6
+
7
+ 请在当前目录下创建一个名为 `config.json` 的配置文件,内容如下:
8
+
9
+ ```json
10
+ {
11
+ "task": "statistics",
12
+ "dump_path": "./output",
13
+ "rank": [],
14
+ "step": ["0-2"],
15
+ "level": "L1",
16
+ "statistics": {
17
+ "scope": [],
18
+ "list": [],
19
+ "data_mode": [
20
+ "all"
21
+ ],
22
+ "summary_mode": "statistics"
23
+ }
24
+ }
25
+
26
+ ```
27
+ 以上配置参数详细介绍和使用请参见[《config.json 配置文件介绍》](../02.config_introduction.md)和[《config.json 配置示例》](../03.config_examples.md#3-mindspore-动态图场景) 中的“MindSpore动态图场景”。
28
+
29
+ ## 2. 模型脚本
30
+
31
+ 在当前目录下创建一个 Python 脚本文件,例如 `alexnet_model.py`,将以下代码粘贴进去:
32
+
33
+ ```python
34
+ import os
35
+ import numpy as np
36
+ import mindspore as ms
37
+ from mindspore import nn, ops
38
+ from mindspore import context
39
+ from mindspore import Tensor
40
+ from msprobe.mindspore import PrecisionDebugger, seed_all
41
+
42
+ # 设置随机种子以确保结果可重现
43
+ seed_all(seed=1234, mode=False, rm_dropout=True)
44
+
45
+ # 配置文件路径
46
+ script_dir = os.path.dirname(os.path.abspath(__file__))
47
+ config_path = os.path.join(script_dir, 'config.json')
48
+
49
+ # 初始化精度调试器
50
+ debugger = PrecisionDebugger(config_path=config_path)
51
+
52
+ # 设置 MindSpore 设备上下文
53
+ context.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend", device_id=0)
54
+
55
+ # 定义卷积层
56
+ def conv_layer(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid", has_bias=True):
57
+ return nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding,
58
+ has_bias=has_bias, pad_mode=pad_mode)
59
+
60
+ # 定义全连接层
61
+ def fc_layer(input_channels, out_channels, has_bias=True):
62
+ return nn.Dense(input_channels, out_channels, has_bias=has_bias)
63
+
64
+
65
+ class AlexNet(nn.Cell):
66
+ """
67
+ AlexNet 模型定义
68
+
69
+ 参数:
70
+ - num_classes: 分类数量
71
+ - channel: 输入通道数(图像的颜色通道数)
72
+ - phase: 模型运行阶段('train' 或 'test')
73
+ - include_top: 是否包含全连接层的顶部(最后的分类层)
74
+ """
75
+ def __init__(self, num_classes=10, channel=3, phase='train', include_top=True):
76
+ super(AlexNet, self).__init__()
77
+
78
+ # 卷积层
79
+ self.conv1 = conv_layer(channel, 64, 11, stride=4, pad_mode="same")
80
+ self.conv2 = conv_layer(64, 128, 5, pad_mode="same")
81
+ self.conv3 = conv_layer(128, 192, 3, pad_mode="same")
82
+ self.conv4 = conv_layer(192, 256, 3, pad_mode="same")
83
+ self.conv5 = conv_layer(256, 256, 3, pad_mode="same")
84
+
85
+ # 激活函数和池化层
86
+ self.relu = nn.ReLU()
87
+ self.max_pool2d = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='valid')
88
+
89
+ # 如果包括顶部(全连接层)
90
+ self.include_top = include_top
91
+ if self.include_top:
92
+ self.flatten = nn.Flatten()
93
+ self.fc1 = fc_layer(256 * 28 * 28, 4096)
94
+ self.fc2 = fc_layer(4096, 4096)
95
+ self.fc3 = fc_layer(4096, num_classes)
96
+
97
+ # 数学操作
98
+ self.add = ops.Add()
99
+ self.mul = ops.Mul()
100
+
101
+ def construct(self, x):
102
+ """定义前向传播过程"""
103
+
104
+ x = self.conv1(x)
105
+ x = self.add(x, 0.1) # 偏置加法
106
+ x = self.mul(x, 2.0) # 乘法操作
107
+ x = self.relu(x) # ReLU 激活函数
108
+ x = ops.celu(x)
109
+ x = x + 2
110
+
111
+ # 打印每层输出形状,调试时可使用
112
+ print(f"After Conv1: {x.shape}")
113
+
114
+ x = self.max_pool2d(x) # Max pooling 操作
115
+ print(f"After MaxPool: {x.shape}") # 打印池化后的形状
116
+
117
+ x = self.conv2(x)
118
+ x = self.relu(x)
119
+
120
+ x = self.conv3(x)
121
+ x = self.relu(x)
122
+
123
+ x = self.conv4(x)
124
+ x = self.relu(x)
125
+
126
+ x = self.conv5(x)
127
+ x = self.relu(x)
128
+
129
+ # 打印卷积层后的形状,调试时使用
130
+ print(f"After Conv5: {x.shape}")
131
+
132
+ # 可选的全连接层部分
133
+ if self.include_top:
134
+ x = self.flatten(x)
135
+ x = self.fc1(x)
136
+ x = self.fc2(x)
137
+ x = self.fc3(x)
138
+
139
+ return x
140
+
141
+ # 前向函数
142
+ def forward_fn(data, label):
143
+ out = net(data)
144
+ loss = criterion(out, label)
145
+ return loss
146
+
147
+ # 训练步骤
148
+ def train_step(data, label):
149
+ loss, grads = grad_fn(data, label)
150
+ optimizer(grads)
151
+ return loss
152
+
153
+ # 测试模型
154
+ if __name__ == "__main__":
155
+ net = AlexNet()
156
+ optimizer = nn.SGD(net.trainable_params(), learning_rate=0.01)
157
+ criterion = nn.MSELoss()
158
+
159
+ grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters)
160
+
161
+ # 生成数据和标签
162
+ batch_size = 1
163
+ num_classes = 10
164
+ data = np.random.normal(1, 1, (batch_size, 3, 227, 227)).astype(np.float32)
165
+ label = np.random.randint(0, num_classes, (batch_size,)).astype(np.float32) # 注意此处类型应为 float32
166
+
167
+ # 转换为 MindSpore 张量
168
+ data = Tensor(data)
169
+ label = Tensor(label)
170
+
171
+ steps = 5
172
+ for i in range(steps):
173
+ debugger.start(net) # 启动调试器
174
+ loss = train_step(data, label) # 执行训练步骤
175
+ print(f"Step {i}, Loss: {loss}")
176
+ debugger.stop() # 停止调试器
177
+ debugger.step() # 计数步数
178
+ ```
179
+
180
+ ## 3. 运行训练脚本
181
+
182
+ 在命令行中执行以下命令:
183
+
184
+ ```bash
185
+ python alexnet_model.py
186
+ ```
187
+
188
+ ## 4. 查看采集结果
189
+
190
+ 执行训练命令后,工具会将模型训练过程中的精度数据采集下来。
191
+
192
+ 日志中打印出现如下信息表示数据采集成功,即可手动停止模型训练查看采集数据。
193
+
194
+ ```markdown
195
+ ****************************************************************************
196
+ * msprobe ends successfully. *
197
+ ****************************************************************************
198
+ ```
199
+
200
+ ## 5. 数据分析
201
+
202
+ 在 `dump_path` 参数指定的路径下(本例中为 `./output`),会出现如下目录结构。后续精度数据分析操作可使用 msprobe 工具的精度预检和精度比对等功能,详细流程请参见[《msprobe使用手册》](../../README.md#2-精度预检)。
203
+
204
+ ```bash
205
+ output/
206
+ └── step0
207
+ └── rank
208
+ ├── construct.json # level为L0时,保存Cell的层级关系信息。当前场景为空
209
+ ├── dump.json # 保存API前反向输入输出数据的统计量信息
210
+ └── stack.json # 保存API的调用栈
211
+ ```
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,132 @@
1
+ # 模型分级可视化如何配置layer mapping映射文件
2
+
3
+ ## 1.使用场景
4
+ 同框架跨套件比对(例如PyTorch DeepSpeed vs Megatron),或者跨框架比对(例如PyTorch vs MindSpore),**由于代码实现的差异,导致一些模型层级和层级命名有所不同无法进行匹配**,需要进行layer层名称映射,才能够比对。
5
+
6
+ ## 2.模块命名说明
7
+
8
+ 由于有些节点的名称比较长,例如Module.module.module.language_model.embedding.Embedding.forward.0,在图节点上由于字符串过长无法完整显示,forward或backward信息被省略,**因此节点中显示的名称字符串去掉了Module前缀,并将forward或backward信息提取到名称字符串的第二位展示**。
9
+
10
+ ![module_name.png](./module_name.png)
11
+
12
+ ![module_name1.png](./module_name1.png)
13
+
14
+ ### 2.1 命名格式
15
+
16
+ **{Module}.{module_name}.{class_name}.{forward/backward}.{调用次数}**
17
+
18
+ **layer mapping主要是针对module_name的映射**
19
+
20
+ #### 2.1.1 命名示例
21
+
22
+ - **Module.module.Float16Module.forward.0** -----> Module{**Module**}.module{**module_name**}.Float16Module{**class_name**}.forward.0{**调用次数**}
23
+ - **Module.module.module.GPTModel.forward.0** -----> Module{**Module**}.module.module{**module_name**}.GPTModel{**class_name**}.forward.0{**调用次数**}
24
+ - **Module.module.module.language_model.TransformerLanguageModel.forward.0** -----> Module{**Module**}.module.module.language_model{**module_name**}.TransformerLanguageModel{**class_name**}.forward.0{**调用次数**}
25
+ - **Module.module.module.language_model.embedding.Embedding.forward.0** -----> Module{**Module**}.module.module.language_model.embedding{**module_name**}.Embedding{**class_name**}.forward.0{**调用次数**}
26
+
27
+ 可以看到,module_name随着模型层级的深入在变长,**embedding层module_name拼接了它的上层language_model、上上层module和顶层module**。
28
+
29
+ ## 3.示例
30
+
31
+ 如图所示,左边为NPU模型,右边为GPU模型,由于代码实现上的差异,模型层级和层级命名有所不同,导致节点无法匹配,**图上节点显示为灰色,表示节点未匹配**。
32
+
33
+ ![no_mapping.png](./no_mapping.png)
34
+
35
+ ### 3.1 看图分析
36
+
37
+ 同一模型使用了不同套件或者框架,虽然两个模型的层级关系和层级命名可能有所不同,但也可以从图上的**节点名称**看出一些匹配关系,例如同是embedding层,代码里也是会命名为xxx_embedding,不会命名为xxx_norm,体现在节点名称上也是带有embedding的信息,并且层级关系也是大致相同的。
38
+
39
+ ![no_mapping_analyze.png](./no_mapping_analyze.png)
40
+
41
+ 分析可知,节点匹配关系如下:
42
+
43
+ **注意,仅需关注module_name的差异**
44
+
45
+ | NPU节点名称 | GPU节点名称 | module_name差异 |
46
+ |-------------------|----------------------------------------------------------------|---------------------------|
47
+ | Module.module.Float16Module.forward.0 | Module.model.FloatModule.forward.0 | NPU为module,GPU为model |
48
+ | Module.module.module.GPTModel.forward.0 | Module.model.module.GPT2Model.forward.0 | NPU为module,GPU为module,无差异 |
49
+ | Module.module.module.language_model.TransformerLanguageModel.forward.0 | 无 | NPU多了一层 |
50
+ | Module.module.module.language_model.embedding.Embedding.forward.0 | Module.module.module.embedding.LanguageModelEmbedding.forward.0 | NPU为language_model.embedding,GPU为embedding |
51
+ | Module.module.module.language_model.rotary_pos_emb.RotaryEmbedding.forward.0 | Module.module.module.rotary_pos_emb.RotaryEmbedding.forward.0 | NPU为language_model.rotary_pos_emb,GPU为rotary_pos_emb |
52
+ | Module.module.module.language_model.encoder.ParallelTransformer.forward.0 | Module.module.module.decoder.TransformerBlock.forward.0 | NPU为language_model.encoder,GPU为decoder |
53
+ | Module.module.module.language_model.encoder.layers.0.ParallelTransformerLayer.forward.0 | Module.module.module.decoder.layers.0.TransformerLayer.forward.0 | 父层级有差异,本层级NPU和GPU都叫layers,无差异 |
54
+
55
+ ### 3.2 构建layer_mapping配置文件
56
+ 准备一个命名为mapping.yaml的文件,建立**module_name**的映射关系
57
+
58
+ #### 3.2.1 顶层模块映射
59
+ NPU和GPU侧的模块Module.module.Float16Module.forward.0和Module.model.FloatModule.forward.0处于图的顶层,需要进行如下配置:
60
+
61
+ ![top_layer.png](./top_layer.png)
62
+
63
+ ```yaml
64
+ TopLayer:
65
+ module: model
66
+ ```
67
+
68
+ #### 3.2.2 其他模块映射
69
+ 配置module下的子模块,虽然两边的class_name不同(NPU侧为GPTModel,GPU侧为GPT2Model),**但是仅需取NPU侧也就是左边图的class_name进行配置,无需关心右边图的class_name叫什么**。
70
+
71
+ **这里涉及到跨层级的配置,NPU多了一层language_model层**,将language_model作为embedding层、rotary_pos_emb层和encoder层的前缀,进行如下配置:
72
+
73
+ ![GPTModel.png](./GPTModel.png)
74
+
75
+ ```yaml
76
+ GPTModel:
77
+ language_model.embedding: embedding
78
+ language_model.rotary_pos_emb: rotary_pos_emb
79
+ language_model.encoder: decoder
80
+ ```
81
+ 然后看Module.module.module.language_model.encoder.ParallelTransformer.forward.0层下的子模块:
82
+
83
+ 此层下的若干个层,NPU和GPU的层名都叫layers,**当前层名称相同,则不用进行配置**。
84
+
85
+ ### 3.3 查看效果
86
+
87
+ 执行命令,指定-lm:
88
+ ```
89
+ msprobe -f pytorch graph -i ./compare.json -o ./output -lm ./mapping.yaml
90
+ ```
91
+
92
+ ```
93
+ msprobe -f mindspore graph -i ./compare.json -o ./output -lm ./mapping.yaml
94
+ ```
95
+ 可以看到,除了language_model层(NPU多的一层,GPU没有层与其匹配),其余在mapping.yaml文件配置的层均匹配上了。
96
+
97
+ ![mapping.png](./mapping.png)
98
+
99
+ ### 3.4 继续配置
100
+
101
+ 展开节点过程中,如果发现还有未匹配节点,则继续配置mapping.yaml
102
+
103
+ ![no_mapping1.png](./no_mapping1.png)
104
+
105
+ 按前一章过程进行分析配置,分析可知,节点匹配关系如下:
106
+
107
+ | NPU节点名称 | GPU节点名称 | 差异 |
108
+ |-------------------|------------------------------------------------------------------|---------------------------------------------|
109
+ | Module.module.module.language_model.encoder.layers.0.mlp.dense_h_to_4h.ColumnParallelLinear.forward.0 | Module.module.module.decoder.layers.0.mlp.linear_fc1.TELayerNormColumnParallelLinear.forward.0 | NPU为dense_h_to_4h,GPU为linear_fc1 |
110
+ | Module.module.module.language_model.encoder.layers.0.mlp.dense_4h_to_h.RowParallelLinear.forward.0 | Module.module.module.decoder.layers.0.mlp.linear_fc2.TERowParallelLinear.forward.0 | NPU为dense_4h_to_h,GPU为linear_fc2 |
111
+
112
+ ![ParallelMLP.png](./ParallelMLP.png)
113
+
114
+ 追加mapping.yaml配置:
115
+
116
+ ```yaml
117
+ TopLayer:
118
+ module: model
119
+
120
+ GPTModel:
121
+ language_model.embedding: embedding
122
+ language_model.rotary_pos_emb: rotary_pos_emb
123
+ language_model.encoder: decoder
124
+
125
+ ParallelMLP:
126
+ dense_h_to_4h: linear_fc1
127
+ dense_4h_to_h: linear_fc2
128
+ ```
129
+
130
+ 执行命令,查看效果,可以看到节点已成功匹配上。
131
+
132
+ ![mapping1.png](./mapping1.png)
Binary file
Binary file
Binary file
@@ -1,2 +1,27 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import os
17
+
18
+ try:
19
+ from msprobe.lib import _msprobe_c
20
+ os.environ["MS_HOOK_ENABLE"] = "on"
21
+ os.environ["HOOK_TOOL_PATH"] = _msprobe_c.__file__
22
+ except ImportError:
23
+ from .common.log import logger
24
+ logger.info("Module _msprobe_c has not been installed. L2-Dump may not work normally.")
25
+
1
26
  from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger
2
27
  from msprobe.mindspore.common.utils import seed_all