mindstudio-probe 8.1.2__py3-none-any.whl → 8.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/METADATA +2 -2
  2. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/RECORD +172 -147
  3. msprobe/README.md +6 -6
  4. msprobe/core/common/const.py +98 -41
  5. msprobe/core/common/db_manager.py +256 -0
  6. msprobe/core/common/file_utils.py +28 -5
  7. msprobe/core/common/log.py +7 -0
  8. msprobe/core/common/megatron_utils.py +59 -0
  9. msprobe/core/common/parallel_state.py +193 -0
  10. msprobe/core/common/utils.py +20 -13
  11. msprobe/core/common_config.py +5 -0
  12. msprobe/core/compare/acc_compare.py +140 -93
  13. msprobe/core/compare/check.py +13 -0
  14. msprobe/core/compare/compare_cli.py +64 -6
  15. msprobe/core/compare/config.py +10 -8
  16. msprobe/core/compare/diff_analyze/diff_analyze_threshold.yaml +14 -0
  17. msprobe/core/compare/diff_analyze/first_diff_analyze.py +135 -0
  18. msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
  19. msprobe/core/compare/find_first/__init__.py +0 -0
  20. msprobe/core/compare/find_first/analyzer.py +282 -0
  21. msprobe/core/compare/find_first/data_processor.py +35 -0
  22. msprobe/core/compare/find_first/graph.py +188 -0
  23. msprobe/core/compare/find_first/utils.py +189 -0
  24. msprobe/core/compare/highlight.py +74 -101
  25. msprobe/core/compare/layer_mapping/layer_mapping.py +14 -9
  26. msprobe/core/compare/merge_result/merge_result.py +2 -2
  27. msprobe/core/compare/multiprocessing_compute.py +45 -28
  28. msprobe/core/compare/npy_compare.py +7 -10
  29. msprobe/core/compare/utils.py +338 -130
  30. msprobe/core/config_check/checkers/dataset_checker.py +2 -1
  31. msprobe/core/config_check/checkers/env_args_checker.py +5 -5
  32. msprobe/core/config_check/checkers/hyperparameter_checker.py +30 -10
  33. msprobe/core/config_check/checkers/pip_checker.py +4 -3
  34. msprobe/core/config_check/checkers/random_checker.py +3 -3
  35. msprobe/core/config_check/checkers/weights_checker.py +2 -1
  36. msprobe/core/config_check/ckpt_compare/megatron_loader.py +2 -0
  37. msprobe/core/config_check/resource/hyperparameter.yaml +11 -1
  38. msprobe/core/config_check/utils/hyperparameter_parser.py +7 -3
  39. msprobe/core/config_check/utils/utils.py +10 -0
  40. msprobe/core/data_dump/api_registry.py +49 -30
  41. msprobe/core/data_dump/data_collector.py +71 -29
  42. msprobe/core/data_dump/data_processor/base.py +2 -0
  43. msprobe/core/data_dump/data_processor/mindspore_processor.py +47 -53
  44. msprobe/core/data_dump/data_processor/pytorch_processor.py +227 -93
  45. msprobe/core/data_dump/json_writer.py +81 -7
  46. msprobe/core/data_dump/scope.py +4 -6
  47. msprobe/core/hook_manager.py +129 -70
  48. msprobe/core/monitor/csv2db.py +361 -0
  49. msprobe/core/monitor/db_utils.py +278 -0
  50. msprobe/core/monitor/utils.py +35 -1
  51. msprobe/core/service.py +31 -39
  52. msprobe/core/single_save/single_comparator.py +16 -3
  53. msprobe/docs/01.installation.md +51 -19
  54. msprobe/docs/02.config_introduction.md +16 -20
  55. msprobe/docs/03.config_examples.md +26 -0
  56. msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
  57. msprobe/docs/05.data_dump_PyTorch.md +6 -2
  58. msprobe/docs/06.data_dump_MindSpore.md +44 -7
  59. msprobe/docs/07.accuracy_checker_PyTorch.md +1 -1
  60. msprobe/docs/10.accuracy_compare_PyTorch.md +124 -44
  61. msprobe/docs/11.accuracy_compare_MindSpore.md +75 -7
  62. msprobe/docs/14.data_parse_PyTorch.md +1 -1
  63. msprobe/docs/19.monitor.md +94 -7
  64. msprobe/docs/21.visualization_PyTorch.md +71 -101
  65. msprobe/docs/22.visualization_MindSpore.md +69 -119
  66. msprobe/docs/23.generate_operator_PyTorch.md +1 -1
  67. msprobe/docs/25.tool_function_introduction.md +0 -1
  68. msprobe/docs/26.data_dump_PyTorch_baseline.md +7 -7
  69. msprobe/docs/28.debugger_save_instruction.md +184 -81
  70. msprobe/docs/29.data_dump_MSAdapter.md +6 -0
  71. msprobe/docs/31.config_check.md +4 -2
  72. msprobe/docs/36.calculation_result_change.md +75 -0
  73. msprobe/docs/FAQ.md +22 -1
  74. msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +6 -2
  75. msprobe/docs/img/compare_result.png +0 -0
  76. msprobe/docs/img/visualization/vis_browser_1.png +0 -0
  77. msprobe/docs/img/visualization/vis_match_info.png +0 -0
  78. msprobe/docs/img/visualization/vis_precision_info.png +0 -0
  79. msprobe/docs/img/visualization/vis_search_info.png +0 -0
  80. msprobe/docs/img/visualization/vis_show_info.png +0 -0
  81. msprobe/docs/img/visualization/vis_showcase.png +0 -0
  82. msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
  83. msprobe/docs/visualization/mindspeed_llamafactoary_img/1.png +0 -0
  84. msprobe/docs/visualization/mindspeed_llamafactoary_img/2.png +0 -0
  85. msprobe/docs/visualization/mindspeed_llamafactoary_img/3.png +0 -0
  86. msprobe/docs/visualization/mindspeed_llamafactoary_img/4.png +0 -0
  87. msprobe/docs/visualization/mindspeed_llamafactoary_img/5.png +0 -0
  88. msprobe/docs/visualization/mindspeed_llamafactoary_img/6.png +0 -0
  89. msprobe/docs/visualization/mindspeed_llamafactoary_img/7.png +0 -0
  90. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt +59 -0
  91. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory1.png +0 -0
  92. msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory2.png +0 -0
  93. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt +80 -0
  94. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed1.png +0 -0
  95. msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed2.png +0 -0
  96. msprobe/docs/visualization/mindspeed_llamafactory_mapping.md +330 -0
  97. msprobe/mindspore/__init__.py +1 -1
  98. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +1 -1
  99. msprobe/mindspore/api_accuracy_checker/api_runner.py +9 -6
  100. msprobe/mindspore/api_accuracy_checker/compute_element.py +18 -12
  101. msprobe/mindspore/cell_processor.py +64 -25
  102. msprobe/mindspore/common/utils.py +51 -7
  103. msprobe/mindspore/compare/common_dir_compare.py +45 -37
  104. msprobe/mindspore/compare/ms_compare.py +10 -2
  105. msprobe/mindspore/compare/ms_graph_compare.py +47 -52
  106. msprobe/mindspore/debugger/debugger_config.py +18 -7
  107. msprobe/mindspore/debugger/precision_debugger.py +16 -12
  108. msprobe/mindspore/dump/cell_dump_process.py +130 -68
  109. msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +10 -2
  110. msprobe/mindspore/dump/graph_mode_cell_dump.py +35 -9
  111. msprobe/mindspore/dump/graph_tensor_dump.py +11 -0
  112. msprobe/mindspore/dump/hook_cell/api_register.py +19 -20
  113. msprobe/mindspore/dump/hook_cell/hook_cell.py +12 -34
  114. msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +142 -21
  115. msprobe/mindspore/dump/kernel_kbyk_dump.py +24 -0
  116. msprobe/mindspore/exception_dump/__init__.py +0 -0
  117. msprobe/mindspore/exception_dump/exception_dump_tool_factory.py +51 -0
  118. msprobe/mindspore/exception_dump/kernel_graph_exception_dump.py +57 -0
  119. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +5 -4
  120. msprobe/mindspore/mindspore_service.py +2 -2
  121. msprobe/mindspore/mindtorch/mindtorch_adaptor.py +12 -7
  122. msprobe/mindspore/monitor/features.py +82 -0
  123. msprobe/mindspore/monitor/module_hook.py +168 -10
  124. msprobe/mindspore/monitor/utils.py +27 -1
  125. msprobe/mindspore/ms_config.py +12 -4
  126. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +1 -1
  127. msprobe/mindspore/task_handler_factory.py +3 -1
  128. msprobe/nan_analyze/graph.py +1 -1
  129. msprobe/pytorch/api_accuracy_checker/common/config.py +3 -36
  130. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +0 -24
  131. msprobe/pytorch/api_accuracy_checker/compare/compare.py +2 -12
  132. msprobe/pytorch/api_accuracy_checker/config.yaml +1 -6
  133. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -2
  134. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +12 -132
  135. msprobe/pytorch/common/utils.py +1 -21
  136. msprobe/pytorch/compare/pt_compare.py +10 -2
  137. msprobe/pytorch/{hook_module/jit_script_wrapper.py → compare/pt_diff_analyze.py} +3 -15
  138. msprobe/pytorch/compare/utils.py +2 -1
  139. msprobe/pytorch/debugger/debugger_config.py +18 -23
  140. msprobe/pytorch/dump/module_dump/hook_wrapper.py +10 -7
  141. msprobe/pytorch/dump/module_dump/module_processer.py +41 -19
  142. msprobe/pytorch/free_benchmark/main.py +7 -4
  143. msprobe/pytorch/hook_module/api_register.py +62 -24
  144. msprobe/pytorch/hook_module/hook_module.py +9 -29
  145. msprobe/pytorch/hook_module/pt_hook_manager.py +84 -15
  146. msprobe/pytorch/hook_module/script_wrapper.py +140 -0
  147. msprobe/pytorch/hook_module/support_wrap_ops.yaml +6 -0
  148. msprobe/pytorch/monitor/csv2tb.py +1 -1
  149. msprobe/pytorch/monitor/features.py +94 -0
  150. msprobe/pytorch/monitor/module_hook.py +221 -81
  151. msprobe/pytorch/monitor/module_metric.py +27 -1
  152. msprobe/pytorch/monitor/optimizer_collect.py +109 -4
  153. msprobe/pytorch/online_dispatch/dispatch.py +42 -24
  154. msprobe/pytorch/online_dispatch/dump_compare.py +1 -1
  155. msprobe/pytorch/parse_tool/lib/visualization.py +0 -1
  156. msprobe/pytorch/pt_config.py +2 -51
  157. msprobe/pytorch/pytorch_service.py +7 -14
  158. msprobe/visualization/builder/graph_builder.py +192 -63
  159. msprobe/visualization/builder/graph_merger.py +986 -0
  160. msprobe/visualization/builder/msprobe_adapter.py +17 -15
  161. msprobe/visualization/compare/graph_comparator.py +26 -16
  162. msprobe/visualization/db_utils.py +252 -0
  163. msprobe/visualization/graph/base_node.py +2 -22
  164. msprobe/visualization/graph/distributed_analyzer.py +12 -12
  165. msprobe/visualization/graph/graph.py +44 -16
  166. msprobe/visualization/graph_service.py +143 -59
  167. msprobe/visualization/utils.py +103 -4
  168. msprobe/docs/08.accuracy_checker_online_PyTorch.md +0 -295
  169. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +0 -205
  170. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +0 -378
  171. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +0 -239
  172. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +0 -115
  173. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +0 -250
  174. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +0 -63
  175. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +0 -198
  176. msprobe/pytorch/attl_manager.py +0 -65
  177. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/LICENSE +0 -0
  178. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/WHEEL +0 -0
  179. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/entry_points.txt +0 -0
  180. {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/top_level.txt +0 -0
  181. /msprobe/{pytorch/api_accuracy_checker/tensor_transport_layer → core/compare/diff_analyze}/__init__.py +0 -0
@@ -0,0 +1,80 @@
1
+ [DistributedDataParallel(
2
+ (module): Float16Module(
3
+ (module): VLMModel(
4
+ (image_encoder): VisionModel(
5
+ (encoder): Qwen2VLViT(
6
+ (patch_embed): PatchEmbed(
7
+ (proj): Conv3d(3, 1280, kernel_size=(2, 14, 14), stride=(2, 14, 14), bias=False)
8
+ )
9
+ (rotary_pos_emb): VisionRotaryEmbedding()
10
+ (blocks): Qwen2VLVisionTransformerBlock(
11
+ (layers): ModuleList(
12
+ (0-15): 16 x TransformerLayer(
13
+ (input_layernorm): RMSNorm()
14
+ (self_attention): Qwen2vlVitSelfAttention(
15
+ (core_attention): DotProductAttention(
16
+ (scale_mask_softmax): FusedScaleMaskSoftmax()
17
+ (attention_dropout): Dropout(p=0.0, inplace=False)
18
+ )
19
+ (linear_proj): RowParallelLinear()
20
+ (linear_qkv): ColumnParallelLinear()
21
+ (q_layernorm): IdentityOp()
22
+ (k_layernorm): IdentityOp()
23
+ )
24
+ (pre_cross_attn_layernorm): IdentityOp()
25
+ (cross_attention): IdentityOp()
26
+ (cross_attn_bda): IdentityFuncOp()
27
+ (pre_mlp_layernorm): RMSNorm()
28
+ (mlp): MLP(
29
+ (linear_fc1): ColumnParallelLinear()
30
+ (linear_fc2): RowParallelLinear()
31
+ )
32
+ )
33
+ )
34
+ )
35
+ )
36
+ (projector): MultimodalProjector(
37
+ (layernorm): RMSNorm()
38
+ (encoder): MLP(
39
+ (linear_fc1): ColumnParallelLinear()
40
+ (linear_fc2): RowParallelLinear()
41
+ )
42
+ )
43
+ )
44
+ (text_decoder): MMGPTModel(
45
+ (embedding): LanguageModelEmbedding(
46
+ (word_embeddings): VocabParallelEmbedding()
47
+ (embedding_dropout): Dropout(p=0.0, inplace=False)
48
+ )
49
+ (rotary_pos_emb): Qwen2VLRotaryEmbedding_llm()
50
+ (decoder): TransformerBlock(
51
+ (layers): ModuleList(
52
+ (0-7): 8 x TransformerLayer(
53
+ (input_layernorm): RMSNorm()
54
+ (self_attention): Qwen2vlSelfAttention(
55
+ (core_attention): DotProductAttention(
56
+ (scale_mask_softmax): FusedScaleMaskSoftmax()
57
+ (attention_dropout): Dropout(p=0.0, inplace=False)
58
+ )
59
+ (linear_proj): RowParallelLinear()
60
+ (linear_qkv): ColumnParallelLinear()
61
+ (q_layernorm): IdentityOp()
62
+ (k_layernorm): IdentityOp()
63
+ )
64
+ (pre_cross_attn_layernorm): IdentityOp()
65
+ (cross_attention): IdentityOp()
66
+ (cross_attn_bda): IdentityFuncOp()
67
+ (pre_mlp_layernorm): RMSNorm()
68
+ (mlp): MLP(
69
+ (linear_fc1): ColumnParallelLinear()
70
+ (linear_fc2): RowParallelLinear()
71
+ )
72
+ )
73
+ )
74
+ (final_layernorm): RMSNorm()
75
+ )
76
+ (output_layer): ColumnParallelLinear()
77
+ )
78
+ )
79
+ )
80
+ )]
@@ -0,0 +1,330 @@
1
+ # MindSpeed&LLamaFactory数据采集和自动比对
2
+
3
+ ## 0. 使用场景
4
+ 基于MindSpeed和LLamaFactory框架实现的同一模型,在模型超参、环境变量、初始权重、训练数据等一致的前提下,训练过程中出现了精度差异,需要进行**整网比对**寻找精度差异点。
5
+
6
+ 本文选取Qwen2.5vl和Qwen2.5模型,指导用户如何进行MindSpeed&LLamaFactory数据采集和自动比对。
7
+
8
+ ## 1. 数据采集
9
+
10
+ ### 1.1 准备数据采集配置文件
11
+
12
+ 数据采集前需要准备一个json文件,本案例命名为config.json,其内容包含了数据采集的所需配置。
13
+
14
+ 本案例使用的配置内容如下,更多配置请参考[config.json 配置示例](../03.config_examples.md),配置详细介绍请参考[配置文件介绍](../02.config_introduction.md)。
15
+
16
+ ```json
17
+ {
18
+ "task": "statistics",
19
+ "dump_path": "/home/data_dump",
20
+ "rank": [],
21
+ "step": [0],
22
+ "level": "mix",
23
+ "async_dump": false,
24
+
25
+ "statistics": {
26
+ "scope": [],
27
+ "list": [],
28
+ "tensor_list": [],
29
+ "data_mode": ["all"],
30
+ "summary_mode": "statistics"
31
+ }
32
+ }
33
+ ```
34
+ 请注意,在数据采集结束后将进行模型分级可视化比对,配置文件中的`level`需要配置为`L0`(模块数据)或`mix`(模块+API数据)。
35
+
36
+ ### 1.2 添加msprobe工具采集接口
37
+
38
+ 本案例使用的工具采集接口配置如下,更多配置和接口介绍请参考[PyTorch 场景的精度数据采集](../05.data_dump_PyTorch.md)。
39
+
40
+ #### 1.2.1 LLamaFactory数据采集
41
+
42
+ LLamaFactory依赖Transformers的底层能力,msprobe工具采集功能将添加在Transformers中。
43
+
44
+ 以Transformers 4.49.0版本为例,通过`pip3 show Transformers`获取`Location路径`,打开`Location路径/transformers/trainer.py`文件。
45
+
46
+ 1. 在trainer.py文件中添加工具接口,初始化数据采集配置以及固定随机数:
47
+
48
+ ![llamafactory1.png](./mindspeed_llamafactoary_img/llamafactory1.png)
49
+
50
+ 2. 在trainer.py文件**训练循环逻辑位置**添加工具接口,控制数据采集的启动、停止和step计数:
51
+
52
+ ![llamafactory2.png](./mindspeed_llamafactoary_img/llamafactory2.png)
53
+
54
+ 3. 配置完成,启动模型训练脚本,数据将自动采集,落盘数据格式请参考[PyTorch 场景的精度数据采集-dump-结果文件介绍](../05.data_dump_PyTorch.md#3-dump-结果文件介绍)。
55
+
56
+ #### 1.2.2 MindSpeed数据采集
57
+
58
+ 打开training.py文件,MindSpeed-MM路径为`mindspeed_mm/training.py`,MindSpeed-LLM路径为`mindspeed_llm/training/training.py`。
59
+
60
+ 1. 在training.py文件中添加工具接口,初始化数据采集配置以及固定随机数:
61
+
62
+ ![mindspeed1.png](./mindspeed_llamafactoary_img/mindspeed1.png)
63
+
64
+ 2. 在training.py文件**训练循环逻辑位置**添加工具接口,控制数据采集的启动、停止和step计数:
65
+
66
+ ![mindspeed2.png](./mindspeed_llamafactoary_img/mindspeed2.png)
67
+
68
+ 3. 配置完成,启动模型训练脚本,数据将自动采集,落盘数据格式请参考[PyTorch 场景的精度数据采集-dump-结果文件介绍](../05.data_dump_PyTorch.md#3-dump-结果文件介绍)。
69
+
70
+ ## 2. 自动比对
71
+
72
+ ### 2.1 模型分级可视化比对
73
+
74
+ 该功能将msprobe工具dump的精度数据进行解析,还原模型图结构,实现模型各个层级的精度数据比对,方便用户理解模型结构、分析精度问题。
75
+
76
+ 我们将使用以下命令行进行模型分级可视化比对:
77
+
78
+ ```
79
+ msprobe -f pytorch graph -i ./compare.json -o ./output -lm ./layer_mapping.yaml
80
+ ```
81
+ 具体的参数说明请点击查看[分级可视化构图比对-构图命令行说明](../21.visualization_PyTorch.md#31-构图命令行说明)。
82
+
83
+ 在基于MindSpeed和LLamaFactory框架的模型比对场景中,**-lm参数是必填的**,-lm参数所需的layer_mapping.yaml如何配置将在下面的章节进行介绍。
84
+
85
+ 模型分级可视化比对完成后,可通过tensorboard(需安装[tb_graph_ascend插件](../21.visualization_PyTorch.md#1依赖安装))启动端口,在浏览器页面查看模型结构和精度比对结果,请参考[分级可视化构图比对-启动tensorboard](../21.visualization_PyTorch.md#4启动tensorboard)和[分级可视化构图比对-浏览器查看](../21.visualization_PyTorch.md#5浏览器查看)。
86
+
87
+ ### 2.2 layer_mapping映射文件配置
88
+ msprobe工具的比对功能会将比对双方dump名称一致的数据进行比对。由于MindSpeed和LLamaFactory框架代码实现的差异,一些模型层级和层级名称有所不同,导致无法进行匹配,需要进行layer层名称映射,才能够比对。
89
+
90
+ #### 2.2.1 layer_mapping映射文件模板
91
+
92
+ 此处提供了Qwen2.5vl和Qwen2.5模型的layer_mapping映射文件模板,可直接使用。**如果您使用其他模型,或对MindSpeed和LLamaFactory框架进行过定制开发修改过框架源码,此layer_mapping映射文件模板可能会失效,请按照后续步骤修改layer_mapping映射文件模板**。
93
+
94
+ 每个模型有两个layer_mapping映射文件模板,分别是NPU侧为Mindspeed、Bench侧为LLamaFactory,以及NPU侧为LLamaFactory、Bench侧为Mindspeed,映射内容有所不同。
95
+
96
+ 文件名格式:\*.yaml,*为文件名,可自定义。本文命名为layer_mapping.yaml。
97
+
98
+ **Qwen2.5vl**
99
+
100
+ ```yaml
101
+ # NPU侧为Mindspeed-MM, Bench侧为LLamaFactory
102
+ TopLayer:
103
+ 0.module: module
104
+
105
+ Float16Module:
106
+ module.image_encoder: visual
107
+ module.text_decoder: model
108
+
109
+ VisionModel:
110
+ encoder.patch_embed: patch_embed
111
+ encoder.rotary_pos_emb: rotary_pos_emb
112
+ encoder.blocks.layers: blocks
113
+ projector: merger
114
+
115
+ TransformerLayer:
116
+ input_layernorm: norm1
117
+ self_attention: attn
118
+ pre_mlp_layernorm: norm2
119
+
120
+ Qwen2vlVitSelfAttention:
121
+ linear_qkv: qkv
122
+ linear_proj: proj
123
+
124
+ MLP:
125
+ linear_fc1: up_proj
126
+ linear_fc2: down_proj
127
+
128
+ MultimodalProjector:
129
+ layernorm: ln_q
130
+ encoder: mlp
131
+ encoder.linear_fc1: mlp.0
132
+ encoder.linear_fc2: mlp.2
133
+
134
+ MMGPTModel:
135
+ embedding.word_embeddings: embed_tokens
136
+ rotary_pos_emb: rotary_emb
137
+ decoder.layers: layers
138
+ decoder.final_layernorm: norm
139
+ output_layer: lm_head
140
+ ```
141
+ ```yaml
142
+ # NPU侧为LLamaFactory, Bench侧为Mindspeed-MM
143
+ TopLayer:
144
+ module: 0.module
145
+
146
+ Qwen2_5_VLForConditionalGeneration:
147
+ visual: module.image_encoder
148
+ model: module.text_decoder
149
+ lm_head: module.text_decoder.output_layer
150
+
151
+ Qwen2_5_VisionTransformerPretrainedModel:
152
+ patch_embed: encoder.patch_embed
153
+ rotary_pos_emb: encoder.rotary_pos_emb
154
+ blocks: encoder.blocks.layers
155
+ merger: projector
156
+
157
+ Qwen2_5_VLVisionBlock:
158
+ norm1: input_layernorm
159
+ attn: self_attention
160
+ norm2: pre_mlp_layernorm
161
+
162
+ Qwen2_5_VLVisionSdpaAttention:
163
+ qkv: linear_qkv
164
+ proj: linear_proj
165
+
166
+ Qwen2_5_VLMLP:
167
+ up_proj: linear_fc1
168
+ down_proj: linear_fc2
169
+
170
+ Qwen2_5_VLPatchMerger:
171
+ ln_q: layernorm
172
+ mlp: encoder
173
+ mlp.0: encoder.linear_fc1
174
+ mlp.2: encoder.linear_fc2
175
+
176
+ Qwen2_5_VLModel:
177
+ embed_tokens: embedding.word_embeddings
178
+ rotary_emb: rotary_pos_emb
179
+ layers: decoder.layers
180
+ norm: decoder.final_layernorm
181
+
182
+ Qwen2_5_VLDecoderLayer:
183
+ self_attn: self_attention
184
+ self_attn.o_proj: self_attention.linear_proj
185
+ post_attention_layernorm: pre_mlp_layernorm
186
+ ```
187
+
188
+ **Qwen2.5**
189
+
190
+ ```yaml
191
+ # NPU侧为Mindspeed-LLM, Bench侧为LLamaFactory
192
+ TopLayer:
193
+ 0.module: module
194
+
195
+ Float16Module:
196
+ module: model
197
+ module.output_layer: lm_head
198
+
199
+ GPTModel:
200
+ embedding.word_embeddings: embed_tokens
201
+ decoder.layers: layers
202
+ decoder.final_layernorm: norm
203
+
204
+ TransformerLayer:
205
+ self_attention: self_attn
206
+ pre_mlp_layernorm: post_attention_layernorm
207
+
208
+ SelfAttention:
209
+ linear_proj: o_proj
210
+
211
+ MLP:
212
+ linear_fc1: up_proj
213
+ linear_fc2: down_proj
214
+ ```
215
+ ```yaml
216
+ # NPU侧为LLamaFactory, Bench侧为Mindspeed-LLM
217
+ TopLayer:
218
+ module: 0.module
219
+
220
+ Qwen2ForCausalLM:
221
+ model: module
222
+ lm_head: module.output_layer
223
+
224
+ Qwen2Model:
225
+ embed_tokens: embedding.word_embeddings
226
+ layers: decoder.layers
227
+ norm: decoder.final_layernorm
228
+
229
+ Qwen2DecoderLayer:
230
+ self_attn: self_attention
231
+ post_attention_layernorm: pre_mlp_layernorm
232
+
233
+ Qwen2Attention:
234
+ o_proj: linear_proj
235
+
236
+ Qwen2MLP:
237
+ up_proj: linear_fc1
238
+ down_proj: linear_fc2
239
+ ```
240
+
241
+ #### 2.2.2 layer_mapping映射文件配置过程
242
+ 以Qwen2.5vl模型,NPU侧MindSpeed,Bench侧LLamaFactory为例。
243
+
244
+ 1. 模型结构打印
245
+
246
+ 参考[添加msprobe工具采集接口](#12-添加msprobe工具采集接口)章节,配置过程中会在模型文件中添加`debugger.start(model=model)`,针对`start接口`中的`model`进行`print(model)`即可打印模型结构。
247
+
248
+ 打印的模型结构:[mindspeed-mm-qwen25vl.txt](./mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt),[llamafactory-qwen25vl.txt](./mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt)
249
+
250
+ 2. 基于模型结构由外到内进行layer mapping配置
251
+
252
+ - 结构1
253
+
254
+ ![1.png](./mindspeed_llamafactoary_img/1.png)
255
+
256
+ ```yaml
257
+ TopLayer: # 代表模型最顶层
258
+ 0.module: module # MindSpeed的model类型是list,msprobe采集会对其添加数字前缀,代表当前模型在list中的索引,因此要做0.module -> module的映射
259
+
260
+ Float16Module: # MindSpeed的Float16Module与LLamaFactory的Qwen2_5_VLForConditionalGeneration同级,对它们的子层进行映射
261
+ module.image_encoder: visual # MindSpeed的Float16Module多了一个子层module,跨层级用"."分隔,配置为module.image_encoder
262
+ module.text_decoder: model
263
+ ```
264
+ - 结构2
265
+
266
+ ![2.png](./mindspeed_llamafactoary_img/2.png)
267
+
268
+ ```yaml
269
+ VisionModel: # MindSpeed的VisionModel与LLamaFactory的Qwen2_5_VisionTransformerPretrainedModel同级,对它们的子层进行映射
270
+ encoder.patch_embed: patch_embed
271
+ encoder.rotary_pos_emb: rotary_pos_emb
272
+ encoder.blocks.layers: blocks
273
+ projector: merger
274
+ ```
275
+ - 结构3
276
+
277
+ ![3.png](./mindspeed_llamafactoary_img/3.png)
278
+
279
+ ```yaml
280
+ TransformerLayer: # MindSpeed的TransformerLayer与LLamaFactory的Qwen2_5_VLVisionBlock同级,对它们的子层进行映射
281
+ input_layernorm: norm1
282
+ self_attention: attn
283
+ pre_mlp_layernorm: norm2
284
+ ```
285
+ - 结构4
286
+
287
+ ![4.png](./mindspeed_llamafactoary_img/4.png)
288
+
289
+ ```yaml
290
+ Qwen2vlVitSelfAttention: # MindSpeed的Qwen2vlVitSelfAttention与LLamaFactory的Qwen2_5_VLVisionSdpaAttention同级,对它们的子层进行映射
291
+ linear_qkv: qkv
292
+ linear_proj: proj
293
+
294
+ MLP: # MindSpeed的MLP与LLamaFactory的Qwen2_5_VLMLP同级,对它们的子层进行映射
295
+ linear_fc1: up_proj
296
+ linear_fc2: down_proj
297
+ ```
298
+ - 结构5
299
+
300
+ ![5.png](./mindspeed_llamafactoary_img/5.png)
301
+
302
+ ```yaml
303
+ MultimodalProjector: # MindSpeed的MultimodalProjector与LLamaFactory的Qwen2_5_VLPatchMerger同级,对它们的子层进行映射
304
+ layernorm: ln_q
305
+ encoder: mlp
306
+ encoder.linear_fc1: mlp.0
307
+ encoder.linear_fc2: mlp.2
308
+ ```
309
+ - 结构6
310
+
311
+ ![6.png](./mindspeed_llamafactoary_img/6.png)
312
+
313
+ ```yaml
314
+ MMGPTModel: # MindSpeed的MMGPTModel与LLamaFactory的Qwen2_5_VLModel同级,对它们的子层进行映射
315
+ embedding.word_embeddings: embed_tokens
316
+ rotary_pos_emb: rotary_emb
317
+ decoder.layers: layers
318
+ decoder.final_layernorm: norm
319
+ output_layer: lm_head
320
+ ```
321
+ - 结构7
322
+
323
+ ![7.png](./mindspeed_llamafactoary_img/7.png)
324
+
325
+ 由于TransformerLayer和MLP层已经配置过,无法再重复配置,此处的节点映射可通过[手动选择节点匹配](#23-手动选择节点匹配)完成。
326
+
327
+ ### 2.3 手动选择节点匹配
328
+ 如果通过layer_mapping映射配置后,还有节点未匹配上,可通过浏览器界面,使用鼠标选择两个待匹配的灰色节点进行匹配。
329
+
330
+ 请参考[分级可视化构图比对-手动选择节点匹配](../21.visualization_PyTorch.md#56-手动选择节点匹配)。
@@ -25,4 +25,4 @@ except ImportError:
25
25
  from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger
26
26
  from msprobe.mindspore.common.utils import seed_all, MsprobeStep, MsprobeInitStep
27
27
  from msprobe.mindspore.monitor.module_hook import TrainerMon
28
- from msprobe.mindspore.dump.graph_tensor_dump import save, save_grad
28
+ from msprobe.mindspore.dump.graph_tensor_dump import save, save_grad, step
@@ -17,7 +17,7 @@ import os
17
17
  from dataclasses import dataclass
18
18
  from typing import Any, Optional
19
19
  from tqdm import tqdm
20
- import numpy as np
20
+
21
21
  from msprobe.core.common.const import Const, CompareConst
22
22
  from msprobe.core.common.file_utils import FileOpen, create_directory, write_csv, load_json, load_yaml
23
23
  from msprobe.core.common.utils import add_time_as_suffix
@@ -152,18 +152,21 @@ class ApiRunner:
152
152
  """
153
153
  api_name_list = api_name_str.split(Const.SEP)
154
154
  if len(api_name_list) != 3:
155
- err_msg = f"ApiRunner.get_info_from_name failed: api_name_str: {api_name_str} is not in defined format"
156
- logger.error_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
155
+ err_msg = f"ApiRunner.get_info_from_name failed: api_name_str: {api_name_str} is not in defined format." \
156
+ f" Exception has been raised and will be captured/logged externally."
157
+ logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
157
158
  api_type_str, api_sub_name = api_name_list[0], api_name_list[1]
158
159
  if api_type_str not in [MsCompareConst.MINT, MsCompareConst.MINT_FUNCTIONAL, MsCompareConst.TENSOR_API,
159
160
  MsCompareConst.FUNCTIONAL_API] \
160
161
  and api_platform == Const.MS_FRAMEWORK:
161
- err_msg = f"ApiRunner.get_info_from_name failed: not mint, mint.nn.functional or Tensor api"
162
- logger.error_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
162
+ err_msg = f"ApiRunner.get_info_from_name failed: not mint, mint.nn.functional or Tensor api," \
163
+ f" api_name={api_name_str}. Exception has been raised and will be captured/logged externally."
164
+ logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
163
165
 
164
166
  if api_type_str not in MsCompareConst.MT_VALID_API_TYPES and api_platform == Const.MT_FRAMEWORK:
165
- err_msg = f"ApiRunner.get_info_from_name failed: not torch, functional or Tensor api"
166
- logger.error_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
167
+ err_msg = f"ApiRunner.get_info_from_name failed: not torch, functional or Tensor api," \
168
+ f" api_name={api_name_str}. Exception has been raised and will be captured/logged externally."
169
+ logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
167
170
  return api_type_str, api_sub_name
168
171
 
169
172
  @staticmethod
@@ -67,8 +67,9 @@ class ComputeElement:
67
67
  elif compute_element_info is None:
68
68
  self._init_from_null_compute_element_info()
69
69
  else:
70
- logger.error_log_with_exp(
71
- "ComputeElement.__init__ failed: not init with parameter or compute_element info is not (list, dict)",
70
+ logger.warning_log_with_exp(
71
+ "ComputeElement.__init__ failed: not init with parameter or compute_element info is not (list, dict)."
72
+ " Exception has been raised and will be captured/logged externally.",
72
73
  ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
73
74
 
74
75
  @staticmethod
@@ -82,8 +83,9 @@ class ComputeElement:
82
83
  ms_dtype = ms_tensor.dtype
83
84
  dtype_str = ms_dtype_to_dtype_str.get(ms_dtype)
84
85
  if dtype_str not in dtype_str_to_torch_dtype:
85
- err_msg = f"ComputeElement.transfer_to_torch_tensor failed: no matching torch dtype for {dtype_str}"
86
- logger.error_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
86
+ err_msg = f"ComputeElement.transfer_to_torch_tensor failed: no matching torch dtype" \
87
+ f" for {dtype_str}. Exception has been raised and will be captured/logged externally."
88
+ logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
87
89
  else:
88
90
  torch_dtype = dtype_str_to_torch_dtype.get(dtype_str)
89
91
 
@@ -109,8 +111,9 @@ class ComputeElement:
109
111
  dtype_str = ms_dtype_to_dtype_str.get(ms_dtype)
110
112
 
111
113
  if dtype_str not in dtype_str_to_mindtorch_dtype:
112
- err_msg = f"ComputeElement.transfer_to_mindtorch_tensor failed: no matching mindtorch dtype for {dtype_str}"
113
- logger.error_log_with_exp(err_msg,
114
+ err_msg = f"ComputeElement.transfer_to_mindtorch_tensor failed: no matching mindtorch dtype for" \
115
+ f" {dtype_str}. Exception has been raised and will be captured/logged externally."
116
+ logger.warning_log_with_exp(err_msg,
114
117
  ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
115
118
  else:
116
119
  mindtorch_dtype = dtype_str_to_mindtorch_dtype.get(dtype_str)
@@ -139,8 +142,9 @@ class ComputeElement:
139
142
  dtype_str = torch_dtype_to_dtype_str.get(torch_dtype)
140
143
  if dtype_str not in dtype_str_to_ms_dtype:
141
144
  err_msg = \
142
- f"ComputeElement._transfer_to_mindspore_tensor failed: no matching mindspore dtype for {dtype_str}"
143
- logger.error_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
145
+ f"ComputeElement._transfer_to_mindspore_tensor failed: no matching mindspore dtype for {dtype_str}. " \
146
+ f"Exception has been raised and will be captured/logged externally."
147
+ logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
144
148
  else:
145
149
  ms_dtype = dtype_str_to_ms_dtype.get(dtype_str)
146
150
 
@@ -198,8 +202,9 @@ class ComputeElement:
198
202
  parameter_tmp = mindspore.Tensor(ndarray, dtype=ms_dtype)
199
203
  else:
200
204
  err_msg = "ComputeElement.get_parameter failed: self.parameter type is not in " \
201
- "(int, float, str, slice, bool, torch.Tensor, mindspore.Tensor, MstensorMetaData)"
202
- logger.error_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
205
+ "(int, float, str, slice, bool, torch.Tensor, mindspore.Tensor, MstensorMetaData)." \
206
+ "Exception has been raised and will be captured/logged externally."
207
+ logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
203
208
 
204
209
  # if necessary, do transfer
205
210
  if not get_origin and isinstance(parameter_tmp, mindspore.Tensor) and tensor_platform == Const.PT_FRAMEWORK:
@@ -296,8 +301,9 @@ class ComputeElement:
296
301
  self.shape = tuple()
297
302
  if not isinstance(parameter, self.supported_parameter_type):
298
303
  err_msg = "ComputeElement._init_with_parameter failed: " \
299
- "parameter type is not in (int, float, str, slice, bool, torch.Tensor, mindspore.Tensor)"
300
- logger.error_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
304
+ "parameter type is not in (int, float, str, slice, bool, torch.Tensor, mindspore.Tensor)." \
305
+ "Exception has been raised and will be captured/logged externally."
306
+ logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
301
307
  if isinstance(parameter, mindspore.Tensor):
302
308
  self.shape = tuple(parameter.shape)
303
309
  self.dtype_str = ms_dtype_to_dtype_str.get(parameter.dtype)