mindstudio-probe 8.1.2__py3-none-any.whl → 8.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/METADATA +2 -2
- {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/RECORD +172 -147
- msprobe/README.md +6 -6
- msprobe/core/common/const.py +98 -41
- msprobe/core/common/db_manager.py +256 -0
- msprobe/core/common/file_utils.py +28 -5
- msprobe/core/common/log.py +7 -0
- msprobe/core/common/megatron_utils.py +59 -0
- msprobe/core/common/parallel_state.py +193 -0
- msprobe/core/common/utils.py +20 -13
- msprobe/core/common_config.py +5 -0
- msprobe/core/compare/acc_compare.py +140 -93
- msprobe/core/compare/check.py +13 -0
- msprobe/core/compare/compare_cli.py +64 -6
- msprobe/core/compare/config.py +10 -8
- msprobe/core/compare/diff_analyze/diff_analyze_threshold.yaml +14 -0
- msprobe/core/compare/diff_analyze/first_diff_analyze.py +135 -0
- msprobe/core/compare/diff_analyze/ignore_op_list.yaml +3 -0
- msprobe/core/compare/find_first/__init__.py +0 -0
- msprobe/core/compare/find_first/analyzer.py +282 -0
- msprobe/core/compare/find_first/data_processor.py +35 -0
- msprobe/core/compare/find_first/graph.py +188 -0
- msprobe/core/compare/find_first/utils.py +189 -0
- msprobe/core/compare/highlight.py +74 -101
- msprobe/core/compare/layer_mapping/layer_mapping.py +14 -9
- msprobe/core/compare/merge_result/merge_result.py +2 -2
- msprobe/core/compare/multiprocessing_compute.py +45 -28
- msprobe/core/compare/npy_compare.py +7 -10
- msprobe/core/compare/utils.py +338 -130
- msprobe/core/config_check/checkers/dataset_checker.py +2 -1
- msprobe/core/config_check/checkers/env_args_checker.py +5 -5
- msprobe/core/config_check/checkers/hyperparameter_checker.py +30 -10
- msprobe/core/config_check/checkers/pip_checker.py +4 -3
- msprobe/core/config_check/checkers/random_checker.py +3 -3
- msprobe/core/config_check/checkers/weights_checker.py +2 -1
- msprobe/core/config_check/ckpt_compare/megatron_loader.py +2 -0
- msprobe/core/config_check/resource/hyperparameter.yaml +11 -1
- msprobe/core/config_check/utils/hyperparameter_parser.py +7 -3
- msprobe/core/config_check/utils/utils.py +10 -0
- msprobe/core/data_dump/api_registry.py +49 -30
- msprobe/core/data_dump/data_collector.py +71 -29
- msprobe/core/data_dump/data_processor/base.py +2 -0
- msprobe/core/data_dump/data_processor/mindspore_processor.py +47 -53
- msprobe/core/data_dump/data_processor/pytorch_processor.py +227 -93
- msprobe/core/data_dump/json_writer.py +81 -7
- msprobe/core/data_dump/scope.py +4 -6
- msprobe/core/hook_manager.py +129 -70
- msprobe/core/monitor/csv2db.py +361 -0
- msprobe/core/monitor/db_utils.py +278 -0
- msprobe/core/monitor/utils.py +35 -1
- msprobe/core/service.py +31 -39
- msprobe/core/single_save/single_comparator.py +16 -3
- msprobe/docs/01.installation.md +51 -19
- msprobe/docs/02.config_introduction.md +16 -20
- msprobe/docs/03.config_examples.md +26 -0
- msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
- msprobe/docs/05.data_dump_PyTorch.md +6 -2
- msprobe/docs/06.data_dump_MindSpore.md +44 -7
- msprobe/docs/07.accuracy_checker_PyTorch.md +1 -1
- msprobe/docs/10.accuracy_compare_PyTorch.md +124 -44
- msprobe/docs/11.accuracy_compare_MindSpore.md +75 -7
- msprobe/docs/14.data_parse_PyTorch.md +1 -1
- msprobe/docs/19.monitor.md +94 -7
- msprobe/docs/21.visualization_PyTorch.md +71 -101
- msprobe/docs/22.visualization_MindSpore.md +69 -119
- msprobe/docs/23.generate_operator_PyTorch.md +1 -1
- msprobe/docs/25.tool_function_introduction.md +0 -1
- msprobe/docs/26.data_dump_PyTorch_baseline.md +7 -7
- msprobe/docs/28.debugger_save_instruction.md +184 -81
- msprobe/docs/29.data_dump_MSAdapter.md +6 -0
- msprobe/docs/31.config_check.md +4 -2
- msprobe/docs/36.calculation_result_change.md +75 -0
- msprobe/docs/FAQ.md +22 -1
- msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +6 -2
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/visualization/vis_browser_1.png +0 -0
- msprobe/docs/img/visualization/vis_match_info.png +0 -0
- msprobe/docs/img/visualization/vis_precision_info.png +0 -0
- msprobe/docs/img/visualization/vis_search_info.png +0 -0
- msprobe/docs/img/visualization/vis_show_info.png +0 -0
- msprobe/docs/img/visualization/vis_showcase.png +0 -0
- msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/1.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/2.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/3.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/4.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/5.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/6.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/7.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt +59 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory1.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/llamafactory2.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt +80 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed1.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed2.png +0 -0
- msprobe/docs/visualization/mindspeed_llamafactory_mapping.md +330 -0
- msprobe/mindspore/__init__.py +1 -1
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +1 -1
- msprobe/mindspore/api_accuracy_checker/api_runner.py +9 -6
- msprobe/mindspore/api_accuracy_checker/compute_element.py +18 -12
- msprobe/mindspore/cell_processor.py +64 -25
- msprobe/mindspore/common/utils.py +51 -7
- msprobe/mindspore/compare/common_dir_compare.py +45 -37
- msprobe/mindspore/compare/ms_compare.py +10 -2
- msprobe/mindspore/compare/ms_graph_compare.py +47 -52
- msprobe/mindspore/debugger/debugger_config.py +18 -7
- msprobe/mindspore/debugger/precision_debugger.py +16 -12
- msprobe/mindspore/dump/cell_dump_process.py +130 -68
- msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +10 -2
- msprobe/mindspore/dump/graph_mode_cell_dump.py +35 -9
- msprobe/mindspore/dump/graph_tensor_dump.py +11 -0
- msprobe/mindspore/dump/hook_cell/api_register.py +19 -20
- msprobe/mindspore/dump/hook_cell/hook_cell.py +12 -34
- msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +142 -21
- msprobe/mindspore/dump/kernel_kbyk_dump.py +24 -0
- msprobe/mindspore/exception_dump/__init__.py +0 -0
- msprobe/mindspore/exception_dump/exception_dump_tool_factory.py +51 -0
- msprobe/mindspore/exception_dump/kernel_graph_exception_dump.py +57 -0
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +5 -4
- msprobe/mindspore/mindspore_service.py +2 -2
- msprobe/mindspore/mindtorch/mindtorch_adaptor.py +12 -7
- msprobe/mindspore/monitor/features.py +82 -0
- msprobe/mindspore/monitor/module_hook.py +168 -10
- msprobe/mindspore/monitor/utils.py +27 -1
- msprobe/mindspore/ms_config.py +12 -4
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +1 -1
- msprobe/mindspore/task_handler_factory.py +3 -1
- msprobe/nan_analyze/graph.py +1 -1
- msprobe/pytorch/api_accuracy_checker/common/config.py +3 -36
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +0 -24
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +2 -12
- msprobe/pytorch/api_accuracy_checker/config.yaml +1 -6
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +2 -2
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +12 -132
- msprobe/pytorch/common/utils.py +1 -21
- msprobe/pytorch/compare/pt_compare.py +10 -2
- msprobe/pytorch/{hook_module/jit_script_wrapper.py → compare/pt_diff_analyze.py} +3 -15
- msprobe/pytorch/compare/utils.py +2 -1
- msprobe/pytorch/debugger/debugger_config.py +18 -23
- msprobe/pytorch/dump/module_dump/hook_wrapper.py +10 -7
- msprobe/pytorch/dump/module_dump/module_processer.py +41 -19
- msprobe/pytorch/free_benchmark/main.py +7 -4
- msprobe/pytorch/hook_module/api_register.py +62 -24
- msprobe/pytorch/hook_module/hook_module.py +9 -29
- msprobe/pytorch/hook_module/pt_hook_manager.py +84 -15
- msprobe/pytorch/hook_module/script_wrapper.py +140 -0
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +6 -0
- msprobe/pytorch/monitor/csv2tb.py +1 -1
- msprobe/pytorch/monitor/features.py +94 -0
- msprobe/pytorch/monitor/module_hook.py +221 -81
- msprobe/pytorch/monitor/module_metric.py +27 -1
- msprobe/pytorch/monitor/optimizer_collect.py +109 -4
- msprobe/pytorch/online_dispatch/dispatch.py +42 -24
- msprobe/pytorch/online_dispatch/dump_compare.py +1 -1
- msprobe/pytorch/parse_tool/lib/visualization.py +0 -1
- msprobe/pytorch/pt_config.py +2 -51
- msprobe/pytorch/pytorch_service.py +7 -14
- msprobe/visualization/builder/graph_builder.py +192 -63
- msprobe/visualization/builder/graph_merger.py +986 -0
- msprobe/visualization/builder/msprobe_adapter.py +17 -15
- msprobe/visualization/compare/graph_comparator.py +26 -16
- msprobe/visualization/db_utils.py +252 -0
- msprobe/visualization/graph/base_node.py +2 -22
- msprobe/visualization/graph/distributed_analyzer.py +12 -12
- msprobe/visualization/graph/graph.py +44 -16
- msprobe/visualization/graph_service.py +143 -59
- msprobe/visualization/utils.py +103 -4
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +0 -295
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +0 -205
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +0 -378
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +0 -239
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +0 -115
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +0 -250
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +0 -63
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +0 -198
- msprobe/pytorch/attl_manager.py +0 -65
- {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/LICENSE +0 -0
- {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/WHEEL +0 -0
- {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-8.1.2.dist-info → mindstudio_probe-8.2.1.dist-info}/top_level.txt +0 -0
- /msprobe/{pytorch/api_accuracy_checker/tensor_transport_layer → core/compare/diff_analyze}/__init__.py +0 -0
msprobe/docs/visualization/mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt
ADDED

@@ -0,0 +1,80 @@
[DistributedDataParallel(
  (module): Float16Module(
    (module): VLMModel(
      (image_encoder): VisionModel(
        (encoder): Qwen2VLViT(
          (patch_embed): PatchEmbed(
            (proj): Conv3d(3, 1280, kernel_size=(2, 14, 14), stride=(2, 14, 14), bias=False)
          )
          (rotary_pos_emb): VisionRotaryEmbedding()
          (blocks): Qwen2VLVisionTransformerBlock(
            (layers): ModuleList(
              (0-15): 16 x TransformerLayer(
                (input_layernorm): RMSNorm()
                (self_attention): Qwen2vlVitSelfAttention(
                  (core_attention): DotProductAttention(
                    (scale_mask_softmax): FusedScaleMaskSoftmax()
                    (attention_dropout): Dropout(p=0.0, inplace=False)
                  )
                  (linear_proj): RowParallelLinear()
                  (linear_qkv): ColumnParallelLinear()
                  (q_layernorm): IdentityOp()
                  (k_layernorm): IdentityOp()
                )
                (pre_cross_attn_layernorm): IdentityOp()
                (cross_attention): IdentityOp()
                (cross_attn_bda): IdentityFuncOp()
                (pre_mlp_layernorm): RMSNorm()
                (mlp): MLP(
                  (linear_fc1): ColumnParallelLinear()
                  (linear_fc2): RowParallelLinear()
                )
              )
            )
          )
        )
        (projector): MultimodalProjector(
          (layernorm): RMSNorm()
          (encoder): MLP(
            (linear_fc1): ColumnParallelLinear()
            (linear_fc2): RowParallelLinear()
          )
        )
      )
      (text_decoder): MMGPTModel(
        (embedding): LanguageModelEmbedding(
          (word_embeddings): VocabParallelEmbedding()
          (embedding_dropout): Dropout(p=0.0, inplace=False)
        )
        (rotary_pos_emb): Qwen2VLRotaryEmbedding_llm()
        (decoder): TransformerBlock(
          (layers): ModuleList(
            (0-7): 8 x TransformerLayer(
              (input_layernorm): RMSNorm()
              (self_attention): Qwen2vlSelfAttention(
                (core_attention): DotProductAttention(
                  (scale_mask_softmax): FusedScaleMaskSoftmax()
                  (attention_dropout): Dropout(p=0.0, inplace=False)
                )
                (linear_proj): RowParallelLinear()
                (linear_qkv): ColumnParallelLinear()
                (q_layernorm): IdentityOp()
                (k_layernorm): IdentityOp()
              )
              (pre_cross_attn_layernorm): IdentityOp()
              (cross_attention): IdentityOp()
              (cross_attn_bda): IdentityFuncOp()
              (pre_mlp_layernorm): RMSNorm()
              (mlp): MLP(
                (linear_fc1): ColumnParallelLinear()
                (linear_fc2): RowParallelLinear()
              )
            )
          )
          (final_layernorm): RMSNorm()
        )
        (output_layer): ColumnParallelLinear()
      )
    )
  )
)]
msprobe/docs/visualization/mindspeed_llamafactory_mapping.md
ADDED

@@ -0,0 +1,330 @@
# MindSpeed & LLamaFactory Data Collection and Automated Comparison

## 0. Use Case

For the same model implemented on both the MindSpeed and LLamaFactory frameworks, precision differences can appear during training even when the model hyperparameters, environment variables, initial weights, and training data are identical. A **full-network comparison** is then needed to locate the point where precision diverges.

This document uses the Qwen2.5vl and Qwen2.5 models to show how to collect data from MindSpeed and LLamaFactory and compare the two automatically.

## 1. Data Collection

### 1.1 Prepare the data-collection configuration file

Before collecting data, prepare a json file (named config.json in this walkthrough) containing the collection settings.

The configuration used here is shown below. For more examples see the [config.json examples](../03.config_examples.md); for a detailed description of each field see the [configuration introduction](../02.config_introduction.md).

```json
{
    "task": "statistics",
    "dump_path": "/home/data_dump",
    "rank": [],
    "step": [0],
    "level": "mix",
    "async_dump": false,

    "statistics": {
        "scope": [],
        "list": [],
        "tensor_list": [],
        "data_mode": ["all"],
        "summary_mode": "statistics"
    }
}
```

Note that hierarchical model visualization comparison runs after data collection, so `level` must be set to `L0` (module data) or `mix` (module + API data).

### 1.2 Add the msprobe collection interfaces

The interface usage in this walkthrough is shown below; for more options and interface details see [PyTorch precision data collection](../05.data_dump_PyTorch.md).

#### 1.2.1 LLamaFactory data collection

LLamaFactory builds on Transformers, so the msprobe collection hooks are added inside Transformers.

Taking Transformers 4.49.0 as an example, run `pip3 show Transformers` to get the `Location` path, then open `Location/transformers/trainer.py`.

1. In trainer.py, add the tool interfaces that initialize the collection configuration and fix the random seeds:

   ![screenshot](./mindspeed_llamafactoary_img/llamafactory1.png)

2. In trainer.py, at the **training-loop logic**, add the tool interfaces that start and stop collection and advance the step counter:

   ![screenshot](./mindspeed_llamafactoary_img/llamafactory2.png)

3. That completes the setup; launch the training script and the data is collected automatically. For the on-disk data format see [dump result files](../05.data_dump_PyTorch.md#3-dump-结果文件介绍).
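
The exact lines to add are shown in the screenshots above. As a rough guide, the instrumentation follows the pattern in this minimal sketch; the `PrecisionDebugger`, `seed_all`, `start`, `stop`, and `step` interfaces are the documented msprobe ones, while the surrounding training loop is illustrative rather than the actual Trainer source:

```python
# Minimal sketch of the trainer.py instrumentation; the training loop below is
# a stand-in for the real Trainer code, not a copy of it.
from msprobe.pytorch import PrecisionDebugger, seed_all

seed_all(1234)                                             # fix random seeds
debugger = PrecisionDebugger(config_path="./config.json")  # config from section 1.1

def train_loop(model, dataloader, optimizer):
    for batch in dataloader:
        debugger.start(model=model)  # begin collection; model is needed for L0/mix module dump
        loss = model(**batch).loss   # illustrative forward/backward step
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        debugger.stop()              # end collection for this training step
        debugger.step()              # advance msprobe's step counter
```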

#### 1.2.2 MindSpeed data collection

Open training.py: the MindSpeed-MM path is `mindspeed_mm/training.py`, and the MindSpeed-LLM path is `mindspeed_llm/training/training.py`.

1. In training.py, add the tool interfaces that initialize the collection configuration and fix the random seeds:

   ![screenshot](./mindspeed_llamafactoary_img/mindspeed1.png)

2. In training.py, at the **training-loop logic**, add the tool interfaces that start and stop collection and advance the step counter, following the same pattern as the sketch in 1.2.1:

   ![screenshot](./mindspeed_llamafactoary_img/mindspeed2.png)

3. That completes the setup; launch the training script and the data is collected automatically. For the on-disk data format see [dump result files](../05.data_dump_PyTorch.md#3-dump-结果文件介绍).

## 2. Automated Comparison

### 2.1 Hierarchical model visualization comparison

This feature parses the precision data dumped by msprobe and reconstructs the model graph, enabling precision comparison at every level of the model; it helps users understand the model structure and analyze precision issues.

We will run the comparison with the following command line:

```
msprobe -f pytorch graph -i ./compare.json -o ./output -lm ./layer_mapping.yaml
```

For the parameter details see the [graph-build command-line reference](../21.visualization_PyTorch.md#31-构图命令行说明).
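
For reference, the `-i` argument takes a json file; the sketch below assumes the `npu_path`/`bench_path` input layout described in the visualization guide, with illustrative paths pointing at the step-level dump directories produced in section 1:

```json
{
    "npu_path": "/home/data_dump_mindspeed/step0",
    "bench_path": "/home/data_dump_llamafactory/step0",
    "is_print_compare_log": true
}
```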

When comparing models across the MindSpeed and LLamaFactory frameworks, **the -lm parameter is mandatory**; how to write the layer_mapping.yaml it requires is covered in the sections below.

Once the comparison has finished, you can serve the results on a port with tensorboard (requires the [tb_graph_ascend plugin](../21.visualization_PyTorch.md#1依赖安装)) and inspect the model structure and precision comparison results in a browser; see [launching tensorboard](../21.visualization_PyTorch.md#4启动tensorboard) and [viewing in the browser](../21.visualization_PyTorch.md#5浏览器查看).
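
For instance (a sketch: `./output` is the `-o` directory from the build command above, and the port is illustrative):

```
tensorboard --logdir ./output --port 6006
```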

### 2.2 layer_mapping configuration

msprobe's comparison matches data whose dump names are identical on both sides. Because the MindSpeed and LLamaFactory implementations of the model differ, some layer levels and layer names do not line up and cannot be matched; a layer-name mapping is required before they can be compared.

#### 2.2.1 layer_mapping templates

Templates for the Qwen2.5vl and Qwen2.5 models are provided here and can be used directly. **If you use another model, or have customized the MindSpeed or LLamaFactory framework source code, these templates may no longer apply; follow the steps below to adapt them.**

Each model has two templates: one with MindSpeed on the NPU side and LLamaFactory on the Bench side, and one with LLamaFactory on the NPU side and MindSpeed on the Bench side; their mapping contents differ.

File name format: \*.yaml, where * is a custom file name. This document uses layer_mapping.yaml.

**Qwen2.5vl**

```yaml
# NPU side: Mindspeed-MM; Bench side: LLamaFactory
TopLayer:
  0.module: module

Float16Module:
  module.image_encoder: visual
  module.text_decoder: model

VisionModel:
  encoder.patch_embed: patch_embed
  encoder.rotary_pos_emb: rotary_pos_emb
  encoder.blocks.layers: blocks
  projector: merger

TransformerLayer:
  input_layernorm: norm1
  self_attention: attn
  pre_mlp_layernorm: norm2

Qwen2vlVitSelfAttention:
  linear_qkv: qkv
  linear_proj: proj

MLP:
  linear_fc1: up_proj
  linear_fc2: down_proj

MultimodalProjector:
  layernorm: ln_q
  encoder: mlp
  encoder.linear_fc1: mlp.0
  encoder.linear_fc2: mlp.2

MMGPTModel:
  embedding.word_embeddings: embed_tokens
  rotary_pos_emb: rotary_emb
  decoder.layers: layers
  decoder.final_layernorm: norm
  output_layer: lm_head
```
```yaml
# NPU side: LLamaFactory; Bench side: Mindspeed-MM
TopLayer:
  module: 0.module

Qwen2_5_VLForConditionalGeneration:
  visual: module.image_encoder
  model: module.text_decoder
  lm_head: module.text_decoder.output_layer

Qwen2_5_VisionTransformerPretrainedModel:
  patch_embed: encoder.patch_embed
  rotary_pos_emb: encoder.rotary_pos_emb
  blocks: encoder.blocks.layers
  merger: projector

Qwen2_5_VLVisionBlock:
  norm1: input_layernorm
  attn: self_attention
  norm2: pre_mlp_layernorm

Qwen2_5_VLVisionSdpaAttention:
  qkv: linear_qkv
  proj: linear_proj

Qwen2_5_VLMLP:
  up_proj: linear_fc1
  down_proj: linear_fc2

Qwen2_5_VLPatchMerger:
  ln_q: layernorm
  mlp: encoder
  mlp.0: encoder.linear_fc1
  mlp.2: encoder.linear_fc2

Qwen2_5_VLModel:
  embed_tokens: embedding.word_embeddings
  rotary_emb: rotary_pos_emb
  layers: decoder.layers
  norm: decoder.final_layernorm

Qwen2_5_VLDecoderLayer:
  self_attn: self_attention
  self_attn.o_proj: self_attention.linear_proj
  post_attention_layernorm: pre_mlp_layernorm
```

**Qwen2.5**

```yaml
# NPU side: Mindspeed-LLM; Bench side: LLamaFactory
TopLayer:
  0.module: module

Float16Module:
  module: model
  module.output_layer: lm_head

GPTModel:
  embedding.word_embeddings: embed_tokens
  decoder.layers: layers
  decoder.final_layernorm: norm

TransformerLayer:
  self_attention: self_attn
  pre_mlp_layernorm: post_attention_layernorm

SelfAttention:
  linear_proj: o_proj

MLP:
  linear_fc1: up_proj
  linear_fc2: down_proj
```
```yaml
# NPU side: LLamaFactory; Bench side: Mindspeed-LLM
TopLayer:
  module: 0.module

Qwen2ForCausalLM:
  model: module
  lm_head: module.output_layer

Qwen2Model:
  embed_tokens: embedding.word_embeddings
  layers: decoder.layers
  norm: decoder.final_layernorm

Qwen2DecoderLayer:
  self_attn: self_attention
  post_attention_layernorm: pre_mlp_layernorm

Qwen2Attention:
  o_proj: linear_proj

Qwen2MLP:
  up_proj: linear_fc1
  down_proj: linear_fc2
```

#### 2.2.2 Building a layer_mapping file

Take the Qwen2.5vl model with MindSpeed on the NPU side and LLamaFactory on the Bench side as an example.

1. Print the model structure

   As described in [adding the msprobe collection interfaces](#12-添加msprobe工具采集接口), the instrumentation adds `debugger.start(model=model)`; calling `print(model)` on the `model` passed to the `start` interface prints the model structure.

   Printed structures: [mindspeed-mm-qwen25vl.txt](./mindspeed_llamafactoary_img/mindspeed-mm-qwen25vl.txt), [llamafactory-qwen25vl.txt](./mindspeed_llamafactoary_img/llamafactory-qwen25vl.txt)

2. Configure the layer mapping from the outside in, following the model structure

   - Structure 1

     ![screenshot](./mindspeed_llamafactoary_img/1.png)

     ```yaml
     TopLayer: # the top level of the model
       0.module: module # MindSpeed's model is a list, so msprobe prefixes dumped names with the model's index in that list; hence the 0.module -> module mapping
     Float16Module: # MindSpeed's Float16Module sits at the same level as LLamaFactory's Qwen2_5_VLForConditionalGeneration; map their sublayers
       module.image_encoder: visual # MindSpeed's Float16Module has an extra sublayer `module`; levels are joined with ".", hence module.image_encoder
       module.text_decoder: model
     ```
   - Structure 2

     ![screenshot](./mindspeed_llamafactoary_img/2.png)

     ```yaml
     VisionModel: # MindSpeed's VisionModel sits at the same level as LLamaFactory's Qwen2_5_VisionTransformerPretrainedModel; map their sublayers
       encoder.patch_embed: patch_embed
       encoder.rotary_pos_emb: rotary_pos_emb
       encoder.blocks.layers: blocks
       projector: merger
     ```
   - Structure 3

     ![screenshot](./mindspeed_llamafactoary_img/3.png)

     ```yaml
     TransformerLayer: # MindSpeed's TransformerLayer sits at the same level as LLamaFactory's Qwen2_5_VLVisionBlock; map their sublayers
       input_layernorm: norm1
       self_attention: attn
       pre_mlp_layernorm: norm2
     ```
   - Structure 4

     ![screenshot](./mindspeed_llamafactoary_img/4.png)

     ```yaml
     Qwen2vlVitSelfAttention: # MindSpeed's Qwen2vlVitSelfAttention sits at the same level as LLamaFactory's Qwen2_5_VLVisionSdpaAttention; map their sublayers
       linear_qkv: qkv
       linear_proj: proj

     MLP: # MindSpeed's MLP sits at the same level as LLamaFactory's Qwen2_5_VLMLP; map their sublayers
       linear_fc1: up_proj
       linear_fc2: down_proj
     ```
   - Structure 5

     ![screenshot](./mindspeed_llamafactoary_img/5.png)

     ```yaml
     MultimodalProjector: # MindSpeed's MultimodalProjector sits at the same level as LLamaFactory's Qwen2_5_VLPatchMerger; map their sublayers
       layernorm: ln_q
       encoder: mlp
       encoder.linear_fc1: mlp.0
       encoder.linear_fc2: mlp.2
     ```
   - Structure 6

     ![screenshot](./mindspeed_llamafactoary_img/6.png)

     ```yaml
     MMGPTModel: # MindSpeed's MMGPTModel sits at the same level as LLamaFactory's Qwen2_5_VLModel; map their sublayers
       embedding.word_embeddings: embed_tokens
       rotary_pos_emb: rotary_emb
       decoder.layers: layers
       decoder.final_layernorm: norm
       output_layer: lm_head
     ```
   - Structure 7

     ![screenshot](./mindspeed_llamafactoary_img/7.png)

     The TransformerLayer and MLP levels are already configured and cannot be configured twice, so these remaining nodes can be matched via [manual node matching](#23-手动选择节点匹配).

### 2.3 Manual node matching

If some nodes remain unmatched after the layer_mapping configuration, you can pair them in the browser by selecting the two grey nodes to be matched with the mouse.

See [manual node matching in the visualization guide](../21.visualization_PyTorch.md#56-手动选择节点匹配).
msprobe/mindspore/__init__.py
CHANGED

@@ -25,4 +25,4 @@ except ImportError:
 from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger
 from msprobe.mindspore.common.utils import seed_all, MsprobeStep, MsprobeInitStep
 from msprobe.mindspore.monitor.module_hook import TrainerMon
-from msprobe.mindspore.dump.graph_tensor_dump import save, save_grad
+from msprobe.mindspore.dump.graph_tensor_dump import save, save_grad, step
msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py
CHANGED

@@ -17,7 +17,7 @@ import os
 from dataclasses import dataclass
 from typing import Any, Optional
 from tqdm import tqdm
-
+
 from msprobe.core.common.const import Const, CompareConst
 from msprobe.core.common.file_utils import FileOpen, create_directory, write_csv, load_json, load_yaml
 from msprobe.core.common.utils import add_time_as_suffix
msprobe/mindspore/api_accuracy_checker/api_runner.py
CHANGED

@@ -152,18 +152,21 @@ class ApiRunner:
         """
         api_name_list = api_name_str.split(Const.SEP)
         if len(api_name_list) != 3:
-            err_msg = f"ApiRunner.get_info_from_name failed: api_name_str: {api_name_str} is not in defined format"
-            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
+            err_msg = f"ApiRunner.get_info_from_name failed: api_name_str: {api_name_str} is not in defined format." \
+                      f" Exception has been raised and will be captured/logged externally."
+            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
         api_type_str, api_sub_name = api_name_list[0], api_name_list[1]
         if api_type_str not in [MsCompareConst.MINT, MsCompareConst.MINT_FUNCTIONAL, MsCompareConst.TENSOR_API,
                                 MsCompareConst.FUNCTIONAL_API] \
                 and api_platform == Const.MS_FRAMEWORK:
-            err_msg = f"ApiRunner.get_info_from_name failed: not mint, mint.nn.functional or Tensor api"
-            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
+            err_msg = f"ApiRunner.get_info_from_name failed: not mint, mint.nn.functional or Tensor api," \
+                      f" api_name={api_name_str}. Exception has been raised and will be captured/logged externally."
+            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))

         if api_type_str not in MsCompareConst.MT_VALID_API_TYPES and api_platform == Const.MT_FRAMEWORK:
-            err_msg = f"ApiRunner.get_info_from_name failed: not torch, functional or Tensor api"
-            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
+            err_msg = f"ApiRunner.get_info_from_name failed: not torch, functional or Tensor api," \
+                      f" api_name={api_name_str}. Exception has been raised and will be captured/logged externally."
+            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.WrongValue))
         return api_type_str, api_sub_name

     @staticmethod
msprobe/mindspore/api_accuracy_checker/compute_element.py
CHANGED

@@ -67,8 +67,9 @@ class ComputeElement:
         elif compute_element_info is None:
             self._init_from_null_compute_element_info()
         else:
-            logger.warning_log_with_exp(
-                "ComputeElement.__init__ failed: not init with parameter or compute_element info is not (list, dict)",
+            logger.warning_log_with_exp(
+                "ComputeElement.__init__ failed: not init with parameter or compute_element info is not (list, dict)."
+                " Exception has been raised and will be captured/logged externally.",
                 ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))

     @staticmethod

@@ -82,8 +83,9 @@ class ComputeElement:
         ms_dtype = ms_tensor.dtype
         dtype_str = ms_dtype_to_dtype_str.get(ms_dtype)
         if dtype_str not in dtype_str_to_torch_dtype:
-            err_msg = f"ComputeElement.transfer_to_torch_tensor failed: no matching torch dtype for {dtype_str}"
-            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
+            err_msg = f"ComputeElement.transfer_to_torch_tensor failed: no matching torch dtype" \
+                      f" for {dtype_str}. Exception has been raised and will be captured/logged externally."
+            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
         else:
             torch_dtype = dtype_str_to_torch_dtype.get(dtype_str)

@@ -109,8 +111,9 @@ class ComputeElement:
         dtype_str = ms_dtype_to_dtype_str.get(ms_dtype)

         if dtype_str not in dtype_str_to_mindtorch_dtype:
-            err_msg = f"ComputeElement.transfer_to_mindtorch_tensor failed: no matching mindtorch dtype for {dtype_str}"
-            logger.warning_log_with_exp(err_msg,
+            err_msg = f"ComputeElement.transfer_to_mindtorch_tensor failed: no matching mindtorch dtype for" \
+                      f" {dtype_str}. Exception has been raised and will be captured/logged externally."
+            logger.warning_log_with_exp(err_msg,
                                         ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
         else:
             mindtorch_dtype = dtype_str_to_mindtorch_dtype.get(dtype_str)

@@ -139,8 +142,9 @@ class ComputeElement:
         dtype_str = torch_dtype_to_dtype_str.get(torch_dtype)
         if dtype_str not in dtype_str_to_ms_dtype:
             err_msg = \
-                f"ComputeElement._transfer_to_mindspore_tensor failed: no matching mindspore dtype for {dtype_str}"
-            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
+                f"ComputeElement._transfer_to_mindspore_tensor failed: no matching mindspore dtype for {dtype_str}. " \
+                f"Exception has been raised and will be captured/logged externally."
+            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
         else:
             ms_dtype = dtype_str_to_ms_dtype.get(dtype_str)

@@ -198,8 +202,9 @@ class ComputeElement:
             parameter_tmp = mindspore.Tensor(ndarray, dtype=ms_dtype)
         else:
             err_msg = "ComputeElement.get_parameter failed: self.parameter type is not in " \
-                      "(int, float, str, slice, bool, torch.Tensor, mindspore.Tensor, MstensorMetaData)"
-            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
+                      "(int, float, str, slice, bool, torch.Tensor, mindspore.Tensor, MstensorMetaData)." \
+                      "Exception has been raised and will be captured/logged externally."
+            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))

         # if necessary, do transfer
         if not get_origin and isinstance(parameter_tmp, mindspore.Tensor) and tensor_platform == Const.PT_FRAMEWORK:

@@ -296,8 +301,9 @@ class ComputeElement:
         self.shape = tuple()
         if not isinstance(parameter, self.supported_parameter_type):
             err_msg = "ComputeElement._init_with_parameter failed: " \
-                      "parameter type is not in (int, float, str, slice, bool, torch.Tensor, mindspore.Tensor)"
-            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
+                      "parameter type is not in (int, float, str, slice, bool, torch.Tensor, mindspore.Tensor)." \
+                      "Exception has been raised and will be captured/logged externally."
+            logger.warning_log_with_exp(err_msg, ApiAccuracyCheckerException(ApiAccuracyCheckerException.UnsupportType))
         if isinstance(parameter, mindspore.Tensor):
             self.shape = tuple(parameter.shape)
             self.dtype_str = ms_dtype_to_dtype_str.get(parameter.dtype)