mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
- mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
- msprobe/README.md +39 -3
- msprobe/config.json +1 -3
- msprobe/core/advisor/advisor.py +8 -3
- msprobe/core/common/const.py +113 -13
- msprobe/core/common/exceptions.py +25 -3
- msprobe/core/common/file_utils.py +150 -26
- msprobe/core/common/inplace_op_checker.py +15 -0
- msprobe/core/common/log.py +27 -9
- msprobe/core/common/utils.py +182 -69
- msprobe/core/common_config.py +44 -15
- msprobe/core/compare/acc_compare.py +207 -142
- msprobe/core/compare/check.py +2 -5
- msprobe/core/compare/compare_cli.py +21 -4
- msprobe/core/compare/highlight.py +124 -55
- msprobe/core/compare/layer_mapping/__init__.py +19 -0
- msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
- msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
- msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
- msprobe/core/compare/npy_compare.py +52 -23
- msprobe/core/compare/utils.py +272 -247
- msprobe/core/data_dump/data_collector.py +13 -11
- msprobe/core/data_dump/data_processor/base.py +46 -16
- msprobe/core/data_dump/data_processor/mindspore_processor.py +4 -4
- msprobe/core/data_dump/data_processor/pytorch_processor.py +156 -59
- msprobe/core/data_dump/scope.py +113 -34
- msprobe/core/grad_probe/constant.py +27 -13
- msprobe/core/grad_probe/grad_compare.py +18 -1
- msprobe/core/grad_probe/utils.py +30 -2
- msprobe/core/overflow_check/abnormal_scene.py +185 -0
- msprobe/core/overflow_check/api_info.py +55 -0
- msprobe/core/overflow_check/checker.py +138 -0
- msprobe/core/overflow_check/filter.py +157 -0
- msprobe/core/overflow_check/ignore_rules.yaml +55 -0
- msprobe/core/overflow_check/level.py +22 -0
- msprobe/core/overflow_check/utils.py +28 -0
- msprobe/docs/01.installation.md +10 -0
- msprobe/docs/02.config_introduction.md +49 -22
- msprobe/docs/03.config_examples.md +2 -9
- msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
- msprobe/docs/05.data_dump_PyTorch.md +3 -1
- msprobe/docs/06.data_dump_MindSpore.md +157 -90
- msprobe/docs/07.accuracy_checker_PyTorch.md +12 -12
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
- msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
- msprobe/docs/10.accuracy_compare_PyTorch.md +19 -13
- msprobe/docs/11.accuracy_compare_MindSpore.md +104 -13
- msprobe/docs/12.overflow_check_PyTorch.md +1 -1
- msprobe/docs/13.overflow_check_MindSpore.md +6 -6
- msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
- msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
- msprobe/docs/17.grad_probe.md +5 -6
- msprobe/docs/19.monitor.md +468 -0
- msprobe/docs/20.monitor_performance_baseline.md +52 -0
- msprobe/docs/21.visualization_PyTorch.md +386 -0
- msprobe/docs/22.visualization_MindSpore.md +384 -0
- msprobe/docs/23.tool_function_introduction.md +28 -0
- msprobe/docs/FAQ.md +3 -0
- msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/monitor/cpu_info.png +0 -0
- msprobe/mindspore/__init__.py +15 -0
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +113 -145
- msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
- msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
- msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
- msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
- msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
- msprobe/mindspore/api_accuracy_checker/main.py +27 -3
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
- msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
- msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
- msprobe/mindspore/cell_processor.py +33 -12
- msprobe/mindspore/common/const.py +33 -13
- msprobe/mindspore/common/log.py +5 -9
- msprobe/mindspore/common/utils.py +43 -4
- msprobe/mindspore/compare/distributed_compare.py +22 -22
- msprobe/mindspore/compare/ms_compare.py +271 -248
- msprobe/mindspore/compare/ms_graph_compare.py +81 -47
- msprobe/mindspore/debugger/debugger_config.py +4 -1
- msprobe/mindspore/debugger/precision_debugger.py +7 -1
- msprobe/mindspore/dump/dump_tool_factory.py +3 -1
- msprobe/mindspore/dump/hook_cell/api_registry.py +12 -2
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +13 -16
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +25 -0
- msprobe/mindspore/dump/jit_dump.py +17 -5
- msprobe/mindspore/dump/kernel_graph_dump.py +2 -4
- msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
- msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +145 -39
- msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
- msprobe/mindspore/free_benchmark/common/utils.py +19 -4
- msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
- msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
- msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +4 -4
- msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
- msprobe/mindspore/grad_probe/global_context.py +28 -8
- msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
- msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
- msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
- msprobe/mindspore/grad_probe/hook.py +24 -10
- msprobe/mindspore/grad_probe/utils.py +18 -5
- msprobe/mindspore/ms_config.py +22 -15
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +2 -4
- msprobe/mindspore/runtime.py +15 -0
- msprobe/mindspore/service.py +36 -30
- msprobe/mindspore/task_handler_factory.py +15 -0
- msprobe/msprobe.py +24 -7
- msprobe/pytorch/__init__.py +3 -2
- msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -4
- msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +6 -1
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +19 -14
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +13 -9
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +77 -53
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +15 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
- msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
- msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +100 -6
- msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
- msprobe/pytorch/bench_functions/swiglu.py +10 -2
- msprobe/pytorch/common/parse_json.py +6 -6
- msprobe/pytorch/common/utils.py +56 -5
- msprobe/pytorch/compare/distributed_compare.py +8 -9
- msprobe/pytorch/compare/pt_compare.py +8 -6
- msprobe/pytorch/debugger/debugger_config.py +19 -15
- msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
- msprobe/pytorch/free_benchmark/common/constant.py +15 -0
- msprobe/pytorch/free_benchmark/common/counter.py +15 -0
- msprobe/pytorch/free_benchmark/common/enums.py +15 -0
- msprobe/pytorch/free_benchmark/common/params.py +8 -1
- msprobe/pytorch/free_benchmark/common/utils.py +26 -4
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -3
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +10 -0
- msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
- msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
- msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
- msprobe/pytorch/hook_module/wrap_functional.py +14 -12
- msprobe/pytorch/module_processer.py +2 -5
- msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
- msprobe/pytorch/monitor/anomaly_detect.py +340 -0
- msprobe/pytorch/monitor/distributed/__init__.py +0 -0
- msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
- msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
- msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
- msprobe/pytorch/monitor/features.py +108 -0
- msprobe/pytorch/monitor/module_hook.py +870 -0
- msprobe/pytorch/monitor/module_metric.py +193 -0
- msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
- msprobe/pytorch/monitor/optimizer_collect.py +295 -0
- msprobe/pytorch/monitor/unittest/__init__.py +0 -0
- msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
- msprobe/pytorch/monitor/utils.py +250 -0
- msprobe/pytorch/monitor/visualizer.py +59 -0
- msprobe/pytorch/online_dispatch/__init__.py +2 -3
- msprobe/pytorch/online_dispatch/compare.py +29 -38
- msprobe/pytorch/online_dispatch/dispatch.py +50 -25
- msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
- msprobe/pytorch/online_dispatch/single_compare.py +53 -32
- msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
- msprobe/pytorch/online_dispatch/utils.py +49 -21
- msprobe/pytorch/parse_tool/lib/compare.py +12 -18
- msprobe/pytorch/parse_tool/lib/config.py +1 -1
- msprobe/pytorch/parse_tool/lib/parse_tool.py +1 -2
- msprobe/pytorch/parse_tool/lib/utils.py +16 -35
- msprobe/pytorch/parse_tool/lib/visualization.py +2 -0
- msprobe/pytorch/pt_config.py +31 -8
- msprobe/pytorch/service.py +15 -5
- msprobe/visualization/__init__.py +14 -0
- msprobe/visualization/builder/__init__.py +14 -0
- msprobe/visualization/builder/graph_builder.py +165 -0
- msprobe/visualization/builder/msprobe_adapter.py +205 -0
- msprobe/visualization/compare/__init__.py +14 -0
- msprobe/visualization/compare/graph_comparator.py +130 -0
- msprobe/visualization/compare/mode_adapter.py +211 -0
- msprobe/visualization/graph/__init__.py +14 -0
- msprobe/visualization/graph/base_node.py +124 -0
- msprobe/visualization/graph/graph.py +200 -0
- msprobe/visualization/graph/node_colors.py +95 -0
- msprobe/visualization/graph/node_op.py +39 -0
- msprobe/visualization/graph_service.py +214 -0
- msprobe/visualization/utils.py +232 -0
- mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
- msprobe/docs/04.acl_config_examples.md +0 -78
- msprobe/mindspore/compare/layer_mapping.py +0 -146
- msprobe/mindspore/compare/modify_mapping.py +0 -107
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
- /msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from msprobe.visualization.utils import GraphConst, ToolTip
|
|
18
|
+
|
|
19
|
+
# Description text attached to the summary-compare entries of NodeColors.
SUMMARY_DESCRIPTION = "此节点所有输入输出的统计量相对误差, 值越大代表测量值与标杆值的偏差越大, 相对误差计算方式:|(测量值-标杆值)/标杆值|"
# Description text attached to the real-data-compare entries of NodeColors;
# the formula part is taken from ToolTip.ONE_THOUSANDTH_ERR_RATIO.
REAL_DATA_DESCRIPTION = (
    "此节点所有输入的最小双千分之一和所有输出的最小双千分之一的差值的绝对值, 代表双千指标的变化情况, "
    + f"值越大代表测量值与标杆值的偏差越大, 双千分之一指标计算方式:{ToolTip.ONE_THOUSANDTH_ERR_RATIO}"
)
# Description texts for the md5-compare entries: N is used with value 0, Y with value 1.
MD5_DESCRIPTION_N = "与标杆相比, 此节点任意输入输出的md5值不同"
MD5_DESCRIPTION_Y = "与标杆相比, 此节点所有输入输出的md5值相同"
# Description text used by the GREY entry.
NOT_MATCHED = "比对过程中节点未匹配上"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class NodeColors(Enum):
    """Node highlight colours together with the metric interval each one stands for.

    Every member value is a ``(hex_value, mode_info)`` pair. ``mode_info`` is either a
    mapping from compare mode to ``{VALUE: [low, high], DESCRIPTION: text}`` or, as for
    GREY, a single such entry that is shared by every compare mode.
    """
    # The smaller the numeric suffix of a member name, the lighter the colour.
    # VALUE intervals are left-closed and right-open; two identical bounds denote a fixed value.
    YELLOW_1 = ("#FFFCF3", {
        GraphConst.SUMMARY_COMPARE: {GraphConst.VALUE: [0, 0.2], GraphConst.DESCRIPTION: SUMMARY_DESCRIPTION},
        GraphConst.REAL_DATA_COMPARE: {GraphConst.VALUE: [0, 0.05], GraphConst.DESCRIPTION: REAL_DATA_DESCRIPTION},
        GraphConst.MD5_COMPARE: {GraphConst.VALUE: [1, 1], GraphConst.DESCRIPTION: MD5_DESCRIPTION_Y},
    })
    YELLOW_2 = ("#FFEDBE", {
        GraphConst.SUMMARY_COMPARE: {GraphConst.VALUE: [0.2, 0.4], GraphConst.DESCRIPTION: SUMMARY_DESCRIPTION},
        GraphConst.REAL_DATA_COMPARE: {GraphConst.VALUE: [0.05, 0.1], GraphConst.DESCRIPTION: REAL_DATA_DESCRIPTION}
    })
    ORANGE_1 = ("#FFDC7F", {
        GraphConst.SUMMARY_COMPARE: {GraphConst.VALUE: [0.4, 0.6], GraphConst.DESCRIPTION: SUMMARY_DESCRIPTION},
        GraphConst.REAL_DATA_COMPARE: {GraphConst.VALUE: [0.1, 0.15], GraphConst.DESCRIPTION: REAL_DATA_DESCRIPTION}
    })
    ORANGE_2 = ("#FFC62E", {
        GraphConst.SUMMARY_COMPARE: {GraphConst.VALUE: [0.6, 0.8], GraphConst.DESCRIPTION: SUMMARY_DESCRIPTION},
        GraphConst.REAL_DATA_COMPARE: {GraphConst.VALUE: [0.15, 0.2], GraphConst.DESCRIPTION: REAL_DATA_DESCRIPTION}
    })
    RED = ("#FF704D", {
        GraphConst.SUMMARY_COMPARE: {GraphConst.VALUE: [0.8, 1], GraphConst.DESCRIPTION: SUMMARY_DESCRIPTION},
        GraphConst.REAL_DATA_COMPARE: {GraphConst.VALUE: [0.2, 1], GraphConst.DESCRIPTION: REAL_DATA_DESCRIPTION},
        GraphConst.MD5_COMPARE: {GraphConst.VALUE: [0, 0], GraphConst.DESCRIPTION: MD5_DESCRIPTION_N},
    })
    GREY = ("#C7C7C7", {
        GraphConst.VALUE: [], GraphConst.DESCRIPTION: NOT_MATCHED
    })

    def __init__(self, hex_value, mode_info):
        # Enum unpacks the member's tuple value into these two attributes.
        self.hex_value = hex_value
        self.mode_info = mode_info

    @staticmethod
    def get_node_colors(mode):
        """
        Collect the colour legend for one compare mode.
        Args:
            mode: compare mode
        Returns: dict mapping hex colour to its info for ``mode``; colours whose
            info for ``mode`` is empty are left out
        """
        legend = {}
        for color in NodeColors:
            # Resolve the info once per colour instead of once for the filter and once for the value.
            info = color.get_info_by_mode(mode)
            if info:
                legend[color.hex_value] = info
        return legend

    @staticmethod
    def get_node_error_status(mode, value):
        """
        Tell whether a comparison metric is above the baseline, i.e. strictly greater
        than the lower bound of ORANGE_1's interval for ``mode``.
        Args:
            mode: compare mode
            value: comparison metric
        Returns: bool; False when ORANGE_1 carries no interval for ``mode``
        """
        info = NodeColors.ORANGE_1.get_info_by_mode(mode)
        if info and GraphConst.VALUE in info:
            value_range = info[GraphConst.VALUE]
            return value > value_range[0]
        return False

    def get_info_by_mode(self, mode):
        """
        Return this colour's info for ``mode``.
        Args:
            mode: compare mode
        Returns: the per-mode entry when ``mode_info`` is keyed by mode (``{}`` if the
            mode is absent), ``mode_info`` itself when it is shared by all modes, and
            ``{}`` when ``mode_info`` is empty or not a dict
        """
        # An empty dict has no first value to inspect; treat it like "no info"
        # rather than letting next() raise StopIteration.
        if not isinstance(self.mode_info, dict) or not self.mode_info:
            return {}
        first_entry = next(iter(self.mode_info.values()))
        if isinstance(first_entry, dict):
            # Mode-specific information.
            return self.mode_info.get(mode, {})
        # The same information is shared by every mode.
        return self.mode_info
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from enum import Enum
|
|
17
|
+
import re
|
|
18
|
+
from msprobe.visualization.builder.msprobe_adapter import op_patterns
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class NodeOp(Enum):
    """Kinds of graph node; each member's value is its index into ``op_patterns``."""
    module = 0
    function_api = 1
    api_collection = 9

    @staticmethod
    def get_node_op(node_name: str):
        """
        Resolve the node kind from the string that names the node.

        Members are tried in definition order and the first one whose pattern
        matches the start of ``node_name`` is returned. An Exception is raised when
        a member's index falls outside ``op_patterns`` or when nothing matches.
        """
        for candidate in NodeOp:
            position = candidate.value
            if not 0 <= position < len(op_patterns):
                raise Exception("NodeOp and op_patterns in MsprobeAdapter do not match")
            if re.match(op_patterns[position], node_name):
                return candidate
        raise Exception(f"Cannot parse node_name {node_name} into NodeOp")
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# Copyright (c) 2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import time
|
|
18
|
+
import json
|
|
19
|
+
from msprobe.core.common.file_utils import (FileOpen, check_file_type, create_directory, FileChecker,
|
|
20
|
+
check_file_or_directory_path)
|
|
21
|
+
from msprobe.core.common.const import FileCheckConst, Const
|
|
22
|
+
from msprobe.core.common.utils import CompareException
|
|
23
|
+
from msprobe.core.overflow_check.checker import AnomalyDetector
|
|
24
|
+
from msprobe.visualization.compare.graph_comparator import GraphComparator
|
|
25
|
+
from msprobe.visualization.utils import GraphConst, check_directory_content
|
|
26
|
+
from msprobe.visualization.builder.graph_builder import GraphBuilder, GraphExportConfig
|
|
27
|
+
from msprobe.core.common.log import logger
|
|
28
|
+
from msprobe.visualization.graph.node_colors import NodeColors
|
|
29
|
+
from msprobe.core.compare.layer_mapping import generate_api_mapping_by_layer_mapping
|
|
30
|
+
from msprobe.core.compare.utils import check_and_return_dir_contents
|
|
31
|
+
|
|
32
|
+
# Timestamp (YYYYmmddHHMMSS) captured once, when this module is imported; it is
# embedded in the default and per-rank output file names built below.
current_time = time.strftime("%Y%m%d%H%M%S")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _compare_graph(input_param, args, output_file_name=f'compare_{current_time}.vis'):
    """
    Build the NPU and bench graphs from two dump directories, compare them and
    export the result as a .vis file under ``args.output_path``.

    Args:
        input_param: dict providing 'npu_path' and 'bench_path' (dump directories) and
            optionally 'is_print_compare_log' (defaults to True)
        args: parsed CLI arguments; ``layer_mapping``, ``output_path``, ``framework``
            and ``overflow_check`` are read
        output_file_name: name of the exported file inside ``args.output_path``
    """
    def _readable_file(dump_dir, file_name):
        # Validate that <dump_dir>/<file_name> is a readable file and return the checked path.
        return FileChecker(os.path.join(dump_dir, file_name), FileCheckConst.FILE,
                           FileCheckConst.READ_ABLE).common_check()

    logger.info('Start building model graphs...')
    # Build a graph for each of the two dumps.
    dump_path_n = input_param.get('npu_path')
    dump_path_b = input_param.get('bench_path')
    construct_path_n = _readable_file(dump_path_n, GraphConst.CONSTRUCT_FILE)
    construct_path_b = _readable_file(dump_path_b, GraphConst.CONSTRUCT_FILE)
    data_path_n = _readable_file(dump_path_n, GraphConst.DUMP_FILE)
    data_path_b = _readable_file(dump_path_b, GraphConst.DUMP_FILE)
    stack_path_n = _readable_file(dump_path_n, GraphConst.STACK_FILE)
    stack_path_b = _readable_file(dump_path_b, GraphConst.STACK_FILE)
    graph_n = GraphBuilder.build(construct_path_n, data_path_n, stack_path_n)
    graph_b = GraphBuilder.build(construct_path_b, data_path_b, stack_path_b)
    logger.info('Model graphs built successfully, start Comparing graphs...')
    # Compare on the basis of graph, stack and data.
    dump_path_param = {
        'npu_json_path': data_path_n,
        'bench_json_path': data_path_b,
        'stack_json_path': stack_path_n,
        'is_print_compare_log': input_param.get("is_print_compare_log", True)
    }
    mapping_dict = None
    if args.layer_mapping:
        yaml_path = FileChecker(args.layer_mapping, FileCheckConst.FILE, FileCheckConst.READ_ABLE).common_check()
        try:
            mapping_dict = generate_api_mapping_by_layer_mapping(data_path_n, data_path_b, yaml_path)
        except Exception as err:
            # Best effort: the comparison continues without a mapping, but the cause is
            # reported so that the user can actually fix the mapping file.
            logger.warning('The layer mapping file parsing failed, please check file format, '
                           f'mapping is not effective. Reason: {err}')
    graph_comparator = GraphComparator([graph_n, graph_b], dump_path_param, args.output_path, args.framework,
                                       mapping_dict=mapping_dict)
    graph_comparator.compare()
    micro_steps = graph_n.paging_by_micro_step(graph_b)
    # Optional overflow detection on both graphs.
    if args.overflow_check:
        graph_n.overflow_check()
        graph_b.overflow_check()

    create_directory(args.output_path)
    output_path = os.path.join(args.output_path, output_file_name)
    task = GraphConst.GRAPHCOMPARE_MODE_TO_DUMP_MODE_TO_MAPPING.get(graph_comparator.ma.compare_mode)
    export_config = GraphExportConfig(graph_n, graph_b, graph_comparator.ma.get_tool_tip(),
                                      NodeColors.get_node_colors(graph_comparator.ma.compare_mode), micro_steps, task)
    GraphBuilder.to_json(output_path, export_config)
    logger.info(f'Model graphs compared successfully, the result file is saved in {output_path}')
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _build_graph(dump_path, out_path, overflow_check=False, output_file_name=f'build_{current_time}.vis'):
    """
    Build the graph of a single dump directory and export it as a .vis file.

    Args:
        dump_path: directory holding the construct, dump and stack files
        out_path: directory that receives the exported file (created if needed)
        overflow_check: run the graph's overflow check before exporting
        output_file_name: name of the exported file inside ``out_path``
    """
    logger.info('Start building model graph...')
    # Validate the three input files in the order construct -> dump -> stack.
    construct_path, data_path, stack_path = [
        FileChecker(os.path.join(dump_path, file_name), FileCheckConst.FILE,
                    FileCheckConst.READ_ABLE).common_check()
        for file_name in (GraphConst.CONSTRUCT_FILE, GraphConst.DUMP_FILE, GraphConst.STACK_FILE)
    ]
    create_directory(out_path)
    output_path = os.path.join(out_path, output_file_name)
    graph = GraphBuilder.build(construct_path, data_path, stack_path)
    micro_steps = graph.paging_by_micro_step()
    # Optional overflow detection.
    if overflow_check:
        graph.overflow_check()
    export_config = GraphExportConfig(graph, micro_steps=micro_steps)
    GraphBuilder.to_json(output_path, export_config)
    logger.info(f'Model graph built successfully, the result file is saved in {output_path}')
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _compare_graph_ranks(input_param, args, step=None):
    """
    Compare two dumps rank by rank, writing one .vis file per rank.

    Args:
        input_param: dict whose 'npu_path' and 'bench_path' point at directories that
            contain the rank folders; it is not modified
        args: parsed CLI arguments, forwarded to ``_compare_graph``
        step: optional step folder name, added to the output file names when truthy

    Raises:
        CompareException: the rank folders of the two dumps differ
    """
    dump_rank_n = input_param.get('npu_path')
    dump_rank_b = input_param.get('bench_path')
    npu_ranks = sorted(check_and_return_dir_contents(dump_rank_n, Const.RANK))
    bench_ranks = sorted(check_and_return_dir_contents(dump_rank_b, Const.RANK))
    if npu_ranks != bench_ranks:
        logger.error('The number of ranks in the two runs are different. Unable to match the ranks.')
        raise CompareException(CompareException.INVALID_PATH_ERROR)
    # Both lists are equal at this point, so one rank name addresses both sides.
    for rank in npu_ranks:
        logger.info(f'Start processing data for {rank}...')
        # Work on a copy so the caller's dict keeps its original paths.
        rank_param = dict(input_param)
        rank_param['npu_path'] = os.path.join(dump_rank_n, rank)
        rank_param['bench_path'] = os.path.join(dump_rank_b, rank)
        output_file_name = f'compare_{step}_{rank}_{current_time}.vis' if step else f'compare_{rank}_{current_time}.vis'
        _compare_graph(rank_param, args, output_file_name=output_file_name)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _compare_graph_steps(input_param, args):
    """
    Compare two dumps step by step; every step is then compared rank by rank.

    Args:
        input_param: dict whose 'npu_path' and 'bench_path' point at directories that
            contain the step folders; it is not modified
        args: parsed CLI arguments, forwarded to ``_compare_graph_ranks``

    Raises:
        CompareException: the step folders of the two dumps differ
    """
    dump_step_n = input_param.get('npu_path')
    dump_step_b = input_param.get('bench_path')

    npu_steps = sorted(check_and_return_dir_contents(dump_step_n, Const.STEP))
    bench_steps = sorted(check_and_return_dir_contents(dump_step_b, Const.STEP))

    if npu_steps != bench_steps:
        logger.error('The number of steps in the two runs are different. Unable to match the steps.')
        raise CompareException(CompareException.INVALID_PATH_ERROR)

    for folder_step in npu_steps:
        logger.info(f'Start processing data for {folder_step}...')
        # Work on a copy so the caller's dict keeps its original paths.
        step_param = dict(input_param)
        step_param['npu_path'] = os.path.join(dump_step_n, folder_step)
        step_param['bench_path'] = os.path.join(dump_step_b, folder_step)

        _compare_graph_ranks(step_param, args, step=folder_step)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _build_graph_ranks(dump_ranks_path, out_path, overflow_check=False, step=None):
    """
    Build one graph file per rank folder found under ``dump_ranks_path``.

    Args:
        dump_ranks_path: directory containing the rank folders
        out_path: directory that receives the exported files
        overflow_check: forwarded to ``_build_graph``
        step: optional step folder name, added to the output file names when truthy
    """
    for rank in sorted(check_and_return_dir_contents(dump_ranks_path, Const.RANK)):
        logger.info(f'Start processing data for {rank}...')
        rank_dump_path = os.path.join(dump_ranks_path, rank)
        if step:
            file_name = f'build_{step}_{rank}_{current_time}.vis'
        else:
            file_name = f'build_{rank}_{current_time}.vis'
        _build_graph(rank_dump_path, out_path, overflow_check, file_name)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _build_graph_steps(dump_steps_path, out_path, overflow_check=False):
    """
    Build graph files for every step folder found under ``dump_steps_path``.

    Args:
        dump_steps_path: directory containing the step folders
        out_path: directory that receives the exported files
        overflow_check: forwarded to ``_build_graph_ranks``
    """
    for step_name in sorted(check_and_return_dir_contents(dump_steps_path, Const.STEP)):
        logger.info(f'Start processing data for {step_name}...')
        _build_graph_ranks(os.path.join(dump_steps_path, step_name), out_path, overflow_check, step_name)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _graph_service_parser(parser):
    """
    Register the graph-service command line options on ``parser``.

    Options: -i/--input_path (required), -o/--output_path (required),
    -lm/--layer_mapping (optional) and the -oc/--overflow_check switch.
    """
    option_specs = (
        (("-i", "--input_path"),
         {"dest": "input_path", "type": str,
          "help": "<Required> The compare input path, a dict json.", "required": True}),
        (("-o", "--output_path"),
         {"dest": "output_path", "type": str,
          "help": "<Required> The compare task result out path.", "required": True}),
        (("-lm", "--layer_mapping"),
         {"dest": "layer_mapping", "type": str,
          "help": "<optional> The layer mapping file path.", "required": False}),
        (("-oc", "--overflow_check"),
         {"dest": "overflow_check", "action": "store_true",
          "help": "<Optional> whether open overflow_check for graph.", "required": False}),
    )
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _graph_service_command(args):
    """
    Run the graph service for the parsed command line ``args``.

    The JSON file at ``args.input_path`` supplies 'npu_path' and optionally
    'bench_path'. With only 'npu_path' a graph is built; with both, the two dumps are
    compared. In either case the work is dispatched per rank, per step or for a single
    dump according to what ``check_directory_content`` reports.

    Raises:
        ValueError: the two directories have different structures
        CompareException: the paths are not directories
    """
    with FileOpen(args.input_path, "r") as config_file:
        input_param = json.load(config_file)
    npu_path = input_param.get("npu_path")
    bench_path = input_param.get("bench_path")
    check_file_or_directory_path(npu_path, isdir=True)
    if bench_path:
        check_file_or_directory_path(bench_path, isdir=True)

    # Build mode: only the npu dump was given.
    if check_file_type(npu_path) == FileCheckConst.DIR and not bench_path:
        layout = check_directory_content(npu_path)
        builder = _build_graph
        if layout == GraphConst.RANKS:
            builder = _build_graph_ranks
        elif layout == GraphConst.STEPS:
            builder = _build_graph_steps
        builder(npu_path, args.output_path, args.overflow_check)
        return

    # Compare mode requires both paths to be directories.
    both_are_dirs = (check_file_type(npu_path) == FileCheckConst.DIR
                     and check_file_type(bench_path) == FileCheckConst.DIR)
    if not both_are_dirs:
        logger.error("The npu_path or bench_path should be a folder.")
        raise CompareException(CompareException.INVALID_COMPARE_MODE)

    content_n = check_directory_content(npu_path)
    content_b = check_directory_content(bench_path)
    if content_n != content_b:
        raise ValueError('The directory structures of npu_path and bench_path are inconsistent.')
    comparer = _compare_graph
    if content_n == GraphConst.RANKS:
        comparer = _compare_graph_ranks
    elif content_n == GraphConst.STEPS:
        comparer = _compare_graph_steps
    comparer(input_param, args)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _pt_graph_service_parser(parser):
    """Register the graph-service options on ``parser`` by delegating to ``_graph_service_parser``.

    NOTE(review): the ``_pt`` prefix presumably marks the PyTorch entry point - confirm against the caller.
    """
    _graph_service_parser(parser)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _pt_graph_service_command(args):
    """Run the graph service for ``args`` by delegating to ``_graph_service_command``.

    NOTE(review): the ``_pt`` prefix presumably marks the PyTorch entry point - confirm against the caller.
    """
    _graph_service_command(args)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _ms_graph_service_parser(parser):
    """Register the graph-service options on ``parser`` by delegating to ``_graph_service_parser``.

    NOTE(review): the ``_ms`` prefix presumably marks the MindSpore entry point - confirm against the caller.
    """
    _graph_service_parser(parser)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _ms_graph_service_command(args):
    """Run the graph service for ``args`` by delegating to ``_graph_service_command``.

    NOTE(review): the ``_ms`` prefix presumably marks the MindSpore entry point - confirm against the caller.
    """
    _graph_service_command(args)
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
# Copyright (c) 2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import re
|
|
18
|
+
import json
|
|
19
|
+
from msprobe.core.common.file_utils import FileOpen
|
|
20
|
+
from msprobe.core.common.const import CompareConst, Const
|
|
21
|
+
from msprobe.core.compare.acc_compare import Comparator
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def load_json_file(file_path):
    """
    Load a JSON file and return its content as a dict.

    Args:
        file_path: path of the JSON file, opened through ``FileOpen`` in read mode.

    Returns:
        The parsed top-level JSON object. An empty dict is returned when the
        content is not valid JSON or when the top-level value is not a dict.
    """
    try:
        with FileOpen(file_path, 'r') as handle:
            parsed = json.load(handle)
    except json.JSONDecodeError:
        # Malformed JSON is treated the same as "no data".
        return {}
    return parsed if isinstance(parsed, dict) else {}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def load_data_json_file(file_path):
    """
    Load the ``data`` field of a dump.json file.

    Args:
        file_path: path of the JSON file, read via ``load_json_file``.

    Returns:
        The value stored under ``GraphConst.DATA_KEY``, or an empty dict when
        that key is absent (``load_json_file`` itself yields an empty dict for
        unreadable or non-dict content).
    """
    content = load_json_file(file_path)
    return content.get(GraphConst.DATA_KEY, {})
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def save_json_file(file_path, data):
    """
    Save ``data`` to a JSON file.

    Args:
        file_path: destination path, opened through ``FileOpen`` in write mode.
        data: JSON-serializable object; written with a 4-space indent.
    """
    with FileOpen(file_path, 'w') as handle:
        serialized = json.dumps(data, indent=4)
        handle.write(serialized)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_csv_df(stack, csv_data, compare_mode):
    """
    Build the comparison result table through the accuracy-compare interface.

    Args:
        stack: forwarded as the second argument of ``Comparator.make_result_table``.
        csv_data: forwarded as the first argument of ``Comparator.make_result_table``.
        compare_mode: graph compare mode; translated to a dump mode through
            ``GraphConst.GRAPHCOMPARE_MODE_TO_DUMP_MODE_TO_MAPPING``. An unknown
            mode is translated to ``None``.

    Returns:
        Whatever ``Comparator.make_result_table`` returns.
    """
    mode_lookup = GraphConst.GRAPHCOMPARE_MODE_TO_DUMP_MODE_TO_MAPPING
    dump_mode = mode_lookup.get(compare_mode)
    return Comparator.make_result_table(csv_data, stack, dump_mode)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def str2float(percentage_str):
    """
    Convert a percentage string to a float fraction.

    Args:
        percentage_str: e.g. '0.00%', '23.4%'. Percent signs at either end are stripped.

    Returns:
        The numeric value divided by 100 (e.g. 0.00, 0.234), or 0 when the
        input is not a string or the remaining text is not a number.
    """
    try:
        numeric_value = float(percentage_str.strip('%'))
    except (ValueError, AttributeError):
        # AttributeError: input has no .strip (not a string); ValueError: not a number.
        return 0
    return numeric_value / 100
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def is_integer(s):
    """
    Tell whether ``s`` can be converted with ``int()``.

    Args:
        s: any object.

    Returns:
        True when ``int(s)`` succeeds, False when it raises any ``Exception``.
        Note that anything ``int()`` accepts counts, e.g. a float such as 3.9.
    """
    try:
        int(s)
    except Exception:
        return False
    return True
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def check_directory_content(input_path):
    """
    Classify the content of ``input_path``.

    The directory is expected to hold only step{number} folders (e.g. step0),
    only rank{number} folders (e.g. rank0), or only files.

    Args:
        input_path: directory to inspect.

    Returns:
        ``GraphConst.FILES``, ``GraphConst.RANKS`` or ``GraphConst.STEPS``.

    Raises:
        ValueError: when the directory is empty, or when its sub-directories are
            neither all rank{number} nor all step{number} folders.
    """
    entries = os.listdir(input_path)
    if not entries:
        raise ValueError(f'The path {input_path} is empty.')

    # A real-data dump contains a dump_tensor_data folder next to the files.
    tensor_data_dir = os.path.join(input_path, Const.DUMP_TENSOR_DATA)
    if os.path.exists(tensor_data_dir):
        return GraphConst.FILES

    # Everything is a regular file.
    if all(os.path.isfile(os.path.join(input_path, entry)) for entry in entries):
        return GraphConst.FILES

    rank_pattern = re.compile(r'^rank\d+$')
    step_pattern = re.compile(r'^step\d+$')

    # Only sub-directories take part in the naming check; other entries are ignored.
    dir_names = [entry for entry in entries if os.path.isdir(os.path.join(input_path, entry))]

    if all(rank_pattern.match(name) for name in dir_names):
        return GraphConst.RANKS
    if all(step_pattern.match(name) for name in dir_names):
        return GraphConst.STEPS

    raise ValueError("The input path content does not conform to the expected naming convention. "
                     "It is expected to be all step{number} named folders (such as step0), "
                     "all rank{number} named folders (such as rank0), or all files.")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class ToolTip:
    """Tooltip texts (in Chinese) describing each comparison metric."""

    # Differences of the statistics (max / min / mean / L2 norm) between NPU and benchmark.
    MAX_DIFF = 'NPU与标杆API统计信息比对,最大值的差值'
    MIN_DIFF = 'NPU与标杆API统计信息比对,最小值的差值'
    MEAN_DIFF = 'NPU与标杆API统计信息比对,平均值的差值'
    NORM_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值'

    # MD5 digest of the data.
    MD5 = '数据MD5信息,用于比较两个数据信息是否完全一致'

    # Ratio of elements whose relative error is below 1/1000 and 5/1000.
    ONE_THOUSANDTH_ERR_RATIO = 'Tensor中的元素逐个与对应的标杆数据对比,相对误差小于千分之一的比例占总元素个数的比例,比例越接近1越好'
    FIVE_THOUSANDTHS_ERR_RATIO = 'Tensor中的元素逐个与对应的标杆数据对比,相对误差小于千分之五的比例占总元素个数的比例,比例越接近1越好'

    # Cosine similarity.
    COSINE = (
        '通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。'
        + '在计算中可能会存在nan,主要由于可能会出现其中一个向量为0'
    )

    # Maximum absolute / relative error.
    MAX_ABS_ERR = '当最大绝对误差越接近0表示其计算的误差越小,实际可接受阈值为小于0.001'
    MAX_RELATIVE_ERR = (
        '当最大相对误差越接近0表示其计算的误差越小。'
        + '当dump数据中存在0或Nan时,比对结果中最大相对误差则出现inf或Nan的情况,属于正常现象'
    )

    # Template with three positional ``{}`` placeholders.
    SMALL_VALUE_TIP = '{}, 由于{}小于{}, 建议不参考此相对误差,请参考绝对误差'
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class Suggestions:
    """Suggestion texts (in Chinese) and the documentation links that go with them."""

    # Suggestion shown for a suspicious module / API comparison result.
    Module = '此模块精度比对结果疑似异常,请使用msprobe工具的数据采集功能对模块中的api进行dump比对'
    API = '此api精度比对结果疑似异常,请使用msprobe工具的预检功能对api进行精度检测'

    # Label and URL of the data-dump documentation.
    DUMP = 'msprobe工具的数据采集功能'
    DUMP_URL = 'https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/pytorch/doc/dump.md'

    # Label and URL of the API accuracy checker documentation.
    API_ACCURACY_CHECKER = 'msprobe工具的预检功能'
    API_ACCURACY_CHECKER_URL = (
        'https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md'
    )
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class GraphConst:
    """Constants shared by the graph building / graph comparison code."""

    # File names.
    CONSTRUCT_FILE = 'construct.json'
    DUMP_FILE = 'dump.json'
    STACK_FILE = 'stack.json'
    GRAPH_FILE = 'graph.vis'

    ERROR_KEY = 'error_key'

    # Compare-mode identifiers (see the two mode mappings below).
    SUMMARY_COMPARE = 0
    MD5_COMPARE = 1
    REAL_DATA_COMPARE = 2

    # JSON keys.
    JSON_NPU_KEY = 'NPU'
    JSON_BENCH_KEY = 'Bench'
    JSON_TIP_KEY = 'ToolTip'
    JSON_ROOT_KEY = 'root'
    JSON_NODE_KEY = 'node'
    JSON_DATA_KEY = 'dump_data_dir'
    JSON_TASK_KEY = 'task'
    DATA_KEY = 'data'

    # Numeric thresholds.
    REAL_DATA_TH = 0.1
    MAX_RELATIVE_ERR_TH = 0.5
    ROUND_TH = 6

    JSON_INDEX_KEY = 'precision_index'
    MAX_INDEX_KEY = 1
    MIN_INDEX_KEY = 0
    SUGGEST_KEY = 'text'
    TAG_NA = 'na'

    OUTPUT_INDEX_TWO = -2
    OUTPUT_INDEX_THREE = -3
    OUTPUT_MIN_LEN = 3
    INPUT = '.input.'
    OUTPUT = '.output.'
    STR_MAX_LEN = 50
    SMALL_VALUE = 1e-3

    # Metric names grouped per compare mode.
    MD5_INDEX_LIST = [CompareConst.RESULT]
    REAL_DATA_INDEX_LIST = [
        CompareConst.COSINE,
        CompareConst.MAX_ABS_ERR,
        CompareConst.MAX_RELATIVE_ERR,
        CompareConst.ONE_THOUSANDTH_ERR_RATIO,
        CompareConst.FIVE_THOUSANDTHS_ERR_RATIO,
    ]
    SUMMARY_INDEX_LIST = [
        CompareConst.MAX_DIFF,
        CompareConst.MIN_DIFF,
        CompareConst.MEAN_DIFF,
        CompareConst.NORM_DIFF,
        CompareConst.MAX_RELATIVE_ERR,
        CompareConst.MIN_RELATIVE_ERR,
        CompareConst.MEAN_RELATIVE_ERR,
        CompareConst.NORM_RELATIVE_ERR,
    ]
    VALUE_INDEX_LIST = [Const.MAX, Const.MIN, Const.MEAN, Const.NORM]

    APIS_BETWEEN_MODULES = 'Apis_Between_Modules'
    NULL = 'null'
    NONE = 'None'
    VALUE = 'value'
    BRACE = '{}'
    DESCRIPTION = 'description'
    COLORS = 'Colors'
    MICRO_STEPS = 'MicroSteps'

    # Dump mode -> graph compare mode.
    DUMP_MODE_TO_GRAPHCOMPARE_MODE_MAPPING = {
        Const.ALL: REAL_DATA_COMPARE,
        Const.SUMMARY: SUMMARY_COMPARE,
        Const.MD5: MD5_COMPARE,
    }

    # Graph compare mode -> dump mode (inverse of the mapping above).
    GRAPHCOMPARE_MODE_TO_DUMP_MODE_TO_MAPPING = {
        REAL_DATA_COMPARE: Const.ALL,
        SUMMARY_COMPARE: Const.SUMMARY,
        MD5_COMPARE: Const.MD5,
    }

    # Per-dtype small-value thresholds.
    SMALL_VALUES = {
        Const.TORCH_FLOAT32: 1e-6,
        Const.TORCH_FLOAT16: 1e-3,
        Const.TORCH_BFLOAT16: 1e-3,
        Const.FLOAT32: 1e-6,
        Const.FLOAT16: 1e-3,
        Const.BFLOAT16: 1e-3,
    }
    # Same keys and values as SMALL_VALUES, kept as an independent dict object.
    SMALL_VALUES_ABS_ERROR = dict(SMALL_VALUES)

    # Return values of check_directory_content.
    RANKS = 'ranks'
    STEPS = 'steps'
    FILES = 'files'
|