mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
- mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
- msprobe/README.md +84 -18
- msprobe/__init__.py +16 -1
- msprobe/config.json +1 -5
- msprobe/core/advisor/advisor.py +16 -11
- msprobe/core/advisor/advisor_const.py +6 -7
- msprobe/core/advisor/advisor_result.py +12 -12
- msprobe/core/common/const.py +164 -3
- msprobe/core/common/exceptions.py +26 -4
- msprobe/core/common/file_utils.py +196 -27
- msprobe/core/common/inplace_op_checker.py +53 -0
- msprobe/core/common/inplace_ops.yaml +251 -0
- msprobe/core/common/log.py +46 -18
- msprobe/core/common/utils.py +308 -209
- msprobe/core/common_config.py +60 -38
- msprobe/core/compare/acc_compare.py +332 -94
- msprobe/core/compare/check.py +104 -22
- msprobe/core/compare/compare_cli.py +42 -5
- msprobe/core/compare/highlight.py +162 -57
- msprobe/core/compare/layer_mapping/__init__.py +19 -0
- msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
- msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
- msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
- msprobe/core/compare/multiprocessing_compute.py +33 -8
- msprobe/core/compare/npy_compare.py +73 -29
- msprobe/core/compare/utils.py +306 -247
- msprobe/core/data_dump/data_collector.py +44 -43
- msprobe/core/data_dump/data_processor/base.py +88 -35
- msprobe/core/data_dump/data_processor/factory.py +20 -3
- msprobe/core/data_dump/data_processor/mindspore_processor.py +14 -8
- msprobe/core/data_dump/data_processor/pytorch_processor.py +180 -66
- msprobe/core/data_dump/json_writer.py +63 -42
- msprobe/core/data_dump/scope.py +143 -48
- msprobe/core/grad_probe/constant.py +31 -13
- msprobe/core/grad_probe/grad_compare.py +20 -4
- msprobe/core/grad_probe/utils.py +44 -3
- msprobe/core/overflow_check/abnormal_scene.py +185 -0
- msprobe/core/overflow_check/api_info.py +55 -0
- msprobe/core/overflow_check/checker.py +138 -0
- msprobe/core/overflow_check/filter.py +157 -0
- msprobe/core/overflow_check/ignore_rules.yaml +55 -0
- msprobe/core/overflow_check/level.py +22 -0
- msprobe/core/overflow_check/utils.py +28 -0
- msprobe/docs/01.installation.md +29 -9
- msprobe/docs/02.config_introduction.md +83 -84
- msprobe/docs/03.config_examples.md +3 -20
- msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
- msprobe/docs/05.data_dump_PyTorch.md +143 -13
- msprobe/docs/06.data_dump_MindSpore.md +197 -88
- msprobe/docs/07.accuracy_checker_PyTorch.md +69 -46
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +52 -17
- msprobe/docs/09.accuracy_checker_MindSpore.md +51 -15
- msprobe/docs/10.accuracy_compare_PyTorch.md +187 -99
- msprobe/docs/11.accuracy_compare_MindSpore.md +253 -31
- msprobe/docs/12.overflow_check_PyTorch.md +1 -1
- msprobe/docs/13.overflow_check_MindSpore.md +6 -6
- msprobe/docs/15.free_benchmarking_PyTorch.md +60 -55
- msprobe/docs/16.free_benchmarking_MindSpore.md +159 -0
- msprobe/docs/17.grad_probe.md +19 -22
- msprobe/docs/18.online_dispatch.md +89 -0
- msprobe/docs/19.monitor.md +468 -0
- msprobe/docs/20.monitor_performance_baseline.md +52 -0
- msprobe/docs/21.visualization_PyTorch.md +386 -0
- msprobe/docs/22.visualization_MindSpore.md +384 -0
- msprobe/docs/23.tool_function_introduction.md +28 -0
- msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +25 -10
- msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/monitor/cpu_info.png +0 -0
- msprobe/docs/img/ms_dump.png +0 -0
- msprobe/docs/img/ms_layer.png +0 -0
- msprobe/docs/img/pt_dump.png +0 -0
- msprobe/mindspore/__init__.py +16 -0
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +130 -138
- msprobe/mindspore/api_accuracy_checker/api_info.py +27 -5
- msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
- msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
- msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
- msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
- msprobe/mindspore/api_accuracy_checker/main.py +27 -3
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
- msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
- msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
- msprobe/mindspore/cell_processor.py +58 -13
- msprobe/mindspore/common/const.py +35 -13
- msprobe/mindspore/common/log.py +5 -9
- msprobe/mindspore/common/utils.py +60 -5
- msprobe/mindspore/compare/distributed_compare.py +15 -28
- msprobe/mindspore/compare/ms_compare.py +319 -158
- msprobe/mindspore/compare/ms_graph_compare.py +99 -49
- msprobe/mindspore/debugger/debugger_config.py +20 -14
- msprobe/mindspore/debugger/precision_debugger.py +43 -13
- msprobe/mindspore/dump/dump_tool_factory.py +18 -1
- msprobe/mindspore/dump/hook_cell/api_registry.py +23 -3
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +203 -0
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +107 -10
- msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
- msprobe/mindspore/dump/jit_dump.py +56 -20
- msprobe/mindspore/dump/kernel_graph_dump.py +19 -5
- msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -6
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
- msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +162 -41
- msprobe/mindspore/free_benchmark/common/config.py +15 -0
- msprobe/mindspore/free_benchmark/common/handler_params.py +15 -1
- msprobe/mindspore/free_benchmark/common/utils.py +37 -8
- msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
- msprobe/mindspore/free_benchmark/handler/base_handler.py +20 -5
- msprobe/mindspore/free_benchmark/handler/check_handler.py +21 -7
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +18 -3
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -6
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +23 -8
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +29 -5
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +25 -10
- msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +45 -19
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +29 -8
- msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +22 -7
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +17 -2
- msprobe/mindspore/grad_probe/global_context.py +44 -14
- msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
- msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
- msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
- msprobe/mindspore/grad_probe/hook.py +24 -10
- msprobe/mindspore/grad_probe/utils.py +18 -5
- msprobe/mindspore/ms_config.py +22 -15
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -6
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
- msprobe/mindspore/runtime.py +15 -0
- msprobe/mindspore/service.py +75 -150
- msprobe/mindspore/task_handler_factory.py +15 -0
- msprobe/msprobe.py +24 -7
- msprobe/pytorch/__init__.py +23 -3
- msprobe/pytorch/api_accuracy_checker/common/config.py +81 -2
- msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +50 -25
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
- msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
- msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
- msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +73 -33
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +44 -18
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +32 -11
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +122 -172
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +158 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +30 -24
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +68 -31
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +115 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +26 -9
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
- msprobe/pytorch/bench_functions/__init__.py +18 -3
- msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
- msprobe/pytorch/bench_functions/confusion_transpose.py +20 -1
- msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
- msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
- msprobe/pytorch/bench_functions/linear.py +15 -0
- msprobe/pytorch/bench_functions/matmul_backward.py +33 -6
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +280 -157
- msprobe/pytorch/bench_functions/rms_norm.py +15 -0
- msprobe/pytorch/bench_functions/rotary_mul.py +32 -9
- msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
- msprobe/pytorch/bench_functions/swiglu.py +29 -6
- msprobe/pytorch/common/__init__.py +15 -0
- msprobe/pytorch/common/log.py +18 -6
- msprobe/pytorch/common/parse_json.py +31 -16
- msprobe/pytorch/common/utils.py +96 -40
- msprobe/pytorch/compare/distributed_compare.py +13 -14
- msprobe/pytorch/compare/match.py +15 -0
- msprobe/pytorch/compare/pt_compare.py +44 -10
- msprobe/pytorch/debugger/debugger_config.py +69 -52
- msprobe/pytorch/debugger/precision_debugger.py +72 -24
- msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
- msprobe/pytorch/free_benchmark/__init__.py +20 -5
- msprobe/pytorch/free_benchmark/common/constant.py +15 -0
- msprobe/pytorch/free_benchmark/common/counter.py +15 -0
- msprobe/pytorch/free_benchmark/common/enums.py +43 -0
- msprobe/pytorch/free_benchmark/common/params.py +23 -1
- msprobe/pytorch/free_benchmark/common/utils.py +43 -5
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +47 -9
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +17 -0
- msprobe/pytorch/free_benchmark/main.py +19 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +18 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +21 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +28 -2
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +19 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +65 -16
- msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +21 -5
- msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
- msprobe/pytorch/function_factory.py +17 -2
- msprobe/pytorch/functional/module_dump.py +84 -0
- msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
- msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
- msprobe/pytorch/hook_module/__init__.py +16 -1
- msprobe/pytorch/hook_module/api_registry.py +13 -8
- msprobe/pytorch/hook_module/hook_module.py +17 -19
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
- msprobe/pytorch/hook_module/utils.py +4 -6
- msprobe/pytorch/hook_module/wrap_aten.py +12 -11
- msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
- msprobe/pytorch/hook_module/wrap_functional.py +21 -20
- msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
- msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
- msprobe/pytorch/hook_module/wrap_torch.py +4 -6
- msprobe/pytorch/hook_module/wrap_vf.py +4 -6
- msprobe/pytorch/module_processer.py +18 -6
- msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
- msprobe/pytorch/monitor/anomaly_detect.py +340 -0
- msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
- msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
- msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
- msprobe/pytorch/monitor/features.py +108 -0
- msprobe/pytorch/monitor/module_hook.py +870 -0
- msprobe/pytorch/monitor/module_metric.py +193 -0
- msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
- msprobe/pytorch/monitor/optimizer_collect.py +295 -0
- msprobe/pytorch/monitor/unittest/__init__.py +0 -0
- msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
- msprobe/pytorch/monitor/utils.py +250 -0
- msprobe/pytorch/monitor/visualizer.py +59 -0
- msprobe/pytorch/online_dispatch/__init__.py +2 -3
- msprobe/pytorch/online_dispatch/compare.py +38 -48
- msprobe/pytorch/online_dispatch/dispatch.py +50 -25
- msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
- msprobe/pytorch/online_dispatch/single_compare.py +60 -39
- msprobe/pytorch/online_dispatch/torch_ops_config.yaml +9 -1
- msprobe/pytorch/online_dispatch/utils.py +48 -23
- msprobe/pytorch/parse.py +15 -0
- msprobe/pytorch/parse_tool/cli.py +5 -6
- msprobe/pytorch/parse_tool/lib/compare.py +19 -26
- msprobe/pytorch/parse_tool/lib/config.py +1 -1
- msprobe/pytorch/parse_tool/lib/parse_tool.py +4 -2
- msprobe/pytorch/parse_tool/lib/utils.py +40 -55
- msprobe/pytorch/parse_tool/lib/visualization.py +3 -1
- msprobe/pytorch/pt_config.py +192 -40
- msprobe/pytorch/service.py +110 -35
- msprobe/visualization/__init__.py +14 -0
- msprobe/visualization/builder/__init__.py +14 -0
- msprobe/visualization/builder/graph_builder.py +165 -0
- msprobe/visualization/builder/msprobe_adapter.py +205 -0
- msprobe/visualization/compare/__init__.py +14 -0
- msprobe/visualization/compare/graph_comparator.py +130 -0
- msprobe/visualization/compare/mode_adapter.py +211 -0
- msprobe/visualization/graph/__init__.py +14 -0
- msprobe/visualization/graph/base_node.py +124 -0
- msprobe/visualization/graph/graph.py +200 -0
- msprobe/visualization/graph/node_colors.py +95 -0
- msprobe/visualization/graph/node_op.py +39 -0
- msprobe/visualization/graph_service.py +214 -0
- msprobe/visualization/utils.py +232 -0
- mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
- msprobe/docs/04.acl_config_examples.md +0 -76
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -43
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -107
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
- msprobe/pytorch/functional/dump_module.py +0 -39
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
- /msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
- /msprobe/pytorch/{functional/data_processor.py → monitor/distributed/__init__.py} +0 -0
msprobe/core/advisor/advisor.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright (c) 2022-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
5
|
# you may not use this file except in compliance with the License.
|
|
7
6
|
# You may obtain a copy of the License at
|
|
8
7
|
#
|
|
@@ -13,7 +12,6 @@
|
|
|
13
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
13
|
# See the License for the specific language governing permissions and
|
|
15
14
|
# limitations under the License.
|
|
16
|
-
"""
|
|
17
15
|
|
|
18
16
|
import os
|
|
19
17
|
|
|
@@ -24,15 +22,17 @@ from msprobe.core.common.utils import CompareException
|
|
|
24
22
|
from msprobe.core.common.file_utils import FileChecker
|
|
25
23
|
from msprobe.core.common.const import Const, CompareConst, FileCheckConst
|
|
26
24
|
|
|
25
|
+
|
|
27
26
|
class Advisor:
|
|
28
27
|
"""
|
|
29
28
|
Class for generate advisor
|
|
30
29
|
"""
|
|
31
30
|
|
|
32
|
-
def __init__(self, input_data, out_path=""):
|
|
31
|
+
def __init__(self, input_data, out_path="", suffix=""):
|
|
33
32
|
self.input_data = input_data
|
|
34
33
|
self.out_path = os.path.realpath(out_path)
|
|
35
34
|
self.file_type = None
|
|
35
|
+
self.suffix = suffix
|
|
36
36
|
|
|
37
37
|
@staticmethod
|
|
38
38
|
def deterministic_advisor(message, node_name):
|
|
@@ -62,7 +62,12 @@ class Advisor:
|
|
|
62
62
|
.format(item[CompareConst.NPU_NAME]))
|
|
63
63
|
|
|
64
64
|
def gen_advisor_result(self, pd_data):
|
|
65
|
-
|
|
65
|
+
try:
|
|
66
|
+
first_failing_data = pd_data.iloc[0]
|
|
67
|
+
except IndexError as e:
|
|
68
|
+
err_msg = "index out of bounds error occurs, pd_data is empty, please check!"
|
|
69
|
+
logger.error(err_msg)
|
|
70
|
+
raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from e
|
|
66
71
|
node_name = first_failing_data[CompareConst.NPU_NAME]
|
|
67
72
|
index = first_failing_data['index']
|
|
68
73
|
message = self.gen_advisor_message(node_name)
|
|
@@ -87,7 +92,7 @@ class Advisor:
|
|
|
87
92
|
return message
|
|
88
93
|
|
|
89
94
|
def analysis(self):
|
|
90
|
-
self.
|
|
95
|
+
self._check_path_valid()
|
|
91
96
|
analyze_data = self._parse_input_data()
|
|
92
97
|
logger.info("Start analyzing the comparison result: %s" % self.file_type)
|
|
93
98
|
self.analyze_unmatched(analyze_data)
|
|
@@ -103,7 +108,7 @@ class Advisor:
|
|
|
103
108
|
else:
|
|
104
109
|
result = self.gen_advisor_result(failing_data)
|
|
105
110
|
message_list = result.print_advisor_log()
|
|
106
|
-
result.gen_summary_file(self.out_path, message_list)
|
|
111
|
+
result.gen_summary_file(self.out_path, message_list, suffix=self.suffix)
|
|
107
112
|
|
|
108
113
|
def _parse_input_data(self):
|
|
109
114
|
data_columns = self.input_data.columns.values
|
|
@@ -119,6 +124,6 @@ class Advisor:
|
|
|
119
124
|
df = self.input_data.reset_index()
|
|
120
125
|
return df
|
|
121
126
|
|
|
122
|
-
def
|
|
127
|
+
def _check_path_valid(self):
|
|
123
128
|
out_path_checker = FileChecker(self.out_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE)
|
|
124
129
|
out_path_checker.common_check()
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright (c) 2022-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
5
|
# you may not use this file except in compliance with the License.
|
|
7
6
|
# You may obtain a copy of the License at
|
|
8
7
|
#
|
|
@@ -13,7 +12,6 @@
|
|
|
13
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
13
|
# See the License for the specific language governing permissions and
|
|
15
14
|
# limitations under the License.
|
|
16
|
-
"""
|
|
17
15
|
|
|
18
16
|
|
|
19
17
|
class AdvisorConst:
|
|
@@ -35,7 +33,8 @@ class AdvisorConst:
|
|
|
35
33
|
# advisor message
|
|
36
34
|
NO_ERR_SUGGEST = "All data in comparison result meets the accuracy requirements."
|
|
37
35
|
FORWARD_INPUT_SUGGEST = "1. Analyze the model to view the input source.\n" \
|
|
38
|
-
"2. Check whether an inplace API causes the output result to overwrite the input result.
|
|
36
|
+
"2. Check whether an inplace API causes the output result to overwrite the input result. "\
|
|
37
|
+
"That is, the fault is actually caused by a computation error.\n" \
|
|
39
38
|
"3. The fault may be caused by memory corruption and further analysis is required."
|
|
40
39
|
FORWARD_OUTPUT_SUGGEST = "This is a forward API computation error. Check the computation implementation."
|
|
41
40
|
BACKWARD_INPUT_SUGGEST = "Check whether the forward computation result is affected."
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
1
|
+
# Copyright (c) 2022-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
5
|
# you may not use this file except in compliance with the License.
|
|
7
6
|
# You may obtain a copy of the License at
|
|
8
7
|
#
|
|
@@ -13,13 +12,12 @@
|
|
|
13
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
13
|
# See the License for the specific language governing permissions and
|
|
15
14
|
# limitations under the License.
|
|
16
|
-
"""
|
|
17
15
|
import os
|
|
18
16
|
import time
|
|
19
17
|
|
|
20
18
|
from msprobe.core.advisor.advisor_const import AdvisorConst
|
|
21
19
|
from msprobe.core.common.log import logger
|
|
22
|
-
from msprobe.core.common.const import
|
|
20
|
+
from msprobe.core.common.const import FileCheckConst
|
|
23
21
|
from msprobe.core.common.file_utils import change_mode, FileOpen
|
|
24
22
|
|
|
25
23
|
|
|
@@ -34,8 +32,8 @@ class AdvisorResult:
|
|
|
34
32
|
self.advisor_message = message
|
|
35
33
|
|
|
36
34
|
@staticmethod
|
|
37
|
-
def gen_summary_file(out_path, message_list):
|
|
38
|
-
file_name = '
|
|
35
|
+
def gen_summary_file(out_path, message_list, suffix):
|
|
36
|
+
file_name = 'advisor{}_{}.txt'.format(suffix, time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())))
|
|
39
37
|
result_file = os.path.join(out_path, file_name)
|
|
40
38
|
try:
|
|
41
39
|
with FileOpen(result_file, 'w+') as output_file:
|
|
@@ -50,9 +48,11 @@ class AdvisorResult:
|
|
|
50
48
|
|
|
51
49
|
def print_advisor_log(self):
|
|
52
50
|
logger.info("The summary of the expert advice is as follows: ")
|
|
53
|
-
message_list = [
|
|
54
|
-
|
|
55
|
-
|
|
51
|
+
message_list = [
|
|
52
|
+
AdvisorConst.LINE + AdvisorConst.COLON + str(self.line),
|
|
53
|
+
AdvisorConst.SUSPECT_NODES + AdvisorConst.COLON + self.suspect_node,
|
|
54
|
+
AdvisorConst.ADVISOR_SUGGEST + AdvisorConst.COLON + self.advisor_message
|
|
55
|
+
]
|
|
56
56
|
for message in message_list:
|
|
57
57
|
logger.info(message)
|
|
58
58
|
return message_list
|
msprobe/core/common/const.py
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
1
16
|
import os
|
|
2
17
|
import stat
|
|
3
18
|
|
|
@@ -10,15 +25,20 @@ class Const:
|
|
|
10
25
|
"""
|
|
11
26
|
TOOL_NAME = "msprobe"
|
|
12
27
|
|
|
28
|
+
ipv4_pattern = "([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5])(\.([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5])){3}$"
|
|
13
29
|
SEP = "."
|
|
14
30
|
REGEX_PREFIX_MAX_LENGTH = 20
|
|
15
31
|
REGEX_PREFIX_PATTERN = r"^[a-zA-Z0-9_-]+$"
|
|
16
32
|
FILE_PATTERN = r'^[a-zA-Z0-9_./-]+$'
|
|
33
|
+
STRING_BLACKLIST = r"^[+-=%@\+\-=%@]|;[+-=%@\+\-=%@]"
|
|
17
34
|
COMMA = ","
|
|
18
35
|
FLOAT_EPSILON = np.finfo(float).eps
|
|
19
36
|
OFF = 'OFF'
|
|
20
37
|
BACKWARD = 'backward'
|
|
21
38
|
FORWARD = 'forward'
|
|
39
|
+
PROGRESS_TIMEOUT = 3000
|
|
40
|
+
EXCEPTION_NONE = None
|
|
41
|
+
JIT = 'Jit'
|
|
22
42
|
PRIMITIVE_PREFIX = 'Primitive'
|
|
23
43
|
DEFAULT_LIST = []
|
|
24
44
|
DEFAULT_PATH = './'
|
|
@@ -30,6 +50,7 @@ class Const:
|
|
|
30
50
|
FOUR_SEGMENT = 4
|
|
31
51
|
SIX_SEGMENT = 6
|
|
32
52
|
SEVEN_SEGMENT = 7
|
|
53
|
+
MAX_DEPTH = 10
|
|
33
54
|
|
|
34
55
|
# dump mode
|
|
35
56
|
ALL = "all"
|
|
@@ -78,6 +99,8 @@ class Const:
|
|
|
78
99
|
RUN_UT = "run_ut"
|
|
79
100
|
GRAD_PROBE = "grad_probe"
|
|
80
101
|
TASK_LIST = [TENSOR, STATISTICS, OVERFLOW_CHECK, FREE_BENCHMARK, RUN_UT, GRAD_PROBE]
|
|
102
|
+
DUMP_DATA_COLLECTION_LIST = [STATISTICS, TENSOR]
|
|
103
|
+
DUMP_DATA_MODE_LIST = [ALL, INPUT, OUTPUT, FORWARD, BACKWARD]
|
|
81
104
|
LEVEL_L0 = "L0"
|
|
82
105
|
LEVEL_L1 = "L1"
|
|
83
106
|
LEVEL_L2 = "L2"
|
|
@@ -89,6 +112,7 @@ class Const:
|
|
|
89
112
|
DATA = "data"
|
|
90
113
|
PT_FRAMEWORK = "pytorch"
|
|
91
114
|
MS_FRAMEWORK = "mindspore"
|
|
115
|
+
UNKNOWN_FRAMEWORK = "unknown"
|
|
92
116
|
DIRECTORY_LENGTH = 4096
|
|
93
117
|
FILE_NAME_LENGTH = 255
|
|
94
118
|
FLOAT_TYPE = [np.half, np.single, float, np.double, np.float64, np.longdouble, np.float32, np.float16]
|
|
@@ -99,7 +123,37 @@ class Const:
|
|
|
99
123
|
CPU_LOWERCASE = 'cpu'
|
|
100
124
|
CUDA_LOWERCASE = 'cuda'
|
|
101
125
|
DISTRIBUTED = 'Distributed'
|
|
126
|
+
DUMP_PREFIX = ["Distributed", "Functional", "Torch", "Tensor", "Mint", "MintFunctional", "Primitive",
|
|
127
|
+
"Aten", "VF", "NPU", "Jit"]
|
|
128
|
+
|
|
129
|
+
# struct json param
|
|
130
|
+
ORIGIN_DATA = "origin_data"
|
|
131
|
+
SCOPE = "scope"
|
|
132
|
+
STACK = "stack"
|
|
102
133
|
|
|
134
|
+
ATEN = "Aten"
|
|
135
|
+
MODULE_WHITE_LIST = ["torch", "numpy"]
|
|
136
|
+
|
|
137
|
+
FUNC_SKIP_LIST = ["construct", "__call__"]
|
|
138
|
+
FILE_SKIP_LIST = ["msprobe", "MindSpeed"]
|
|
139
|
+
DATA_TYPE_SKIP_LIST = ["Primitive", "Jit"]
|
|
140
|
+
|
|
141
|
+
STACK_FILE_INDEX = 0
|
|
142
|
+
STACK_FUNC_INDEX = 2
|
|
143
|
+
STACK_FUNC_ELE_INDEX = 1
|
|
144
|
+
|
|
145
|
+
SCOPE_ID_INDEX = -1
|
|
146
|
+
SCOPE_DIRECTION_INDEX = -2
|
|
147
|
+
TYPE_NAME_INDEX = -3
|
|
148
|
+
LAYER_NAME_INDEX = -4
|
|
149
|
+
API_TYPE_INDEX = 0
|
|
150
|
+
LEFT_MOVE_INDEX = -1
|
|
151
|
+
RIGHT_MOVE_INDEX = 1
|
|
152
|
+
|
|
153
|
+
TOP_LAYER = "TopLayer"
|
|
154
|
+
CELL = "Cell"
|
|
155
|
+
MODULE = "Module"
|
|
156
|
+
FRAME_FILE_LIST = ["site-packages/torch", "package/torch", "site-packages/mindspore", "package/mindspore"]
|
|
103
157
|
INPLACE_LIST = [
|
|
104
158
|
"broadcast", "all_reduce", "reduce", "all_gather", "gather", "scatter", "reduce_scatter",
|
|
105
159
|
"_reduce_scatter_base", "_all_gather_base", "send", "recv", "irecv", "isend", "all_to_all_single", "all_to_all",
|
|
@@ -114,6 +168,31 @@ class Const:
|
|
|
114
168
|
"int32_to_int64": ["cross_entropy"]
|
|
115
169
|
}
|
|
116
170
|
|
|
171
|
+
FILL_CHAR_NUMS = 50
|
|
172
|
+
TOOL_ENDS_SUCCESSFULLY = f"{TOOL_NAME} ends successfully."
|
|
173
|
+
WITHOUT_CALL_STACK = "The call stack retrieval failed."
|
|
174
|
+
|
|
175
|
+
STEP = "step"
|
|
176
|
+
RANK = "rank"
|
|
177
|
+
HYPHEN = "-"
|
|
178
|
+
STEP_RANK_MINIMUM_VALUE = 0
|
|
179
|
+
STEP_RANK_MAXIMUM_VALUE = int(1e6)
|
|
180
|
+
|
|
181
|
+
# data type const
|
|
182
|
+
FLOAT16 = "Float16"
|
|
183
|
+
FLOAT32 = "Float32"
|
|
184
|
+
BFLOAT16 = "BFloat16"
|
|
185
|
+
TORCH_FLOAT16 = "torch.float16"
|
|
186
|
+
TORCH_FLOAT32 = "torch.float32"
|
|
187
|
+
TORCH_BFLOAT16 = "torch.bfloat16"
|
|
188
|
+
|
|
189
|
+
DTYPE = 'dtype'
|
|
190
|
+
SHAPE = 'shape'
|
|
191
|
+
MAX = 'Max'
|
|
192
|
+
MIN = 'Min'
|
|
193
|
+
MEAN = 'Mean'
|
|
194
|
+
NORM = 'Norm'
|
|
195
|
+
|
|
117
196
|
|
|
118
197
|
class CompareConst:
|
|
119
198
|
"""
|
|
@@ -156,9 +235,17 @@ class CompareConst:
|
|
|
156
235
|
RESULT = "Result"
|
|
157
236
|
MAGNITUDE = 0.5
|
|
158
237
|
OP_NAME = "op_name"
|
|
238
|
+
STRUCT = "struct"
|
|
159
239
|
INPUT_STRUCT = "input_struct"
|
|
240
|
+
KWARGS_STRUCT = "kwargs_struct"
|
|
160
241
|
OUTPUT_STRUCT = "output_struct"
|
|
161
242
|
SUMMARY = "summary"
|
|
243
|
+
MAX_EXCEL_LENGTH = 1048576
|
|
244
|
+
YES = "Yes"
|
|
245
|
+
NO = "No"
|
|
246
|
+
STATISTICS_INDICATOR_NUM = 4
|
|
247
|
+
EPSILON = 1e-10
|
|
248
|
+
COMPARE_ENDS_SUCCESSFULLY = "msprobe compare ends successfully."
|
|
162
249
|
|
|
163
250
|
COMPARE_RESULT_HEADER = [
|
|
164
251
|
NPU_NAME, BENCH_NAME, NPU_DTYPE, BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, COSINE, MAX_ABS_ERR, MAX_RELATIVE_ERR,
|
|
@@ -176,6 +263,12 @@ class CompareConst:
|
|
|
176
263
|
NPU_NAME, BENCH_NAME, NPU_DTYPE, BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, NPU_MD5, BENCH_MD5, RESULT
|
|
177
264
|
]
|
|
178
265
|
|
|
266
|
+
HEAD_OF_COMPARE_MODE = {
|
|
267
|
+
Const.ALL: COMPARE_RESULT_HEADER,
|
|
268
|
+
Const.SUMMARY: SUMMARY_COMPARE_RESULT_HEADER,
|
|
269
|
+
Const.MD5: MD5_COMPARE_RESULT_HEADER
|
|
270
|
+
}
|
|
271
|
+
|
|
179
272
|
# compare standard
|
|
180
273
|
HUNDRED_RATIO_THRESHOLD = 0.01
|
|
181
274
|
THOUSAND_RATIO_THRESHOLD = 0.001
|
|
@@ -195,8 +288,12 @@ class CompareConst:
|
|
|
195
288
|
PASS = 'pass'
|
|
196
289
|
WARNING = 'Warning'
|
|
197
290
|
ERROR = 'error'
|
|
291
|
+
TRUE = 'TRUE'
|
|
292
|
+
FALSE = 'FALSE'
|
|
198
293
|
SKIP = 'SKIP'
|
|
199
294
|
N_A = 'N/A'
|
|
295
|
+
INF = 'inf'
|
|
296
|
+
NEG_INF = '-inf'
|
|
200
297
|
BFLOAT16_MIN = -3.3895313892515355e+38
|
|
201
298
|
BFLOAT16_MAX = 3.3895313892515355e+38
|
|
202
299
|
BFLOAT16_EPS = 3.90625e-3 # 2 ** -8
|
|
@@ -250,6 +347,13 @@ class CompareConst:
|
|
|
250
347
|
MAX_DIFF: None, MIN_DIFF: None, MEAN_DIFF: None, NORM_DIFF: None, MAX_RELATIVE_ERR: None,
|
|
251
348
|
MIN_RELATIVE_ERR: None, MEAN_RELATIVE_ERR: None, NORM_RELATIVE_ERR: None
|
|
252
349
|
}
|
|
350
|
+
INPUT_PATTERN = Const.SEP + Const.INPUT + Const.SEP
|
|
351
|
+
KWARGS_PATTERN = Const.SEP + Const.KWARGS + Const.SEP
|
|
352
|
+
OUTPUT_PATTERN = Const.SEP + Const.OUTPUT + Const.SEP
|
|
353
|
+
COMPARE_KEY = 'compare_key'
|
|
354
|
+
COMPARE_SHAPE = 'compare_shape'
|
|
355
|
+
INTERNAL_API_MAPPING_FILE = 'ms_to_pt_api.yaml'
|
|
356
|
+
UNREADABLE = 'unreadable data'
|
|
253
357
|
|
|
254
358
|
|
|
255
359
|
class FileCheckConst:
|
|
@@ -274,7 +378,8 @@ class FileCheckConst:
|
|
|
274
378
|
MAX_JSON_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024
|
|
275
379
|
MAX_PT_SIZE = 10737418240 # 10 * 1024 * 1024 * 1024
|
|
276
380
|
MAX_CSV_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024
|
|
277
|
-
MAX_YAML_SIZE =
|
|
381
|
+
MAX_YAML_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024
|
|
382
|
+
COMMOM_FILE_SIZE = 1048576 # 1 * 1024 * 1024
|
|
278
383
|
DIR = "dir"
|
|
279
384
|
FILE = "file"
|
|
280
385
|
DATA_DIR_AUTHORITY = 0o750
|
|
@@ -287,6 +392,7 @@ class FileCheckConst:
|
|
|
287
392
|
CSV_SUFFIX: MAX_CSV_SIZE,
|
|
288
393
|
YAML_SUFFIX: MAX_YAML_SIZE
|
|
289
394
|
}
|
|
395
|
+
CSV_BLACK_LIST = r'^[+-=%@\+\-=%@]|;[+-=%@\+\-=%@]'
|
|
290
396
|
|
|
291
397
|
|
|
292
398
|
class OverflowConst:
|
|
@@ -301,6 +407,9 @@ class MsCompareConst:
|
|
|
301
407
|
# api_info field
|
|
302
408
|
MINT = "Mint"
|
|
303
409
|
MINT_FUNCTIONAL = "MintFunctional"
|
|
410
|
+
TENSOR_API = "Tensor"
|
|
411
|
+
|
|
412
|
+
API_NAME_STR_LENGTH = 4
|
|
304
413
|
|
|
305
414
|
TASK_FIELD = "task"
|
|
306
415
|
STATISTICS_TASK = "statistics"
|
|
@@ -308,6 +417,10 @@ class MsCompareConst:
|
|
|
308
417
|
DUMP_DATA_DIR_FIELD = "dump_data_dir"
|
|
309
418
|
DATA_FIELD = "data"
|
|
310
419
|
|
|
420
|
+
# supported api yaml
|
|
421
|
+
SUPPORTED_API_LIST_FILE = "checker_support_api.yaml"
|
|
422
|
+
SUPPORTED_TENSOR_LIST_KEY = "tensor"
|
|
423
|
+
|
|
311
424
|
# detail_csv
|
|
312
425
|
DETAIL_CSV_API_NAME = "API Name"
|
|
313
426
|
DETAIL_CSV_BENCH_DTYPE = "Bench Dtype"
|
|
@@ -329,13 +442,61 @@ class MsgConst:
|
|
|
329
442
|
"""
|
|
330
443
|
Class for log messages const
|
|
331
444
|
"""
|
|
332
|
-
CLEAR_SYMBOL = "\033[K"
|
|
333
445
|
MSPROBE_LOG_LEVEL = "MSPROBE_LOG_LEVEL"
|
|
334
|
-
|
|
446
|
+
LOG_LEVEL_ENUM = ["0", "1", "2", "3", "4"]
|
|
447
|
+
LOG_LEVEL = ["DEBUG", "INFO", "WARNING", "ERROR"]
|
|
448
|
+
|
|
449
|
+
class LogLevel:
    """
    Namespace of log levels.

    Each level is a nested class whose ``value`` attribute is an integer;
    the values increase from DEBUG (0) to ERROR (3).
    """

    class DEBUG:
        value = 0

    class INFO:
        value = 1

    class WARNING:
        value = 2

    class ERROR:
        value = 3
|
|
461
|
+
|
|
335
462
|
SPECIAL_CHAR = ["\n", "\r", "\u007F", "\b", "\f", "\t", "\u000B", "%08", "%0a", "%0b", "%0c", "%0d", "%7f"]
|
|
336
463
|
|
|
464
|
+
NOT_CREATED_INSTANCE = "PrecisionDebugger instance is not created."
|
|
465
|
+
|
|
337
466
|
|
|
338
467
|
class GraphMode:
    """
    String identifiers for graph modes.

    The value of every constant is identical to its own name.
    """
    NPY_MODE = "NPY_MODE"
    STATISTIC_MODE = "STATISTIC_MODE"
    ERROR_MODE = "ERROR_MODE"
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
class MonitorConst:
    """
    Class for monitor const

    Holds only literal constants: strings and lists/tuples of strings.
    """
    # Recognised op names - presumably the statistics the monitor can
    # compute; confirm against the code that consumes this list.
    OP_LIST = ["min", "max", "norm", "zeros", "nans", "id", "mean"]

    # NOTE(review): MONITOR_OUTPUT_DIR looks like the name of an environment
    # variable, with DEFAULT_MONITOR_OUTPUT_DIR as its fallback - confirm.
    MONITOR_OUTPUT_DIR = "MONITOR_OUTPUT_DIR"
    DEFAULT_MONITOR_OUTPUT_DIR = "./monitor_output"
    DATABASE = "database"
    EMAIL = "email"

    # Optimizer type names. OPT_TY is a list and DEEPSPEED_OPT_TY is a tuple;
    # both container types are kept unchanged because callers may rely on them.
    OPT_TY = ['Megatron_DistributedOptimizer', 'Megatron_Float16OptimizerWithFloat16Params']
    DEEPSPEED_OPT_TY = (
        "DeepSpeedZeroOptimizer_Stage0",
        "DeepSpeedZeroOptimizer_Stage1_or_2",
        "DeepSpeedZeroOptimizer_Stage3",
    )
    RULE_NAME = ['AnomalyTurbulence']

    # Separators and tag fragments.
    DOT = "."
    VPP_SEP = ":"
    ACTV_IN = "input"
    ACTV_OUT = "output"
    ACTVGRAD_IN = "input_grad"
    ACTVGRAD_OUT = "output_grad"
    POST_GRAD = "post_grad"
    PRE_GRAD = "pre_grad"
    PREFIX_POST = "post"
    PREFIX_PRE = "pre"

    # File names and format / target identifiers.
    ANOMALY_JSON = "anomaly.json"
    ANALYSE_JSON = "anomaly_analyse.json"
    TENSORBOARD = "tensorboard"
    CSV = "csv"
    API = "api"
|
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
1
16
|
class CodedException(Exception):
|
|
2
17
|
def __init__(self, code, error_info=''):
|
|
3
18
|
super().__init__()
|
|
@@ -11,10 +26,12 @@ class CodedException(Exception):
|
|
|
11
26
|
class MsprobeException(CodedException):
|
|
12
27
|
INVALID_PARAM_ERROR = 0
|
|
13
28
|
OVERFLOW_NUMS_ERROR = 1
|
|
29
|
+
RECURSION_LIMIT_ERROR = 2
|
|
14
30
|
|
|
15
31
|
err_strs = {
|
|
16
|
-
INVALID_PARAM_ERROR: "[msprobe] 无效参数:
|
|
17
|
-
OVERFLOW_NUMS_ERROR: "[msprobe] 超过预设溢出次数
|
|
32
|
+
INVALID_PARAM_ERROR: "[msprobe] 无效参数:",
|
|
33
|
+
OVERFLOW_NUMS_ERROR: "[msprobe] 超过预设溢出次数 当前溢出次数:",
|
|
34
|
+
RECURSION_LIMIT_ERROR: "[msprobe] 递归调用超过限制:"
|
|
18
35
|
}
|
|
19
36
|
|
|
20
37
|
|
|
@@ -41,7 +58,7 @@ class ParseJsonException(CodedException):
|
|
|
41
58
|
InvalidDumpJson = 1
|
|
42
59
|
err_strs = {
|
|
43
60
|
UnexpectedNameStruct: "[msprobe] Unexpected name in json: ",
|
|
44
|
-
InvalidDumpJson: "[msprobe] json
|
|
61
|
+
InvalidDumpJson: "[msprobe] Invalid dump.json format: ",
|
|
45
62
|
}
|
|
46
63
|
|
|
47
64
|
|
|
@@ -73,9 +90,13 @@ class StepException(CodedException):
|
|
|
73
90
|
class FreeBenchmarkException(CodedException):
|
|
74
91
|
UnsupportedType = 0
|
|
75
92
|
InvalidGrad = 1
|
|
93
|
+
InvalidPerturbedOutput = 2
|
|
94
|
+
OutputIndexError = 3
|
|
76
95
|
err_strs = {
|
|
77
96
|
UnsupportedType: "[msprobe] Free benchmark get unsupported type: ",
|
|
78
97
|
InvalidGrad: "[msprobe] Free benchmark gradient invalid: ",
|
|
98
|
+
InvalidPerturbedOutput: "[msprobe] Free benchmark invalid perturbed output: ",
|
|
99
|
+
OutputIndexError: "[msprobe] Free benchmark output index out of bounds: ",
|
|
79
100
|
}
|
|
80
101
|
|
|
81
102
|
|
|
@@ -87,6 +108,7 @@ class DistributedNotInitializedError(Exception):
|
|
|
87
108
|
def __str__(self):
|
|
88
109
|
return self.msg
|
|
89
110
|
|
|
111
|
+
|
|
90
112
|
class ApiAccuracyCheckerException(CodedException):
|
|
91
113
|
ParseJsonFailed = 0
|
|
92
114
|
UnsupportType = 1
|
|
@@ -97,4 +119,4 @@ class ApiAccuracyCheckerException(CodedException):
|
|
|
97
119
|
UnsupportType: "[msprobe] Api Accuracy Checker get unsupported type: ",
|
|
98
120
|
WrongValue: "[msprobe] Api Accuracy Checker get wrong value: ",
|
|
99
121
|
ApiWrong: "[msprobe] Api Accuracy Checker something wrong with api: ",
|
|
100
|
-
}
|
|
122
|
+
}
|