mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
- mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
- msprobe/README.md +84 -18
- msprobe/__init__.py +16 -1
- msprobe/config.json +1 -5
- msprobe/core/advisor/advisor.py +16 -11
- msprobe/core/advisor/advisor_const.py +6 -7
- msprobe/core/advisor/advisor_result.py +12 -12
- msprobe/core/common/const.py +164 -3
- msprobe/core/common/exceptions.py +26 -4
- msprobe/core/common/file_utils.py +196 -27
- msprobe/core/common/inplace_op_checker.py +53 -0
- msprobe/core/common/inplace_ops.yaml +251 -0
- msprobe/core/common/log.py +46 -18
- msprobe/core/common/utils.py +308 -209
- msprobe/core/common_config.py +60 -38
- msprobe/core/compare/acc_compare.py +332 -94
- msprobe/core/compare/check.py +104 -22
- msprobe/core/compare/compare_cli.py +42 -5
- msprobe/core/compare/highlight.py +162 -57
- msprobe/core/compare/layer_mapping/__init__.py +19 -0
- msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
- msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
- msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
- msprobe/core/compare/multiprocessing_compute.py +33 -8
- msprobe/core/compare/npy_compare.py +73 -29
- msprobe/core/compare/utils.py +306 -247
- msprobe/core/data_dump/data_collector.py +44 -43
- msprobe/core/data_dump/data_processor/base.py +88 -35
- msprobe/core/data_dump/data_processor/factory.py +20 -3
- msprobe/core/data_dump/data_processor/mindspore_processor.py +14 -8
- msprobe/core/data_dump/data_processor/pytorch_processor.py +180 -66
- msprobe/core/data_dump/json_writer.py +63 -42
- msprobe/core/data_dump/scope.py +143 -48
- msprobe/core/grad_probe/constant.py +31 -13
- msprobe/core/grad_probe/grad_compare.py +20 -4
- msprobe/core/grad_probe/utils.py +44 -3
- msprobe/core/overflow_check/abnormal_scene.py +185 -0
- msprobe/core/overflow_check/api_info.py +55 -0
- msprobe/core/overflow_check/checker.py +138 -0
- msprobe/core/overflow_check/filter.py +157 -0
- msprobe/core/overflow_check/ignore_rules.yaml +55 -0
- msprobe/core/overflow_check/level.py +22 -0
- msprobe/core/overflow_check/utils.py +28 -0
- msprobe/docs/01.installation.md +29 -9
- msprobe/docs/02.config_introduction.md +83 -84
- msprobe/docs/03.config_examples.md +3 -20
- msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
- msprobe/docs/05.data_dump_PyTorch.md +143 -13
- msprobe/docs/06.data_dump_MindSpore.md +197 -88
- msprobe/docs/07.accuracy_checker_PyTorch.md +69 -46
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +52 -17
- msprobe/docs/09.accuracy_checker_MindSpore.md +51 -15
- msprobe/docs/10.accuracy_compare_PyTorch.md +187 -99
- msprobe/docs/11.accuracy_compare_MindSpore.md +253 -31
- msprobe/docs/12.overflow_check_PyTorch.md +1 -1
- msprobe/docs/13.overflow_check_MindSpore.md +6 -6
- msprobe/docs/15.free_benchmarking_PyTorch.md +60 -55
- msprobe/docs/16.free_benchmarking_MindSpore.md +159 -0
- msprobe/docs/17.grad_probe.md +19 -22
- msprobe/docs/18.online_dispatch.md +89 -0
- msprobe/docs/19.monitor.md +468 -0
- msprobe/docs/20.monitor_performance_baseline.md +52 -0
- msprobe/docs/21.visualization_PyTorch.md +386 -0
- msprobe/docs/22.visualization_MindSpore.md +384 -0
- msprobe/docs/23.tool_function_introduction.md +28 -0
- msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +25 -10
- msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/monitor/cpu_info.png +0 -0
- msprobe/docs/img/ms_dump.png +0 -0
- msprobe/docs/img/ms_layer.png +0 -0
- msprobe/docs/img/pt_dump.png +0 -0
- msprobe/mindspore/__init__.py +16 -0
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +130 -138
- msprobe/mindspore/api_accuracy_checker/api_info.py +27 -5
- msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
- msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
- msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
- msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
- msprobe/mindspore/api_accuracy_checker/main.py +27 -3
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
- msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
- msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
- msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
- msprobe/mindspore/cell_processor.py +58 -13
- msprobe/mindspore/common/const.py +35 -13
- msprobe/mindspore/common/log.py +5 -9
- msprobe/mindspore/common/utils.py +60 -5
- msprobe/mindspore/compare/distributed_compare.py +15 -28
- msprobe/mindspore/compare/ms_compare.py +319 -158
- msprobe/mindspore/compare/ms_graph_compare.py +99 -49
- msprobe/mindspore/debugger/debugger_config.py +20 -14
- msprobe/mindspore/debugger/precision_debugger.py +43 -13
- msprobe/mindspore/dump/dump_tool_factory.py +18 -1
- msprobe/mindspore/dump/hook_cell/api_registry.py +23 -3
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +203 -0
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +107 -10
- msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
- msprobe/mindspore/dump/jit_dump.py +56 -20
- msprobe/mindspore/dump/kernel_graph_dump.py +19 -5
- msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -6
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
- msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +162 -41
- msprobe/mindspore/free_benchmark/common/config.py +15 -0
- msprobe/mindspore/free_benchmark/common/handler_params.py +15 -1
- msprobe/mindspore/free_benchmark/common/utils.py +37 -8
- msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
- msprobe/mindspore/free_benchmark/handler/base_handler.py +20 -5
- msprobe/mindspore/free_benchmark/handler/check_handler.py +21 -7
- msprobe/mindspore/free_benchmark/handler/fix_handler.py +18 -3
- msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -6
- msprobe/mindspore/free_benchmark/perturbation/add_noise.py +23 -8
- msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +29 -5
- msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +25 -10
- msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +45 -19
- msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +29 -8
- msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +22 -7
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +17 -2
- msprobe/mindspore/grad_probe/global_context.py +44 -14
- msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
- msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
- msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
- msprobe/mindspore/grad_probe/hook.py +24 -10
- msprobe/mindspore/grad_probe/utils.py +18 -5
- msprobe/mindspore/ms_config.py +22 -15
- msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -6
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
- msprobe/mindspore/runtime.py +15 -0
- msprobe/mindspore/service.py +75 -150
- msprobe/mindspore/task_handler_factory.py +15 -0
- msprobe/msprobe.py +24 -7
- msprobe/pytorch/__init__.py +23 -3
- msprobe/pytorch/api_accuracy_checker/common/config.py +81 -2
- msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
- msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +50 -25
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
- msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
- msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
- msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
- msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
- msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +73 -33
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +44 -18
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +32 -11
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +122 -172
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +158 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +30 -24
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +68 -31
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +115 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +26 -9
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
- msprobe/pytorch/bench_functions/__init__.py +18 -3
- msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
- msprobe/pytorch/bench_functions/confusion_transpose.py +20 -1
- msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
- msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
- msprobe/pytorch/bench_functions/linear.py +15 -0
- msprobe/pytorch/bench_functions/matmul_backward.py +33 -6
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +280 -157
- msprobe/pytorch/bench_functions/rms_norm.py +15 -0
- msprobe/pytorch/bench_functions/rotary_mul.py +32 -9
- msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
- msprobe/pytorch/bench_functions/swiglu.py +29 -6
- msprobe/pytorch/common/__init__.py +15 -0
- msprobe/pytorch/common/log.py +18 -6
- msprobe/pytorch/common/parse_json.py +31 -16
- msprobe/pytorch/common/utils.py +96 -40
- msprobe/pytorch/compare/distributed_compare.py +13 -14
- msprobe/pytorch/compare/match.py +15 -0
- msprobe/pytorch/compare/pt_compare.py +44 -10
- msprobe/pytorch/debugger/debugger_config.py +69 -52
- msprobe/pytorch/debugger/precision_debugger.py +72 -24
- msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
- msprobe/pytorch/free_benchmark/__init__.py +20 -5
- msprobe/pytorch/free_benchmark/common/constant.py +15 -0
- msprobe/pytorch/free_benchmark/common/counter.py +15 -0
- msprobe/pytorch/free_benchmark/common/enums.py +43 -0
- msprobe/pytorch/free_benchmark/common/params.py +23 -1
- msprobe/pytorch/free_benchmark/common/utils.py +43 -5
- msprobe/pytorch/free_benchmark/compare/grad_saver.py +47 -9
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +17 -0
- msprobe/pytorch/free_benchmark/main.py +19 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +18 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +21 -4
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +28 -2
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +19 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
- msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +65 -16
- msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +21 -5
- msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
- msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
- msprobe/pytorch/function_factory.py +17 -2
- msprobe/pytorch/functional/module_dump.py +84 -0
- msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
- msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
- msprobe/pytorch/hook_module/__init__.py +16 -1
- msprobe/pytorch/hook_module/api_registry.py +13 -8
- msprobe/pytorch/hook_module/hook_module.py +17 -19
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
- msprobe/pytorch/hook_module/utils.py +4 -6
- msprobe/pytorch/hook_module/wrap_aten.py +12 -11
- msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
- msprobe/pytorch/hook_module/wrap_functional.py +21 -20
- msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
- msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
- msprobe/pytorch/hook_module/wrap_torch.py +4 -6
- msprobe/pytorch/hook_module/wrap_vf.py +4 -6
- msprobe/pytorch/module_processer.py +18 -6
- msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
- msprobe/pytorch/monitor/anomaly_detect.py +340 -0
- msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
- msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
- msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
- msprobe/pytorch/monitor/features.py +108 -0
- msprobe/pytorch/monitor/module_hook.py +870 -0
- msprobe/pytorch/monitor/module_metric.py +193 -0
- msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
- msprobe/pytorch/monitor/optimizer_collect.py +295 -0
- msprobe/pytorch/monitor/unittest/__init__.py +0 -0
- msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
- msprobe/pytorch/monitor/utils.py +250 -0
- msprobe/pytorch/monitor/visualizer.py +59 -0
- msprobe/pytorch/online_dispatch/__init__.py +2 -3
- msprobe/pytorch/online_dispatch/compare.py +38 -48
- msprobe/pytorch/online_dispatch/dispatch.py +50 -25
- msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
- msprobe/pytorch/online_dispatch/single_compare.py +60 -39
- msprobe/pytorch/online_dispatch/torch_ops_config.yaml +9 -1
- msprobe/pytorch/online_dispatch/utils.py +48 -23
- msprobe/pytorch/parse.py +15 -0
- msprobe/pytorch/parse_tool/cli.py +5 -6
- msprobe/pytorch/parse_tool/lib/compare.py +19 -26
- msprobe/pytorch/parse_tool/lib/config.py +1 -1
- msprobe/pytorch/parse_tool/lib/parse_tool.py +4 -2
- msprobe/pytorch/parse_tool/lib/utils.py +40 -55
- msprobe/pytorch/parse_tool/lib/visualization.py +3 -1
- msprobe/pytorch/pt_config.py +192 -40
- msprobe/pytorch/service.py +110 -35
- msprobe/visualization/__init__.py +14 -0
- msprobe/visualization/builder/__init__.py +14 -0
- msprobe/visualization/builder/graph_builder.py +165 -0
- msprobe/visualization/builder/msprobe_adapter.py +205 -0
- msprobe/visualization/compare/__init__.py +14 -0
- msprobe/visualization/compare/graph_comparator.py +130 -0
- msprobe/visualization/compare/mode_adapter.py +211 -0
- msprobe/visualization/graph/__init__.py +14 -0
- msprobe/visualization/graph/base_node.py +124 -0
- msprobe/visualization/graph/graph.py +200 -0
- msprobe/visualization/graph/node_colors.py +95 -0
- msprobe/visualization/graph/node_op.py +39 -0
- msprobe/visualization/graph_service.py +214 -0
- msprobe/visualization/utils.py +232 -0
- mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
- msprobe/docs/04.acl_config_examples.md +0 -76
- msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -43
- msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -107
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
- msprobe/pytorch/functional/dump_module.py +0 -39
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
- /msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
- /msprobe/pytorch/{functional/data_processor.py → monitor/distributed/__init__.py} +0 -0
|
@@ -1,8 +1,23 @@
|
|
|
1
|
-
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
2
16
|
import hashlib
|
|
17
|
+
from abc import ABC, abstractmethod
|
|
3
18
|
|
|
4
19
|
import mindspore
|
|
5
|
-
from mindspore import ops
|
|
20
|
+
from mindspore import ops
|
|
6
21
|
from msprobe.core.grad_probe.constant import GradConst
|
|
7
22
|
|
|
8
23
|
|
|
@@ -12,6 +27,7 @@ class CsvInput:
|
|
|
12
27
|
self.grad = grad
|
|
13
28
|
self.bounds = bounds
|
|
14
29
|
|
|
30
|
+
|
|
15
31
|
class GradStatCsv:
|
|
16
32
|
csv = {}
|
|
17
33
|
|
|
@@ -52,9 +68,11 @@ class CsvItem(ABC):
|
|
|
52
68
|
|
|
53
69
|
@register_csv_item(GradConst.MD5)
|
|
54
70
|
class CsvMd5(CsvItem):
|
|
71
|
+
@staticmethod
|
|
55
72
|
def generate_csv_header(csv_input):
|
|
56
73
|
return ["MD5"]
|
|
57
74
|
|
|
75
|
+
@staticmethod
|
|
58
76
|
def generate_csv_content(csv_input):
|
|
59
77
|
grad = csv_input.grad
|
|
60
78
|
tensor_bytes = grad.float().numpy().tobytes()
|
|
@@ -64,19 +82,21 @@ class CsvMd5(CsvItem):
|
|
|
64
82
|
|
|
65
83
|
@register_csv_item(GradConst.DISTRIBUTION)
|
|
66
84
|
class CsvDistribution(CsvItem):
|
|
85
|
+
@staticmethod
|
|
67
86
|
def generate_csv_header(csv_input):
|
|
68
87
|
bounds = csv_input.bounds
|
|
69
88
|
intervals = []
|
|
70
89
|
if bounds:
|
|
71
90
|
intervals.append(f"(-inf, {bounds[0]}]")
|
|
72
91
|
for i in range(1, len(bounds)):
|
|
73
|
-
intervals.append(f"({bounds[i-1]}, {bounds[i]}]")
|
|
92
|
+
intervals.append(f"({bounds[i - 1]}, {bounds[i]}]")
|
|
74
93
|
if intervals:
|
|
75
94
|
intervals.append(f"({bounds[-1]}, inf)")
|
|
76
95
|
intervals.append("=0")
|
|
77
|
-
|
|
96
|
+
|
|
78
97
|
return intervals
|
|
79
98
|
|
|
99
|
+
@staticmethod
|
|
80
100
|
def generate_csv_content(csv_input):
|
|
81
101
|
grad = csv_input.grad
|
|
82
102
|
bounds = csv_input.bounds
|
|
@@ -94,9 +114,11 @@ class CsvDistribution(CsvItem):
|
|
|
94
114
|
|
|
95
115
|
@register_csv_item(GradConst.MAX)
|
|
96
116
|
class CsvMax(CsvItem):
|
|
117
|
+
@staticmethod
|
|
97
118
|
def generate_csv_header(csv_input):
|
|
98
119
|
return ["max"]
|
|
99
120
|
|
|
121
|
+
@staticmethod
|
|
100
122
|
def generate_csv_content(csv_input):
|
|
101
123
|
grad = csv_input.grad
|
|
102
124
|
return [ops.amax(grad).float().numpy().tolist()]
|
|
@@ -104,9 +126,11 @@ class CsvMax(CsvItem):
|
|
|
104
126
|
|
|
105
127
|
@register_csv_item(GradConst.MIN)
|
|
106
128
|
class CsvMin(CsvItem):
|
|
129
|
+
@staticmethod
|
|
107
130
|
def generate_csv_header(csv_input):
|
|
108
131
|
return ["min"]
|
|
109
132
|
|
|
133
|
+
@staticmethod
|
|
110
134
|
def generate_csv_content(csv_input):
|
|
111
135
|
grad = csv_input.grad
|
|
112
136
|
return [ops.amin(grad).float().numpy().tolist()]
|
|
@@ -114,9 +138,11 @@ class CsvMin(CsvItem):
|
|
|
114
138
|
|
|
115
139
|
@register_csv_item(GradConst.NORM)
|
|
116
140
|
class CsvNorm(CsvItem):
|
|
141
|
+
@staticmethod
|
|
117
142
|
def generate_csv_header(csv_input):
|
|
118
143
|
return ["norm"]
|
|
119
144
|
|
|
145
|
+
@staticmethod
|
|
120
146
|
def generate_csv_content(csv_input):
|
|
121
147
|
grad = csv_input.grad
|
|
122
148
|
return [ops.norm(grad).float().numpy().tolist()]
|
|
@@ -124,9 +150,11 @@ class CsvNorm(CsvItem):
|
|
|
124
150
|
|
|
125
151
|
@register_csv_item(GradConst.SHAPE)
|
|
126
152
|
class CsvShape(CsvItem):
|
|
153
|
+
@staticmethod
|
|
127
154
|
def generate_csv_header(csv_input):
|
|
128
155
|
return ["shape"]
|
|
129
156
|
|
|
157
|
+
@staticmethod
|
|
130
158
|
def generate_csv_content(csv_input):
|
|
131
159
|
grad = csv_input.grad
|
|
132
|
-
return [list(grad.shape)]
|
|
160
|
+
return [list(grad.shape)]
|
|
@@ -1,25 +1,37 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
1
15
|
|
|
2
16
|
import os
|
|
3
17
|
|
|
4
18
|
import mindspore
|
|
5
19
|
import mindspore as ms
|
|
6
20
|
from mindspore.common.api import jit
|
|
7
|
-
from mindspore.nn.optim.optimizer import Optimizer
|
|
8
|
-
from mindspore.common.parameter import Parameter
|
|
9
21
|
from mindspore.common.initializer import initializer
|
|
10
|
-
|
|
22
|
+
from mindspore.common.parameter import Parameter
|
|
23
|
+
from mindspore.nn.optim.optimizer import Optimizer
|
|
24
|
+
from msprobe.core.common.file_utils import remove_path, write_csv, create_directory
|
|
11
25
|
from msprobe.core.grad_probe.constant import GradConst
|
|
12
26
|
from msprobe.mindspore.common.log import logger
|
|
13
|
-
|
|
14
|
-
from msprobe.core.common.file_utils import remove_path, write_csv, create_directory
|
|
15
27
|
from msprobe.mindspore.grad_probe.global_context import grad_context
|
|
16
|
-
from msprobe.mindspore.grad_probe.grad_analyzer import grad_dump, get_rank_id
|
|
17
28
|
from msprobe.mindspore.grad_probe.grad_analyzer import csv_generator
|
|
29
|
+
from msprobe.mindspore.grad_probe.grad_analyzer import grad_dump, get_rank_id
|
|
18
30
|
from msprobe.mindspore.grad_probe.grad_stat_csv import GradStatCsv, CsvInput
|
|
19
31
|
from msprobe.mindspore.grad_probe.utils import save_grad_direction, get_adapted_level
|
|
20
32
|
|
|
21
|
-
class HookInput:
|
|
22
33
|
|
|
34
|
+
class HookInput:
|
|
23
35
|
'''
|
|
24
36
|
HookInput is a class wrapping all the variables used for hooking optimizer
|
|
25
37
|
'''
|
|
@@ -40,6 +52,7 @@ class HookInput:
|
|
|
40
52
|
self.bounds = grad_context.get_context(GradConst.BOUNDS)
|
|
41
53
|
self.mode = mindspore.get_context("mode")
|
|
42
54
|
|
|
55
|
+
|
|
43
56
|
def hook_graph_mode_optimizer(opt, hook_input):
|
|
44
57
|
@jit
|
|
45
58
|
def new_construct(self, gradients):
|
|
@@ -47,7 +60,7 @@ def hook_graph_mode_optimizer(opt, hook_input):
|
|
|
47
60
|
if hook_input.param_list and hook_input.g_names[index] not in hook_input.param_list:
|
|
48
61
|
continue
|
|
49
62
|
grad_dump(hook_input.dump_dir, hook_input.g_names[index], self.dump_step,
|
|
50
|
-
|
|
63
|
+
grad_value, hook_input.level, hook_input.bounds)
|
|
51
64
|
ms.ops.TensorDump()(hook_input.step_finish_flag, self.dump_step)
|
|
52
65
|
self.assignadd(self.dump_step, self.global_step_increase_tensor)
|
|
53
66
|
out = hook_input.func(gradients)
|
|
@@ -57,11 +70,12 @@ def hook_graph_mode_optimizer(opt, hook_input):
|
|
|
57
70
|
opt.construct = new_construct.__get__(opt, type(opt))
|
|
58
71
|
csv_generator.start()
|
|
59
72
|
|
|
73
|
+
|
|
60
74
|
def hook_pynative_optimizer(opt, hook_input):
|
|
61
75
|
level_adapted = get_adapted_level(hook_input.level)
|
|
62
76
|
|
|
63
|
-
def hook_fn(cell,
|
|
64
|
-
gradients, =
|
|
77
|
+
def hook_fn(cell, input_data):
|
|
78
|
+
gradients, = input_data
|
|
65
79
|
cur_step = grad_context.get_context(GradConst.CURRENT_STEP)
|
|
66
80
|
if grad_context.step_need_dump(cur_step) and grad_context.rank_need_dump(hook_input.rank_id):
|
|
67
81
|
create_directory(hook_input.save_dir)
|
|
@@ -1,12 +1,26 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
1
16
|
import os
|
|
2
17
|
|
|
3
18
|
import mindspore
|
|
4
|
-
from msprobe.core.grad_probe.constant import level_adp
|
|
5
|
-
from msprobe.core.grad_probe.utils import check_param
|
|
6
19
|
from msprobe.core.common.file_utils import (create_directory,
|
|
7
|
-
check_path_before_create,
|
|
8
20
|
check_file_or_directory_path,
|
|
9
21
|
save_npy)
|
|
22
|
+
from msprobe.core.grad_probe.constant import level_adp
|
|
23
|
+
from msprobe.core.grad_probe.utils import check_param
|
|
10
24
|
|
|
11
25
|
|
|
12
26
|
def save_grad_direction(param_name, grad, save_path):
|
|
@@ -15,7 +29,6 @@ def save_grad_direction(param_name, grad, save_path):
|
|
|
15
29
|
check_file_or_directory_path(save_path, isdir=True)
|
|
16
30
|
check_param(param_name)
|
|
17
31
|
save_filepath = os.path.join(save_path, f"{param_name}.npy")
|
|
18
|
-
check_path_before_create(save_filepath)
|
|
19
32
|
|
|
20
33
|
if grad.dtype == mindspore.bfloat16:
|
|
21
34
|
grad = grad.to(mindspore.float32)
|
|
@@ -27,4 +40,4 @@ def save_grad_direction(param_name, grad, save_path):
|
|
|
27
40
|
|
|
28
41
|
def get_adapted_level(level: str):
|
|
29
42
|
level_adapted = level_adp.get(level)
|
|
30
|
-
return level_adapted
|
|
43
|
+
return level_adapted
|
msprobe/mindspore/ms_config.py
CHANGED
|
@@ -1,12 +1,26 @@
|
|
|
1
|
-
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
2
15
|
|
|
3
|
-
from msprobe.core.common_config import CommonConfig, BaseConfig
|
|
4
|
-
from msprobe.core.common.file_utils import FileOpen
|
|
5
16
|
from msprobe.core.common.const import Const
|
|
6
|
-
from msprobe.
|
|
7
|
-
from msprobe.
|
|
17
|
+
from msprobe.core.common.file_utils import load_json
|
|
18
|
+
from msprobe.core.common.utils import is_int
|
|
19
|
+
from msprobe.core.common_config import BaseConfig, CommonConfig
|
|
8
20
|
from msprobe.core.grad_probe.constant import level_adp
|
|
9
21
|
from msprobe.core.grad_probe.utils import check_numeral_list_ascend
|
|
22
|
+
from msprobe.mindspore.common.const import FreeBenchmarkConst
|
|
23
|
+
from msprobe.mindspore.common.log import logger
|
|
10
24
|
|
|
11
25
|
|
|
12
26
|
class TensorConfig(BaseConfig):
|
|
@@ -18,9 +32,6 @@ class TensorConfig(BaseConfig):
|
|
|
18
32
|
self._check_config()
|
|
19
33
|
|
|
20
34
|
def _check_config(self):
|
|
21
|
-
if self.data_mode is not None and len(self.data_mode) > 0:
|
|
22
|
-
if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
|
|
23
|
-
raise Exception("data_mode must be all, input or output")
|
|
24
35
|
if self.file_format and self.file_format not in ["npy", "bin"]:
|
|
25
36
|
raise Exception("file_format is invalid")
|
|
26
37
|
|
|
@@ -34,9 +45,6 @@ class StatisticsConfig(BaseConfig):
|
|
|
34
45
|
self._check_config()
|
|
35
46
|
|
|
36
47
|
def _check_config(self):
|
|
37
|
-
if self.data_mode is not None and len(self.data_mode) > 0:
|
|
38
|
-
if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
|
|
39
|
-
raise Exception("data_mode must be all, input or output")
|
|
40
48
|
if self.summary_mode and self.summary_mode not in ["statistics", "md5"]:
|
|
41
49
|
raise Exception("summary_mode is invalid")
|
|
42
50
|
|
|
@@ -48,7 +56,7 @@ class OverflowCheckConfig(BaseConfig):
|
|
|
48
56
|
self._check_config()
|
|
49
57
|
|
|
50
58
|
def _check_config(self):
|
|
51
|
-
if self.overflow_nums is not None and not
|
|
59
|
+
if self.overflow_nums is not None and not is_int(self.overflow_nums):
|
|
52
60
|
raise Exception("overflow_nums is invalid, it should be an integer")
|
|
53
61
|
if self.overflow_nums is not None and self.overflow_nums != -1 and self.overflow_nums <= 0:
|
|
54
62
|
raise Exception("overflow_nums should be -1 or positive integer")
|
|
@@ -72,7 +80,7 @@ class FreeBenchmarkConfig(BaseConfig):
|
|
|
72
80
|
if self.fuzz_level and self.fuzz_level not in FreeBenchmarkConst.DUMP_LEVEL_LIST:
|
|
73
81
|
raise Exception("fuzz_level must be L1 or empty")
|
|
74
82
|
if self.fuzz_stage and self.fuzz_stage not in FreeBenchmarkConst.STAGE_LIST:
|
|
75
|
-
raise Exception("fuzz_stage must be forward or empty")
|
|
83
|
+
raise Exception("fuzz_stage must be forward, backward or empty")
|
|
76
84
|
if self.if_preheat or self.preheat_step or self.max_sample:
|
|
77
85
|
logger.warning("'if_preheat', 'preheat_step' and 'max_sample' settings "
|
|
78
86
|
"are not supported for mindspore free benchmark task.")
|
|
@@ -119,8 +127,7 @@ def parse_task_config(task, json_config):
|
|
|
119
127
|
def parse_json_config(json_file_path):
|
|
120
128
|
if not json_file_path:
|
|
121
129
|
raise Exception("json file path is None")
|
|
122
|
-
|
|
123
|
-
json_config = json.load(file)
|
|
130
|
+
json_config = load_json(json_file_path)
|
|
124
131
|
common_config = parse_common_config(json_config)
|
|
125
132
|
if not common_config.task:
|
|
126
133
|
common_config.task = Const.STATISTICS
|
|
@@ -1,8 +1,23 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
1
16
|
import os
|
|
2
|
-
|
|
3
|
-
from msprobe.
|
|
17
|
+
|
|
18
|
+
from msprobe.core.common.file_utils import create_directory, save_json
|
|
4
19
|
from msprobe.mindspore.common.log import logger
|
|
5
|
-
from msprobe.
|
|
20
|
+
from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
|
|
6
21
|
|
|
7
22
|
|
|
8
23
|
class KernelGraphOverflowCheck:
|
|
@@ -16,7 +31,7 @@ class KernelGraphOverflowCheck:
|
|
|
16
31
|
self.dump_json["common_dump_settings"]["saved_data"] = "full"
|
|
17
32
|
self.dump_json["common_dump_settings"]["input_output"] = 0
|
|
18
33
|
self.dump_json["common_dump_settings"]["kernels"] = []
|
|
19
|
-
self.dump_json["common_dump_settings"]["support_device"] = [0,1,2,3,4,5,6,7]
|
|
34
|
+
self.dump_json["common_dump_settings"]["support_device"] = [0, 1, 2, 3, 4, 5, 6, 7]
|
|
20
35
|
self.dump_json["common_dump_settings"]["op_debug_mode"] = 3
|
|
21
36
|
self.dump_json["common_dump_settings"]["file_format"] = "npy"
|
|
22
37
|
|
|
@@ -36,8 +51,7 @@ class KernelGraphOverflowCheck:
|
|
|
36
51
|
json_path = self.dump_json["common_dump_settings"]["path"]
|
|
37
52
|
create_directory(json_path)
|
|
38
53
|
json_path = os.path.join(json_path, "kernel_graph_overflow_check.json")
|
|
39
|
-
|
|
40
|
-
json.dump(self.dump_json, f)
|
|
54
|
+
save_json(json_path, self.dump_json, indent=4)
|
|
41
55
|
logger.info(json_path + " has been created.")
|
|
42
56
|
os.environ["MINDSPORE_DUMP_CONFIG"] = json_path
|
|
43
57
|
if "MS_ACL_DUMP_CFG_PATH" in os.environ:
|
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
1
16
|
from msprobe.mindspore.common.const import Const
|
|
2
17
|
from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
|
|
3
18
|
from msprobe.mindspore.overflow_check.kernel_graph_overflow_check import KernelGraphOverflowCheck
|
msprobe/mindspore/runtime.py
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
1
16
|
class Runtime:
|
|
2
17
|
step_count: int = 0
|
|
3
18
|
rank_id: int = -1
|