mindstudio-probe 1.2.2__py3-none-any.whl → 8.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/METADATA +4 -3
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/RECORD +243 -191
- msprobe/README.md +57 -21
- msprobe/core/__init__.py +17 -0
- msprobe/core/common/const.py +224 -82
- msprobe/core/common/decorator.py +50 -0
- msprobe/core/common/exceptions.py +5 -3
- msprobe/core/common/file_utils.py +274 -40
- msprobe/core/common/framework_adapter.py +169 -0
- msprobe/core/common/global_lock.py +86 -0
- msprobe/core/common/runtime.py +25 -0
- msprobe/core/common/utils.py +148 -72
- msprobe/core/common_config.py +7 -0
- msprobe/core/compare/acc_compare.py +640 -462
- msprobe/core/compare/check.py +36 -107
- msprobe/core/compare/compare_cli.py +4 -0
- msprobe/core/compare/config.py +72 -0
- msprobe/core/compare/highlight.py +217 -215
- msprobe/core/compare/layer_mapping/layer_mapping.py +4 -1
- msprobe/core/compare/merge_result/merge_result.py +12 -6
- msprobe/core/compare/multiprocessing_compute.py +227 -107
- msprobe/core/compare/npy_compare.py +32 -16
- msprobe/core/compare/utils.py +218 -244
- msprobe/{mindspore/runtime.py → core/config_check/__init__.py} +2 -4
- msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
- msprobe/core/config_check/checkers/base_checker.py +60 -0
- msprobe/core/config_check/checkers/dataset_checker.py +138 -0
- msprobe/core/config_check/checkers/env_args_checker.py +96 -0
- msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
- msprobe/core/config_check/checkers/pip_checker.py +90 -0
- msprobe/core/config_check/checkers/random_checker.py +367 -0
- msprobe/core/config_check/checkers/weights_checker.py +147 -0
- msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
- msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
- msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
- msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
- msprobe/core/config_check/config_check_cli.py +51 -0
- msprobe/core/config_check/config_checker.py +100 -0
- msprobe/{pytorch/parse.py → core/config_check/resource/dependency.yaml} +7 -4
- msprobe/core/config_check/resource/env.yaml +57 -0
- msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
- msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
- msprobe/core/config_check/utils/utils.py +107 -0
- msprobe/core/data_dump/api_registry.py +239 -0
- msprobe/core/data_dump/data_collector.py +36 -9
- msprobe/core/data_dump/data_processor/base.py +74 -53
- msprobe/core/data_dump/data_processor/mindspore_processor.py +119 -78
- msprobe/core/data_dump/data_processor/pytorch_processor.py +134 -96
- msprobe/core/data_dump/json_writer.py +146 -57
- msprobe/core/debugger/precision_debugger.py +143 -0
- msprobe/core/grad_probe/constant.py +2 -1
- msprobe/core/grad_probe/grad_compare.py +2 -2
- msprobe/core/grad_probe/utils.py +1 -1
- msprobe/core/hook_manager.py +242 -0
- msprobe/core/monitor/anomaly_processor.py +384 -0
- msprobe/core/overflow_check/abnormal_scene.py +2 -0
- msprobe/core/service.py +356 -0
- msprobe/core/single_save/__init__.py +0 -0
- msprobe/core/single_save/single_comparator.py +243 -0
- msprobe/core/single_save/single_saver.py +157 -0
- msprobe/docs/01.installation.md +6 -5
- msprobe/docs/02.config_introduction.md +89 -30
- msprobe/docs/03.config_examples.md +1 -0
- msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
- msprobe/docs/05.data_dump_PyTorch.md +184 -50
- msprobe/docs/06.data_dump_MindSpore.md +193 -28
- msprobe/docs/07.accuracy_checker_PyTorch.md +13 -3
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +72 -10
- msprobe/docs/09.accuracy_checker_MindSpore.md +19 -7
- msprobe/docs/10.accuracy_compare_PyTorch.md +266 -102
- msprobe/docs/11.accuracy_compare_MindSpore.md +117 -43
- msprobe/docs/12.overflow_check_PyTorch.md +5 -3
- msprobe/docs/13.overflow_check_MindSpore.md +6 -4
- msprobe/docs/14.data_parse_PyTorch.md +4 -10
- msprobe/docs/17.grad_probe.md +2 -1
- msprobe/docs/18.online_dispatch.md +3 -3
- msprobe/docs/19.monitor.md +211 -103
- msprobe/docs/21.visualization_PyTorch.md +100 -28
- msprobe/docs/22.visualization_MindSpore.md +103 -31
- msprobe/docs/23.generate_operator_PyTorch.md +9 -9
- msprobe/docs/25.tool_function_introduction.md +23 -22
- msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
- msprobe/docs/27.dump_json_instruction.md +278 -8
- msprobe/docs/28.debugger_save_instruction.md +111 -20
- msprobe/docs/28.kernel_dump_MindSpore.md +1 -1
- msprobe/docs/29.data_dump_MSAdapter.md +229 -0
- msprobe/docs/30.overflow_check_MSAdapter.md +31 -0
- msprobe/docs/31.config_check.md +95 -0
- msprobe/docs/32.ckpt_compare.md +69 -0
- msprobe/docs/33.generate_operator_MindSpore.md +190 -0
- msprobe/docs/34.RL_collect.md +92 -0
- msprobe/docs/35.nan_analyze.md +72 -0
- msprobe/docs/FAQ.md +3 -11
- msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
- msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/merge_result.png +0 -0
- msprobe/docs/img/save_compare_result_sample.png +0 -0
- msprobe/docs/img/visualization/proxy.png +0 -0
- msprobe/docs/img/visualization/vis_browser_1.png +0 -0
- msprobe/docs/img/visualization/vis_match_info.png +0 -0
- msprobe/docs/img/visualization/vis_precision_info.png +0 -0
- msprobe/docs/img/visualization/vis_search_info.png +0 -0
- msprobe/docs/img/visualization/vis_show_info.png +0 -0
- msprobe/docs/img/visualization/vis_showcase.png +0 -0
- msprobe/docs/img/visualization/vis_unmatch_info.png +0 -0
- msprobe/mindspore/__init__.py +3 -3
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +151 -55
- msprobe/mindspore/api_accuracy_checker/api_runner.py +25 -11
- msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +2 -1
- msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +580 -0
- msprobe/mindspore/api_accuracy_checker/bench_functions/fusion_operator.py +41 -0
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
- msprobe/mindspore/api_accuracy_checker/data_manager.py +4 -3
- msprobe/mindspore/api_accuracy_checker/generate_op_script/config_op.json +9 -0
- msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +451 -0
- msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +11 -1
- msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
- msprobe/mindspore/cell_processor.py +204 -33
- msprobe/mindspore/code_mapping/graph_parser.py +4 -21
- msprobe/mindspore/common/const.py +73 -2
- msprobe/mindspore/common/utils.py +157 -29
- msprobe/mindspore/compare/common_dir_compare.py +382 -0
- msprobe/mindspore/compare/distributed_compare.py +2 -26
- msprobe/mindspore/compare/ms_compare.py +18 -398
- msprobe/mindspore/compare/ms_graph_compare.py +20 -10
- msprobe/mindspore/compare/utils.py +37 -0
- msprobe/mindspore/debugger/debugger_config.py +59 -7
- msprobe/mindspore/debugger/precision_debugger.py +83 -90
- msprobe/mindspore/dump/cell_dump_process.py +902 -0
- msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +889 -0
- msprobe/mindspore/dump/dump_tool_factory.py +18 -8
- msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
- msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
- msprobe/mindspore/dump/hook_cell/api_register.py +176 -0
- msprobe/mindspore/dump/hook_cell/hook_cell.py +22 -12
- msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +42 -26
- msprobe/mindspore/dump/jit_dump.py +35 -27
- msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
- msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -16
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +22 -12
- msprobe/mindspore/free_benchmark/common/utils.py +1 -1
- msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +4 -2
- msprobe/mindspore/free_benchmark/self_check_tool_factory.py +6 -3
- msprobe/mindspore/grad_probe/global_context.py +9 -2
- msprobe/mindspore/grad_probe/grad_analyzer.py +2 -1
- msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
- msprobe/mindspore/grad_probe/hook.py +2 -4
- msprobe/mindspore/mindspore_service.py +111 -0
- msprobe/mindspore/monitor/common_func.py +52 -0
- msprobe/mindspore/monitor/data_writers.py +237 -0
- msprobe/mindspore/monitor/distributed/wrap_distributed.py +1 -1
- msprobe/mindspore/monitor/features.py +13 -1
- msprobe/mindspore/monitor/module_hook.py +568 -444
- msprobe/mindspore/monitor/optimizer_collect.py +331 -0
- msprobe/mindspore/monitor/utils.py +71 -9
- msprobe/mindspore/ms_config.py +16 -15
- msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +5 -3
- msprobe/mindspore/task_handler_factory.py +5 -2
- msprobe/msprobe.py +19 -0
- msprobe/nan_analyze/__init__.py +14 -0
- msprobe/nan_analyze/analyzer.py +255 -0
- msprobe/nan_analyze/graph.py +189 -0
- msprobe/nan_analyze/utils.py +211 -0
- msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
- msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -6
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +15 -13
- msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +206 -4
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +9 -9
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +6 -5
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +31 -9
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +28 -20
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +3 -1
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +154 -0
- msprobe/pytorch/attl_manager.py +65 -0
- msprobe/pytorch/bench_functions/moe_gating_top_k_softmax.py +6 -0
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
- msprobe/pytorch/common/utils.py +53 -19
- msprobe/pytorch/compare/distributed_compare.py +4 -36
- msprobe/pytorch/compare/pt_compare.py +13 -84
- msprobe/pytorch/compare/utils.py +47 -0
- msprobe/pytorch/debugger/debugger_config.py +34 -17
- msprobe/pytorch/debugger/precision_debugger.py +50 -96
- msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
- msprobe/pytorch/dump/module_dump/module_dump.py +15 -61
- msprobe/pytorch/dump/module_dump/module_processer.py +150 -114
- msprobe/pytorch/free_benchmark/common/utils.py +1 -1
- msprobe/pytorch/free_benchmark/compare/single_benchmark.py +1 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -3
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +3 -3
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +1 -1
- msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +1 -1
- msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +1 -1
- msprobe/pytorch/function_factory.py +1 -1
- msprobe/pytorch/grad_probe/grad_monitor.py +2 -2
- msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
- msprobe/pytorch/hook_module/api_register.py +155 -0
- msprobe/pytorch/hook_module/hook_module.py +18 -22
- msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
- msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
- msprobe/pytorch/hook_module/register_optimizer_hook.py +2 -1
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +193 -75
- msprobe/pytorch/hook_module/utils.py +28 -2
- msprobe/pytorch/monitor/csv2tb.py +14 -4
- msprobe/pytorch/monitor/data_writers.py +259 -0
- msprobe/pytorch/monitor/distributed/wrap_distributed.py +8 -2
- msprobe/pytorch/monitor/module_hook.py +336 -241
- msprobe/pytorch/monitor/module_metric.py +17 -0
- msprobe/pytorch/monitor/optimizer_collect.py +244 -224
- msprobe/pytorch/monitor/utils.py +84 -4
- msprobe/pytorch/online_dispatch/compare.py +0 -2
- msprobe/pytorch/online_dispatch/dispatch.py +13 -2
- msprobe/pytorch/online_dispatch/dump_compare.py +8 -2
- msprobe/pytorch/online_dispatch/utils.py +3 -0
- msprobe/pytorch/parse_tool/lib/interactive_cli.py +1 -6
- msprobe/pytorch/parse_tool/lib/utils.py +5 -4
- msprobe/pytorch/pt_config.py +16 -11
- msprobe/pytorch/pytorch_service.py +70 -0
- msprobe/visualization/builder/graph_builder.py +69 -10
- msprobe/visualization/builder/msprobe_adapter.py +24 -12
- msprobe/visualization/compare/graph_comparator.py +63 -51
- msprobe/visualization/compare/mode_adapter.py +22 -20
- msprobe/visualization/graph/base_node.py +11 -4
- msprobe/visualization/graph/distributed_analyzer.py +1 -10
- msprobe/visualization/graph/graph.py +2 -13
- msprobe/visualization/graph/node_op.py +1 -2
- msprobe/visualization/graph_service.py +251 -104
- msprobe/visualization/utils.py +26 -44
- msprobe/mindspore/dump/hook_cell/api_registry.py +0 -207
- msprobe/mindspore/dump/hook_cell/wrap_api.py +0 -212
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -140
- msprobe/mindspore/monitor/anomaly_detect.py +0 -404
- msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
- msprobe/mindspore/service.py +0 -543
- msprobe/pytorch/hook_module/api_registry.py +0 -166
- msprobe/pytorch/hook_module/wrap_distributed.py +0 -79
- msprobe/pytorch/hook_module/wrap_functional.py +0 -66
- msprobe/pytorch/hook_module/wrap_npu_custom.py +0 -85
- msprobe/pytorch/hook_module/wrap_tensor.py +0 -69
- msprobe/pytorch/hook_module/wrap_torch.py +0 -84
- msprobe/pytorch/hook_module/wrap_vf.py +0 -60
- msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
- msprobe/pytorch/monitor/anomaly_detect.py +0 -410
- msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
- msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
- msprobe/pytorch/service.py +0 -470
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/WHEEL +0 -0
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-1.2.2.dist-info → mindstudio_probe-8.1.0.dist-info}/top_level.txt +0 -0
- /msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
- /msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
- /msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0
|
@@ -19,22 +19,33 @@ from collections import defaultdict, namedtuple
|
|
|
19
19
|
import mindspore as ms
|
|
20
20
|
from mindspore._c_expression import MSContext
|
|
21
21
|
|
|
22
|
-
from msprobe.core.common.const import Const,
|
|
23
|
-
from msprobe.core.common.
|
|
24
|
-
from msprobe.core.common.
|
|
25
|
-
from msprobe.core.
|
|
22
|
+
from msprobe.core.common.const import Const, MsgConst
|
|
23
|
+
from msprobe.core.common.utils import check_token_range
|
|
24
|
+
from msprobe.core.common.runtime import Runtime
|
|
25
|
+
from msprobe.core.debugger.precision_debugger import BasePrecisionDebugger
|
|
26
26
|
from msprobe.mindspore.cell_processor import CellProcessor
|
|
27
27
|
from msprobe.mindspore.common.const import Const as MsConst
|
|
28
|
-
from msprobe.mindspore.common.utils import
|
|
28
|
+
from msprobe.mindspore.common.utils import (
|
|
29
|
+
set_register_backward_hook_functions,
|
|
30
|
+
check_save_param,
|
|
31
|
+
is_graph_mode_cell_dump_allowed
|
|
32
|
+
)
|
|
29
33
|
from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
|
|
30
|
-
from msprobe.mindspore.dump.
|
|
34
|
+
from msprobe.mindspore.dump.graph_mode_cell_dump import GraphModeCellDump
|
|
35
|
+
from msprobe.mindspore.dump.hook_cell.api_register import get_api_register
|
|
31
36
|
from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
|
|
32
37
|
from msprobe.mindspore.grad_probe.grad_monitor import GradientMonitor
|
|
33
|
-
from msprobe.mindspore.ms_config import
|
|
34
|
-
from msprobe.mindspore.
|
|
35
|
-
from msprobe.mindspore.service import Service
|
|
38
|
+
from msprobe.mindspore.ms_config import parse_task_config
|
|
39
|
+
from msprobe.mindspore.mindspore_service import MindsporeService
|
|
36
40
|
from msprobe.mindspore.task_handler_factory import TaskHandlerFactory
|
|
37
41
|
|
|
42
|
+
try:
|
|
43
|
+
from mindspore._c_expression import _dump_start, _dump_stop, _dump_step, _set_init_iter, _dump_set_dynamic
|
|
44
|
+
except ImportError:
|
|
45
|
+
enable_dynamic_kbyk_dump = False
|
|
46
|
+
else:
|
|
47
|
+
enable_dynamic_kbyk_dump = True
|
|
48
|
+
|
|
38
49
|
try:
|
|
39
50
|
from msprobe.lib import _msprobe_c
|
|
40
51
|
except ImportError:
|
|
@@ -44,9 +55,7 @@ except ImportError:
|
|
|
44
55
|
ConfigParameters = namedtuple("ConfigParameters", ["config_path", "task", "dump_path", "level"])
|
|
45
56
|
|
|
46
57
|
|
|
47
|
-
class PrecisionDebugger:
|
|
48
|
-
_instance = None
|
|
49
|
-
task_not_need_service = [Const.GRAD_PROBE]
|
|
58
|
+
class PrecisionDebugger(BasePrecisionDebugger):
|
|
50
59
|
|
|
51
60
|
def __new__(cls, config_path=None, task=None, dump_path=None,
|
|
52
61
|
level=None, step=None, opt=None):
|
|
@@ -62,61 +71,33 @@ class PrecisionDebugger:
|
|
|
62
71
|
level=None, step=None):
|
|
63
72
|
if self.initialized:
|
|
64
73
|
return
|
|
65
|
-
self.initialized = True
|
|
66
|
-
|
|
67
74
|
set_register_backward_hook_functions()
|
|
75
|
+
super().__init__(config_path, task, dump_path, level, step)
|
|
68
76
|
|
|
69
|
-
if not config_path:
|
|
70
|
-
config_path = os.path.join(os.path.dirname(__file__), "../../config.json")
|
|
71
|
-
|
|
72
|
-
config_params = ConfigParameters(config_path, task, dump_path, level)
|
|
73
|
-
self.check_input_params(config_params)
|
|
74
|
-
|
|
75
|
-
common_config, task_config = parse_json_config(config_path)
|
|
76
|
-
common_config.task = task if task else common_config.task
|
|
77
|
-
self.task = common_config.task
|
|
78
77
|
if self.task == Const.GRAD_PROBE:
|
|
79
|
-
self.gm = GradientMonitor(common_config, task_config)
|
|
78
|
+
self.gm = GradientMonitor(self.common_config, self.task_config)
|
|
80
79
|
return
|
|
81
|
-
common_config.
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
common_config.dump_path = dump_path if dump_path else common_config.dump_path
|
|
85
|
-
self.config = DebuggerConfig(common_config, task_config)
|
|
80
|
+
self.common_config.level = level if level else self.common_config.level
|
|
81
|
+
self.common_config.dump_path = dump_path if dump_path else self.common_config.dump_path
|
|
82
|
+
self.config = DebuggerConfig(self.common_config, self.task_config)
|
|
86
83
|
|
|
87
|
-
if _msprobe_c:
|
|
84
|
+
if self._need_msprobe_c() and _msprobe_c:
|
|
85
|
+
os.environ["MS_HOOK_ENABLE"] = "on"
|
|
88
86
|
_msprobe_c._PrecisionDebugger(framework="MindSpore", config_path=config_path)
|
|
89
87
|
|
|
90
88
|
self.config.execution_mode = self._get_execution_mode()
|
|
91
89
|
if self._need_service():
|
|
92
90
|
self.config.check_config_with_l2()
|
|
93
|
-
self.service =
|
|
91
|
+
self.service = MindsporeService(self.config)
|
|
94
92
|
|
|
95
93
|
Runtime.step_count = 0
|
|
96
94
|
Runtime.is_running = False
|
|
95
|
+
if enable_dynamic_kbyk_dump:
|
|
96
|
+
_dump_set_dynamic()
|
|
97
97
|
|
|
98
98
|
@staticmethod
|
|
99
|
-
def
|
|
100
|
-
|
|
101
|
-
if not isinstance(args.config_path, str):
|
|
102
|
-
raise MsprobeException(
|
|
103
|
-
MsprobeException.INVALID_PARAM_ERROR, f"config_path must be a string")
|
|
104
|
-
file_checker = FileChecker(
|
|
105
|
-
file_path=args.config_path, path_type=FileCheckConst.FILE, file_type=FileCheckConst.JSON_SUFFIX)
|
|
106
|
-
file_checker.common_check()
|
|
107
|
-
|
|
108
|
-
if args.task is not None and args.task not in Const.TASK_LIST:
|
|
109
|
-
raise MsprobeException(
|
|
110
|
-
MsprobeException.INVALID_PARAM_ERROR, f"task must be one of {Const.TASK_LIST}")
|
|
111
|
-
|
|
112
|
-
if args.dump_path is not None:
|
|
113
|
-
if not isinstance(args.dump_path, str):
|
|
114
|
-
raise MsprobeException(
|
|
115
|
-
MsprobeException.INVALID_PARAM_ERROR, f"dump_path must be a string")
|
|
116
|
-
|
|
117
|
-
if args.level is not None and args.level not in Const.LEVEL_LIST:
|
|
118
|
-
raise MsprobeException(
|
|
119
|
-
MsprobeException.INVALID_PARAM_ERROR, f"level must be one of {Const.LEVEL_LIST}")
|
|
99
|
+
def get_task_config(task, json_config):
|
|
100
|
+
return parse_task_config(task, json_config)
|
|
120
101
|
|
|
121
102
|
@staticmethod
|
|
122
103
|
def _get_execution_mode():
|
|
@@ -137,7 +118,7 @@ class PrecisionDebugger:
|
|
|
137
118
|
return MsConst.PYNATIVE_MODE
|
|
138
119
|
|
|
139
120
|
@staticmethod
|
|
140
|
-
def _is_graph_dump(config):
|
|
121
|
+
def _is_graph_dump(config: DebuggerConfig):
|
|
141
122
|
if config.level != MsConst.KERNEL:
|
|
142
123
|
return False
|
|
143
124
|
if not config.list:
|
|
@@ -147,63 +128,68 @@ class PrecisionDebugger:
|
|
|
147
128
|
return is_graph
|
|
148
129
|
|
|
149
130
|
@classmethod
|
|
150
|
-
def start(cls, model=None):
|
|
151
|
-
instance = cls.
|
|
152
|
-
if
|
|
153
|
-
raise Exception(MsgConst.NOT_CREATED_INSTANCE)
|
|
154
|
-
if _msprobe_c:
|
|
155
|
-
_msprobe_c._PrecisionDebugger().start()
|
|
156
|
-
if instance.task in PrecisionDebugger.task_not_need_service:
|
|
131
|
+
def start(cls, model=None, token_range=None):
|
|
132
|
+
instance = cls.get_instance()
|
|
133
|
+
if instance is None:
|
|
157
134
|
return
|
|
158
|
-
|
|
135
|
+
if cls._need_msprobe_c() and _msprobe_c:
|
|
136
|
+
_msprobe_c._PrecisionDebugger().start()
|
|
137
|
+
check_token_range(token_range)
|
|
159
138
|
instance.config.execution_mode = cls._get_execution_mode()
|
|
160
139
|
if cls._need_service():
|
|
161
140
|
if not instance.service:
|
|
162
|
-
instance.service =
|
|
163
|
-
instance.
|
|
141
|
+
instance.service = MindsporeService(instance.config)
|
|
142
|
+
instance.config.check_model(model, token_range)
|
|
143
|
+
instance.service.start(model, token_range)
|
|
164
144
|
else:
|
|
165
145
|
if not instance.first_start:
|
|
166
|
-
|
|
167
|
-
handler = TaskHandlerFactory.create(instance.config)
|
|
146
|
+
get_api_register().restore_all_api()
|
|
147
|
+
handler = TaskHandlerFactory.create(instance.config, model)
|
|
168
148
|
handler.handle()
|
|
169
|
-
|
|
149
|
+
if enable_dynamic_kbyk_dump:
|
|
150
|
+
_set_init_iter(0)
|
|
151
|
+
if enable_dynamic_kbyk_dump:
|
|
152
|
+
is_valid_rank = (not instance.config.rank or Runtime.rank_id in instance.config.rank)
|
|
153
|
+
is_valid_step = (not instance.config.step or Runtime.step_count in instance.config.step)
|
|
154
|
+
if is_valid_rank and is_valid_step:
|
|
155
|
+
_dump_start()
|
|
156
|
+
Runtime.is_running = True
|
|
170
157
|
instance.first_start = True
|
|
171
|
-
Runtime.is_running = True
|
|
172
|
-
|
|
173
|
-
@classmethod
|
|
174
|
-
def forward_backward_dump_end(cls):
|
|
175
|
-
instance = cls._instance
|
|
176
|
-
instance.stop()
|
|
177
158
|
|
|
178
159
|
@classmethod
|
|
179
160
|
def stop(cls):
|
|
180
|
-
instance = cls.
|
|
181
|
-
if
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
_msprobe_c._PrecisionDebugger().stop()
|
|
161
|
+
instance = cls.get_instance()
|
|
162
|
+
if instance is None:
|
|
163
|
+
return
|
|
164
|
+
|
|
185
165
|
if instance.task == Const.GRAD_PROBE:
|
|
186
166
|
instance.gm.stop()
|
|
187
|
-
if instance.task in PrecisionDebugger.task_not_need_service:
|
|
188
|
-
return
|
|
189
167
|
if instance.service:
|
|
190
168
|
instance.service.stop()
|
|
191
|
-
|
|
192
|
-
|
|
169
|
+
else:
|
|
170
|
+
Runtime.is_running = False
|
|
171
|
+
if enable_dynamic_kbyk_dump:
|
|
172
|
+
_dump_stop()
|
|
173
|
+
if cls._need_msprobe_c() and _msprobe_c:
|
|
174
|
+
_msprobe_c._PrecisionDebugger().stop()
|
|
175
|
+
|
|
193
176
|
@classmethod
|
|
194
177
|
def step(cls):
|
|
195
|
-
instance = cls.
|
|
196
|
-
if
|
|
197
|
-
raise Exception(MsgConst.NOT_CREATED_INSTANCE)
|
|
198
|
-
if _msprobe_c:
|
|
199
|
-
_msprobe_c._PrecisionDebugger().step()
|
|
200
|
-
if instance.task in PrecisionDebugger.task_not_need_service:
|
|
178
|
+
instance = cls.get_instance()
|
|
179
|
+
if instance is None:
|
|
201
180
|
return
|
|
181
|
+
|
|
202
182
|
if instance.service:
|
|
203
183
|
instance.service.step()
|
|
184
|
+
if is_graph_mode_cell_dump_allowed(instance.config):
|
|
185
|
+
GraphModeCellDump.step()
|
|
186
|
+
if enable_dynamic_kbyk_dump:
|
|
187
|
+
_dump_step(1)
|
|
188
|
+
if cls._need_msprobe_c() and _msprobe_c:
|
|
189
|
+
_msprobe_c._PrecisionDebugger().step()
|
|
190
|
+
|
|
204
191
|
HOOKCell.cell_count = defaultdict(int)
|
|
205
192
|
CellProcessor.reset_cell_stats()
|
|
206
|
-
|
|
207
193
|
Runtime.step_count += 1
|
|
208
194
|
|
|
209
195
|
@classmethod
|
|
@@ -230,7 +216,7 @@ class PrecisionDebugger:
|
|
|
230
216
|
instance.config.execution_mode = cls._get_execution_mode()
|
|
231
217
|
if cls._need_service():
|
|
232
218
|
if not instance.service:
|
|
233
|
-
instance.service =
|
|
219
|
+
instance.service = MindsporeService(instance.config)
|
|
234
220
|
instance.service.save(variable, name, save_backward)
|
|
235
221
|
|
|
236
222
|
@classmethod
|
|
@@ -241,4 +227,11 @@ class PrecisionDebugger:
|
|
|
241
227
|
if instance.config.execution_mode != MsConst.PYNATIVE_MODE:
|
|
242
228
|
return False
|
|
243
229
|
else:
|
|
244
|
-
return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config)
|
|
230
|
+
return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config)
|
|
231
|
+
|
|
232
|
+
@classmethod
|
|
233
|
+
def _need_msprobe_c(cls):
|
|
234
|
+
instance = cls._instance
|
|
235
|
+
if not instance:
|
|
236
|
+
raise Exception(MsgConst.NOT_CREATED_INSTANCE)
|
|
237
|
+
return instance.config.level_ori == Const.LEVEL_L2
|