mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/METADATA +4 -2
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/RECORD +204 -152
- msprobe/README.md +32 -1
- msprobe/core/__init__.py +17 -0
- msprobe/core/common/const.py +120 -21
- msprobe/core/common/exceptions.py +2 -2
- msprobe/core/common/file_utils.py +279 -50
- msprobe/core/common/framework_adapter.py +169 -0
- msprobe/core/common/global_lock.py +86 -0
- msprobe/core/common/runtime.py +25 -0
- msprobe/core/common/utils.py +136 -45
- msprobe/core/common_config.py +7 -0
- msprobe/core/compare/acc_compare.py +646 -428
- msprobe/core/compare/check.py +36 -103
- msprobe/core/compare/compare_cli.py +4 -0
- msprobe/core/compare/config.py +72 -0
- msprobe/core/compare/highlight.py +215 -215
- msprobe/core/compare/layer_mapping/layer_mapping.py +2 -0
- msprobe/core/compare/merge_result/merge_result.py +4 -4
- msprobe/core/compare/multiprocessing_compute.py +223 -110
- msprobe/core/compare/npy_compare.py +2 -4
- msprobe/core/compare/utils.py +214 -244
- msprobe/core/config_check/__init__.py +17 -0
- msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
- msprobe/core/config_check/checkers/base_checker.py +60 -0
- msprobe/core/config_check/checkers/dataset_checker.py +138 -0
- msprobe/core/config_check/checkers/env_args_checker.py +96 -0
- msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
- msprobe/core/config_check/checkers/pip_checker.py +90 -0
- msprobe/core/config_check/checkers/random_checker.py +367 -0
- msprobe/core/config_check/checkers/weights_checker.py +147 -0
- msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
- msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
- msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
- msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
- msprobe/core/config_check/config_check_cli.py +51 -0
- msprobe/core/config_check/config_checker.py +100 -0
- msprobe/{mindspore/runtime.py → core/config_check/resource/dependency.yaml} +7 -4
- msprobe/core/config_check/resource/env.yaml +57 -0
- msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
- msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
- msprobe/core/config_check/utils/utils.py +107 -0
- msprobe/core/data_dump/api_registry.py +67 -4
- msprobe/core/data_dump/data_collector.py +170 -89
- msprobe/core/data_dump/data_processor/base.py +72 -51
- msprobe/core/data_dump/data_processor/mindspore_processor.py +109 -55
- msprobe/core/data_dump/data_processor/pytorch_processor.py +90 -82
- msprobe/core/data_dump/json_writer.py +143 -27
- msprobe/core/debugger/precision_debugger.py +144 -0
- msprobe/core/grad_probe/constant.py +1 -1
- msprobe/core/grad_probe/grad_compare.py +1 -1
- msprobe/core/grad_probe/utils.py +1 -1
- msprobe/core/hook_manager.py +242 -0
- msprobe/core/monitor/anomaly_processor.py +384 -0
- msprobe/core/service.py +357 -0
- msprobe/core/single_save/__init__.py +0 -0
- msprobe/core/single_save/single_comparator.py +243 -0
- msprobe/core/single_save/single_saver.py +146 -0
- msprobe/docs/01.installation.md +6 -5
- msprobe/docs/02.config_introduction.md +79 -22
- msprobe/docs/03.config_examples.md +1 -0
- msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
- msprobe/docs/05.data_dump_PyTorch.md +118 -49
- msprobe/docs/06.data_dump_MindSpore.md +167 -20
- msprobe/docs/07.accuracy_checker_PyTorch.md +2 -2
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +69 -9
- msprobe/docs/09.accuracy_checker_MindSpore.md +18 -6
- msprobe/docs/10.accuracy_compare_PyTorch.md +212 -74
- msprobe/docs/11.accuracy_compare_MindSpore.md +87 -37
- msprobe/docs/12.overflow_check_PyTorch.md +2 -2
- msprobe/docs/13.overflow_check_MindSpore.md +2 -2
- msprobe/docs/14.data_parse_PyTorch.md +3 -3
- msprobe/docs/17.grad_probe.md +2 -1
- msprobe/docs/18.online_dispatch.md +2 -2
- msprobe/docs/19.monitor.md +90 -44
- msprobe/docs/21.visualization_PyTorch.md +68 -15
- msprobe/docs/22.visualization_MindSpore.md +71 -18
- msprobe/docs/25.tool_function_introduction.md +23 -22
- msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
- msprobe/docs/27.dump_json_instruction.md +1 -1
- msprobe/docs/28.debugger_save_instruction.md +111 -20
- msprobe/docs/29.data_dump_MSAdapter.md +2 -2
- msprobe/docs/30.overflow_check_MSAdapter.md +2 -2
- msprobe/docs/31.config_check.md +95 -0
- msprobe/docs/32.ckpt_compare.md +69 -0
- msprobe/docs/33.generate_operator_MindSpore.md +181 -0
- msprobe/docs/34.RL_collect.md +92 -0
- msprobe/docs/35.nan_analyze.md +72 -0
- msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
- msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
- msprobe/docs/img/compare_result.png +0 -0
- msprobe/docs/img/save_compare_result_sample.png +0 -0
- msprobe/docs/img/visualization/proxy.png +0 -0
- msprobe/mindspore/__init__.py +1 -2
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +150 -58
- msprobe/mindspore/api_accuracy_checker/api_runner.py +7 -3
- msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +47 -69
- msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
- msprobe/mindspore/api_accuracy_checker/compute_element.py +0 -1
- msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -2
- msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +460 -0
- msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
- msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +9 -0
- msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
- msprobe/mindspore/cell_processor.py +204 -33
- msprobe/mindspore/code_mapping/graph_parser.py +4 -21
- msprobe/mindspore/common/const.py +17 -7
- msprobe/mindspore/common/utils.py +128 -11
- msprobe/mindspore/compare/common_dir_compare.py +382 -0
- msprobe/mindspore/compare/distributed_compare.py +2 -26
- msprobe/mindspore/compare/ms_compare.py +17 -405
- msprobe/mindspore/compare/ms_graph_compare.py +14 -5
- msprobe/mindspore/compare/utils.py +37 -0
- msprobe/mindspore/debugger/debugger_config.py +53 -3
- msprobe/mindspore/debugger/precision_debugger.py +72 -91
- msprobe/mindspore/dump/cell_dump_process.py +877 -0
- msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +864 -0
- msprobe/mindspore/dump/dump_tool_factory.py +13 -5
- msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
- msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
- msprobe/mindspore/dump/hook_cell/api_register.py +40 -6
- msprobe/mindspore/dump/hook_cell/hook_cell.py +18 -7
- msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
- msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
- msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +18 -0
- msprobe/mindspore/dump/jit_dump.py +21 -18
- msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
- msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -15
- msprobe/mindspore/free_benchmark/api_pynative_self_check.py +12 -6
- msprobe/mindspore/free_benchmark/common/utils.py +1 -1
- msprobe/mindspore/grad_probe/global_context.py +7 -2
- msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
- msprobe/mindspore/mindspore_service.py +114 -0
- msprobe/mindspore/monitor/common_func.py +52 -0
- msprobe/mindspore/monitor/data_writers.py +237 -0
- msprobe/mindspore/monitor/features.py +20 -7
- msprobe/mindspore/monitor/module_hook.py +281 -209
- msprobe/mindspore/monitor/optimizer_collect.py +334 -0
- msprobe/mindspore/monitor/utils.py +25 -5
- msprobe/mindspore/ms_config.py +16 -15
- msprobe/mindspore/task_handler_factory.py +5 -2
- msprobe/msprobe.py +19 -0
- msprobe/nan_analyze/__init__.py +14 -0
- msprobe/nan_analyze/analyzer.py +255 -0
- msprobe/nan_analyze/graph.py +189 -0
- msprobe/nan_analyze/utils.py +211 -0
- msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
- msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +20 -20
- msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +4 -7
- msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +204 -2
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +12 -11
- msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +1 -0
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +8 -5
- msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +2 -3
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +156 -0
- msprobe/pytorch/attl_manager.py +65 -0
- msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
- msprobe/pytorch/common/utils.py +26 -14
- msprobe/pytorch/compare/distributed_compare.py +4 -36
- msprobe/pytorch/compare/pt_compare.py +13 -84
- msprobe/pytorch/compare/utils.py +47 -0
- msprobe/pytorch/debugger/debugger_config.py +34 -17
- msprobe/pytorch/debugger/precision_debugger.py +66 -118
- msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
- msprobe/pytorch/dump/module_dump/module_dump.py +11 -58
- msprobe/pytorch/dump/module_dump/module_processer.py +143 -113
- msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
- msprobe/pytorch/hook_module/api_register.py +29 -5
- msprobe/pytorch/hook_module/hook_module.py +9 -18
- msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
- msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
- msprobe/pytorch/hook_module/support_wrap_ops.yaml +22 -1
- msprobe/pytorch/hook_module/utils.py +28 -2
- msprobe/pytorch/monitor/csv2tb.py +6 -2
- msprobe/pytorch/monitor/data_writers.py +259 -0
- msprobe/pytorch/monitor/module_hook.py +227 -158
- msprobe/pytorch/monitor/module_metric.py +14 -0
- msprobe/pytorch/monitor/optimizer_collect.py +242 -270
- msprobe/pytorch/monitor/utils.py +16 -3
- msprobe/pytorch/online_dispatch/dispatch.py +4 -2
- msprobe/pytorch/online_dispatch/dump_compare.py +5 -2
- msprobe/pytorch/parse_tool/lib/utils.py +3 -3
- msprobe/pytorch/pt_config.py +8 -7
- msprobe/pytorch/pytorch_service.py +73 -0
- msprobe/visualization/builder/graph_builder.py +33 -13
- msprobe/visualization/builder/msprobe_adapter.py +24 -11
- msprobe/visualization/compare/graph_comparator.py +53 -45
- msprobe/visualization/compare/mode_adapter.py +31 -1
- msprobe/visualization/graph/base_node.py +3 -3
- msprobe/visualization/graph/graph.py +2 -2
- msprobe/visualization/graph_service.py +250 -103
- msprobe/visualization/utils.py +27 -11
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -106
- msprobe/mindspore/monitor/anomaly_detect.py +0 -404
- msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
- msprobe/mindspore/service.py +0 -549
- msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
- msprobe/pytorch/monitor/anomaly_detect.py +0 -410
- msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
- msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
- msprobe/pytorch/service.py +0 -473
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/LICENSE +0 -0
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/WHEEL +0 -0
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/top_level.txt +0 -0
- /msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
- /msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
- /msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0
|
@@ -19,22 +19,33 @@ from collections import defaultdict, namedtuple
|
|
|
19
19
|
import mindspore as ms
|
|
20
20
|
from mindspore._c_expression import MSContext
|
|
21
21
|
|
|
22
|
-
from msprobe.core.common.const import Const,
|
|
23
|
-
from msprobe.core.common.
|
|
24
|
-
from msprobe.core.common.
|
|
25
|
-
from msprobe.core.
|
|
22
|
+
from msprobe.core.common.const import Const, MsgConst
|
|
23
|
+
from msprobe.core.common.utils import check_token_range
|
|
24
|
+
from msprobe.core.common.runtime import Runtime
|
|
25
|
+
from msprobe.core.debugger.precision_debugger import BasePrecisionDebugger
|
|
26
26
|
from msprobe.mindspore.cell_processor import CellProcessor
|
|
27
27
|
from msprobe.mindspore.common.const import Const as MsConst
|
|
28
|
-
from msprobe.mindspore.common.utils import
|
|
28
|
+
from msprobe.mindspore.common.utils import (
|
|
29
|
+
set_register_backward_hook_functions,
|
|
30
|
+
check_save_param,
|
|
31
|
+
is_graph_mode_cell_dump_allowed
|
|
32
|
+
)
|
|
29
33
|
from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
|
|
34
|
+
from msprobe.mindspore.dump.graph_mode_cell_dump import GraphModeCellDump
|
|
30
35
|
from msprobe.mindspore.dump.hook_cell.api_register import get_api_register
|
|
31
36
|
from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
|
|
32
37
|
from msprobe.mindspore.grad_probe.grad_monitor import GradientMonitor
|
|
33
|
-
from msprobe.mindspore.ms_config import
|
|
34
|
-
from msprobe.mindspore.
|
|
35
|
-
from msprobe.mindspore.service import Service
|
|
38
|
+
from msprobe.mindspore.ms_config import parse_task_config
|
|
39
|
+
from msprobe.mindspore.mindspore_service import MindsporeService
|
|
36
40
|
from msprobe.mindspore.task_handler_factory import TaskHandlerFactory
|
|
37
41
|
|
|
42
|
+
try:
|
|
43
|
+
from mindspore._c_expression import _dump_start, _dump_stop, _dump_step, _set_init_iter, _dump_set_dynamic
|
|
44
|
+
except ImportError:
|
|
45
|
+
enable_dynamic_kbyk_dump = False
|
|
46
|
+
else:
|
|
47
|
+
enable_dynamic_kbyk_dump = True
|
|
48
|
+
|
|
38
49
|
try:
|
|
39
50
|
from msprobe.lib import _msprobe_c
|
|
40
51
|
except ImportError:
|
|
@@ -44,9 +55,7 @@ except ImportError:
|
|
|
44
55
|
ConfigParameters = namedtuple("ConfigParameters", ["config_path", "task", "dump_path", "level"])
|
|
45
56
|
|
|
46
57
|
|
|
47
|
-
class PrecisionDebugger:
|
|
48
|
-
_instance = None
|
|
49
|
-
task_not_need_service = [Const.GRAD_PROBE]
|
|
58
|
+
class PrecisionDebugger(BasePrecisionDebugger):
|
|
50
59
|
|
|
51
60
|
def __new__(cls, config_path=None, task=None, dump_path=None,
|
|
52
61
|
level=None, step=None, opt=None):
|
|
@@ -62,61 +71,33 @@ class PrecisionDebugger:
|
|
|
62
71
|
level=None, step=None):
|
|
63
72
|
if self.initialized:
|
|
64
73
|
return
|
|
65
|
-
self.initialized = True
|
|
66
|
-
|
|
67
74
|
set_register_backward_hook_functions()
|
|
75
|
+
super().__init__(config_path, task, dump_path, level, step)
|
|
68
76
|
|
|
69
|
-
if not config_path:
|
|
70
|
-
config_path = os.path.join(os.path.dirname(__file__), "../../config.json")
|
|
71
|
-
|
|
72
|
-
config_params = ConfigParameters(config_path, task, dump_path, level)
|
|
73
|
-
self.check_input_params(config_params)
|
|
74
|
-
|
|
75
|
-
common_config, task_config = parse_json_config(config_path)
|
|
76
|
-
common_config.task = task if task else common_config.task
|
|
77
|
-
self.task = common_config.task
|
|
78
77
|
if self.task == Const.GRAD_PROBE:
|
|
79
|
-
self.gm = GradientMonitor(common_config, task_config)
|
|
78
|
+
self.gm = GradientMonitor(self.common_config, self.task_config)
|
|
80
79
|
return
|
|
81
|
-
common_config.
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
common_config.dump_path = dump_path if dump_path else common_config.dump_path
|
|
85
|
-
self.config = DebuggerConfig(common_config, task_config)
|
|
80
|
+
self.common_config.level = level if level else self.common_config.level
|
|
81
|
+
self.common_config.dump_path = dump_path if dump_path else self.common_config.dump_path
|
|
82
|
+
self.config = DebuggerConfig(self.common_config, self.task_config)
|
|
86
83
|
|
|
87
84
|
if self._need_msprobe_c() and _msprobe_c:
|
|
85
|
+
os.environ["MS_HOOK_ENABLE"] = "on"
|
|
88
86
|
_msprobe_c._PrecisionDebugger(framework="MindSpore", config_path=config_path)
|
|
89
87
|
|
|
90
88
|
self.config.execution_mode = self._get_execution_mode()
|
|
91
89
|
if self._need_service():
|
|
92
90
|
self.config.check_config_with_l2()
|
|
93
|
-
self.service =
|
|
91
|
+
self.service = MindsporeService(self.config)
|
|
94
92
|
|
|
95
93
|
Runtime.step_count = 0
|
|
96
94
|
Runtime.is_running = False
|
|
95
|
+
if enable_dynamic_kbyk_dump:
|
|
96
|
+
_dump_set_dynamic()
|
|
97
97
|
|
|
98
98
|
@staticmethod
|
|
99
|
-
def
|
|
100
|
-
|
|
101
|
-
if not isinstance(args.config_path, str):
|
|
102
|
-
raise MsprobeException(
|
|
103
|
-
MsprobeException.INVALID_PARAM_ERROR, f"config_path must be a string")
|
|
104
|
-
file_checker = FileChecker(
|
|
105
|
-
file_path=args.config_path, path_type=FileCheckConst.FILE, file_type=FileCheckConst.JSON_SUFFIX)
|
|
106
|
-
file_checker.common_check()
|
|
107
|
-
|
|
108
|
-
if args.task is not None and args.task not in Const.TASK_LIST:
|
|
109
|
-
raise MsprobeException(
|
|
110
|
-
MsprobeException.INVALID_PARAM_ERROR, f"task must be one of {Const.TASK_LIST}")
|
|
111
|
-
|
|
112
|
-
if args.dump_path is not None:
|
|
113
|
-
if not isinstance(args.dump_path, str):
|
|
114
|
-
raise MsprobeException(
|
|
115
|
-
MsprobeException.INVALID_PARAM_ERROR, f"dump_path must be a string")
|
|
116
|
-
|
|
117
|
-
if args.level is not None and args.level not in Const.LEVEL_LIST:
|
|
118
|
-
raise MsprobeException(
|
|
119
|
-
MsprobeException.INVALID_PARAM_ERROR, f"level must be one of {Const.LEVEL_LIST}")
|
|
99
|
+
def _get_task_config(task, json_config):
|
|
100
|
+
return parse_task_config(task, json_config)
|
|
120
101
|
|
|
121
102
|
@staticmethod
|
|
122
103
|
def _get_execution_mode():
|
|
@@ -137,7 +118,7 @@ class PrecisionDebugger:
|
|
|
137
118
|
return MsConst.PYNATIVE_MODE
|
|
138
119
|
|
|
139
120
|
@staticmethod
|
|
140
|
-
def _is_graph_dump(config):
|
|
121
|
+
def _is_graph_dump(config: DebuggerConfig):
|
|
141
122
|
if config.level != MsConst.KERNEL:
|
|
142
123
|
return False
|
|
143
124
|
if not config.list:
|
|
@@ -147,59 +128,68 @@ class PrecisionDebugger:
|
|
|
147
128
|
return is_graph
|
|
148
129
|
|
|
149
130
|
@classmethod
|
|
150
|
-
def start(cls, model=None):
|
|
151
|
-
instance = cls.
|
|
152
|
-
if
|
|
153
|
-
|
|
131
|
+
def start(cls, model=None, token_range=None):
|
|
132
|
+
instance = cls._get_instance()
|
|
133
|
+
if instance is None:
|
|
134
|
+
return
|
|
154
135
|
if cls._need_msprobe_c() and _msprobe_c:
|
|
155
136
|
_msprobe_c._PrecisionDebugger().start()
|
|
156
|
-
|
|
157
|
-
return
|
|
158
|
-
|
|
137
|
+
check_token_range(token_range)
|
|
159
138
|
instance.config.execution_mode = cls._get_execution_mode()
|
|
160
139
|
if cls._need_service():
|
|
161
140
|
if not instance.service:
|
|
162
|
-
instance.service =
|
|
163
|
-
instance.
|
|
141
|
+
instance.service = MindsporeService(instance.config)
|
|
142
|
+
instance.config.check_model(model, token_range)
|
|
143
|
+
instance.service.start(model, token_range)
|
|
164
144
|
else:
|
|
165
145
|
if not instance.first_start:
|
|
166
146
|
get_api_register().restore_all_api()
|
|
167
|
-
handler = TaskHandlerFactory.create(instance.config)
|
|
147
|
+
handler = TaskHandlerFactory.create(instance.config, model)
|
|
168
148
|
handler.handle()
|
|
169
|
-
|
|
149
|
+
if enable_dynamic_kbyk_dump:
|
|
150
|
+
_set_init_iter(0)
|
|
151
|
+
if enable_dynamic_kbyk_dump:
|
|
152
|
+
is_valid_rank = (not instance.config.rank or Runtime.rank_id in instance.config.rank)
|
|
153
|
+
is_valid_step = (not instance.config.step or Runtime.step_count in instance.config.step)
|
|
154
|
+
if is_valid_rank and is_valid_step:
|
|
155
|
+
_dump_start()
|
|
156
|
+
Runtime.is_running = True
|
|
170
157
|
instance.first_start = True
|
|
171
|
-
Runtime.is_running = True
|
|
172
|
-
|
|
173
|
-
@classmethod
|
|
174
|
-
def forward_backward_dump_end(cls):
|
|
175
|
-
instance = cls._instance
|
|
176
|
-
instance.stop()
|
|
177
158
|
|
|
178
159
|
@classmethod
|
|
179
160
|
def stop(cls):
|
|
180
|
-
instance = cls.
|
|
181
|
-
if
|
|
182
|
-
|
|
161
|
+
instance = cls._get_instance()
|
|
162
|
+
if instance is None:
|
|
163
|
+
return
|
|
164
|
+
|
|
183
165
|
if instance.task == Const.GRAD_PROBE:
|
|
184
166
|
instance.gm.stop()
|
|
185
|
-
if instance.task in PrecisionDebugger.task_not_need_service:
|
|
186
|
-
return
|
|
187
167
|
if instance.service:
|
|
188
168
|
instance.service.stop()
|
|
189
|
-
|
|
190
|
-
|
|
169
|
+
else:
|
|
170
|
+
Runtime.is_running = False
|
|
171
|
+
if enable_dynamic_kbyk_dump:
|
|
172
|
+
_dump_stop()
|
|
173
|
+
if cls._need_msprobe_c() and _msprobe_c:
|
|
174
|
+
_msprobe_c._PrecisionDebugger().stop()
|
|
175
|
+
|
|
191
176
|
@classmethod
|
|
192
177
|
def step(cls):
|
|
193
|
-
instance = cls.
|
|
194
|
-
if
|
|
195
|
-
raise Exception(MsgConst.NOT_CREATED_INSTANCE)
|
|
196
|
-
if instance.task in PrecisionDebugger.task_not_need_service:
|
|
178
|
+
instance = cls._get_instance()
|
|
179
|
+
if instance is None:
|
|
197
180
|
return
|
|
181
|
+
|
|
198
182
|
if instance.service:
|
|
199
183
|
instance.service.step()
|
|
184
|
+
if is_graph_mode_cell_dump_allowed(instance.config):
|
|
185
|
+
GraphModeCellDump.step()
|
|
186
|
+
if enable_dynamic_kbyk_dump:
|
|
187
|
+
_dump_step(1)
|
|
188
|
+
if cls._need_msprobe_c() and _msprobe_c:
|
|
189
|
+
_msprobe_c._PrecisionDebugger().step()
|
|
190
|
+
|
|
200
191
|
HOOKCell.cell_count = defaultdict(int)
|
|
201
192
|
CellProcessor.reset_cell_stats()
|
|
202
|
-
|
|
203
193
|
Runtime.step_count += 1
|
|
204
194
|
|
|
205
195
|
@classmethod
|
|
@@ -226,18 +216,9 @@ class PrecisionDebugger:
|
|
|
226
216
|
instance.config.execution_mode = cls._get_execution_mode()
|
|
227
217
|
if cls._need_service():
|
|
228
218
|
if not instance.service:
|
|
229
|
-
instance.service =
|
|
219
|
+
instance.service = MindsporeService(instance.config)
|
|
230
220
|
instance.service.save(variable, name, save_backward)
|
|
231
221
|
|
|
232
|
-
@classmethod
|
|
233
|
-
def set_init_step(cls, step):
|
|
234
|
-
instance = cls._instance
|
|
235
|
-
if not instance:
|
|
236
|
-
raise Exception(MsgConst.NOT_CREATED_INSTANCE)
|
|
237
|
-
check_init_step(step)
|
|
238
|
-
instance.service.init_step = step
|
|
239
|
-
instance.service.loop = 0
|
|
240
|
-
|
|
241
222
|
@classmethod
|
|
242
223
|
def _need_service(cls):
|
|
243
224
|
instance = cls._instance
|
|
@@ -247,7 +228,7 @@ class PrecisionDebugger:
|
|
|
247
228
|
return False
|
|
248
229
|
else:
|
|
249
230
|
return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config)
|
|
250
|
-
|
|
231
|
+
|
|
251
232
|
@classmethod
|
|
252
233
|
def _need_msprobe_c(cls):
|
|
253
234
|
instance = cls._instance
|