mindstudio-probe 1.3.0__py3-none-any.whl → 8.1.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (213)
  1. {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/METADATA +4 -2
  2. {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/RECORD +204 -152
  3. msprobe/README.md +32 -1
  4. msprobe/core/__init__.py +17 -0
  5. msprobe/core/common/const.py +120 -21
  6. msprobe/core/common/exceptions.py +2 -2
  7. msprobe/core/common/file_utils.py +279 -50
  8. msprobe/core/common/framework_adapter.py +169 -0
  9. msprobe/core/common/global_lock.py +86 -0
  10. msprobe/core/common/runtime.py +25 -0
  11. msprobe/core/common/utils.py +136 -45
  12. msprobe/core/common_config.py +7 -0
  13. msprobe/core/compare/acc_compare.py +646 -428
  14. msprobe/core/compare/check.py +36 -103
  15. msprobe/core/compare/compare_cli.py +4 -0
  16. msprobe/core/compare/config.py +72 -0
  17. msprobe/core/compare/highlight.py +215 -215
  18. msprobe/core/compare/layer_mapping/layer_mapping.py +2 -0
  19. msprobe/core/compare/merge_result/merge_result.py +4 -4
  20. msprobe/core/compare/multiprocessing_compute.py +223 -110
  21. msprobe/core/compare/npy_compare.py +2 -4
  22. msprobe/core/compare/utils.py +214 -244
  23. msprobe/core/config_check/__init__.py +17 -0
  24. msprobe/{pytorch/dump/kernel_dump/kernel_config.py → core/config_check/checkers/__init__.py} +8 -16
  25. msprobe/core/config_check/checkers/base_checker.py +60 -0
  26. msprobe/core/config_check/checkers/dataset_checker.py +138 -0
  27. msprobe/core/config_check/checkers/env_args_checker.py +96 -0
  28. msprobe/core/config_check/checkers/hyperparameter_checker.py +170 -0
  29. msprobe/core/config_check/checkers/pip_checker.py +90 -0
  30. msprobe/core/config_check/checkers/random_checker.py +367 -0
  31. msprobe/core/config_check/checkers/weights_checker.py +147 -0
  32. msprobe/core/config_check/ckpt_compare/ckpt_comparator.py +74 -0
  33. msprobe/core/config_check/ckpt_compare/megatron_loader.py +302 -0
  34. msprobe/core/config_check/ckpt_compare/metrics.py +83 -0
  35. msprobe/core/config_check/ckpt_compare/name_mapping.yaml +12 -0
  36. msprobe/core/config_check/config_check_cli.py +51 -0
  37. msprobe/core/config_check/config_checker.py +100 -0
  38. msprobe/{mindspore/runtime.py → core/config_check/resource/dependency.yaml} +7 -4
  39. msprobe/core/config_check/resource/env.yaml +57 -0
  40. msprobe/core/config_check/resource/hyperparameter.yaml +21 -0
  41. msprobe/core/config_check/utils/hyperparameter_parser.py +115 -0
  42. msprobe/core/config_check/utils/utils.py +107 -0
  43. msprobe/core/data_dump/api_registry.py +67 -4
  44. msprobe/core/data_dump/data_collector.py +170 -89
  45. msprobe/core/data_dump/data_processor/base.py +72 -51
  46. msprobe/core/data_dump/data_processor/mindspore_processor.py +109 -55
  47. msprobe/core/data_dump/data_processor/pytorch_processor.py +90 -82
  48. msprobe/core/data_dump/json_writer.py +143 -27
  49. msprobe/core/debugger/precision_debugger.py +144 -0
  50. msprobe/core/grad_probe/constant.py +1 -1
  51. msprobe/core/grad_probe/grad_compare.py +1 -1
  52. msprobe/core/grad_probe/utils.py +1 -1
  53. msprobe/core/hook_manager.py +242 -0
  54. msprobe/core/monitor/anomaly_processor.py +384 -0
  55. msprobe/core/service.py +357 -0
  56. msprobe/core/single_save/__init__.py +0 -0
  57. msprobe/core/single_save/single_comparator.py +243 -0
  58. msprobe/core/single_save/single_saver.py +146 -0
  59. msprobe/docs/01.installation.md +6 -5
  60. msprobe/docs/02.config_introduction.md +79 -22
  61. msprobe/docs/03.config_examples.md +1 -0
  62. msprobe/docs/04.kernel_dump_PyTorch.md +1 -1
  63. msprobe/docs/05.data_dump_PyTorch.md +118 -49
  64. msprobe/docs/06.data_dump_MindSpore.md +167 -20
  65. msprobe/docs/07.accuracy_checker_PyTorch.md +2 -2
  66. msprobe/docs/08.accuracy_checker_online_PyTorch.md +69 -9
  67. msprobe/docs/09.accuracy_checker_MindSpore.md +18 -6
  68. msprobe/docs/10.accuracy_compare_PyTorch.md +212 -74
  69. msprobe/docs/11.accuracy_compare_MindSpore.md +87 -37
  70. msprobe/docs/12.overflow_check_PyTorch.md +2 -2
  71. msprobe/docs/13.overflow_check_MindSpore.md +2 -2
  72. msprobe/docs/14.data_parse_PyTorch.md +3 -3
  73. msprobe/docs/17.grad_probe.md +2 -1
  74. msprobe/docs/18.online_dispatch.md +2 -2
  75. msprobe/docs/19.monitor.md +90 -44
  76. msprobe/docs/21.visualization_PyTorch.md +68 -15
  77. msprobe/docs/22.visualization_MindSpore.md +71 -18
  78. msprobe/docs/25.tool_function_introduction.md +23 -22
  79. msprobe/docs/26.data_dump_PyTorch_baseline.md +14 -3
  80. msprobe/docs/27.dump_json_instruction.md +1 -1
  81. msprobe/docs/28.debugger_save_instruction.md +111 -20
  82. msprobe/docs/29.data_dump_MSAdapter.md +2 -2
  83. msprobe/docs/30.overflow_check_MSAdapter.md +2 -2
  84. msprobe/docs/31.config_check.md +95 -0
  85. msprobe/docs/32.ckpt_compare.md +69 -0
  86. msprobe/docs/33.generate_operator_MindSpore.md +181 -0
  87. msprobe/docs/34.RL_collect.md +92 -0
  88. msprobe/docs/35.nan_analyze.md +72 -0
  89. msprobe/docs/data_dump_MindSpore/data_dump_MindSpore_baseline.md +12 -1
  90. msprobe/docs/data_dump_MindSpore/dynamic_graph_quick_start_example.md +3 -1
  91. msprobe/docs/img/compare_result.png +0 -0
  92. msprobe/docs/img/save_compare_result_sample.png +0 -0
  93. msprobe/docs/img/visualization/proxy.png +0 -0
  94. msprobe/mindspore/__init__.py +1 -2
  95. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +150 -58
  96. msprobe/mindspore/api_accuracy_checker/api_runner.py +7 -3
  97. msprobe/mindspore/api_accuracy_checker/bench_functions/flash_attention_score.py +47 -69
  98. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +4 -0
  99. msprobe/mindspore/api_accuracy_checker/compute_element.py +0 -1
  100. msprobe/mindspore/api_accuracy_checker/data_manager.py +2 -2
  101. msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +460 -0
  102. msprobe/mindspore/api_accuracy_checker/generate_op_script/operator_replication.template +2081 -0
  103. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +9 -0
  104. msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
  105. msprobe/mindspore/cell_processor.py +204 -33
  106. msprobe/mindspore/code_mapping/graph_parser.py +4 -21
  107. msprobe/mindspore/common/const.py +17 -7
  108. msprobe/mindspore/common/utils.py +128 -11
  109. msprobe/mindspore/compare/common_dir_compare.py +382 -0
  110. msprobe/mindspore/compare/distributed_compare.py +2 -26
  111. msprobe/mindspore/compare/ms_compare.py +17 -405
  112. msprobe/mindspore/compare/ms_graph_compare.py +14 -5
  113. msprobe/mindspore/compare/utils.py +37 -0
  114. msprobe/mindspore/debugger/debugger_config.py +53 -3
  115. msprobe/mindspore/debugger/precision_debugger.py +72 -91
  116. msprobe/mindspore/dump/cell_dump_process.py +877 -0
  117. msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +864 -0
  118. msprobe/mindspore/dump/dump_tool_factory.py +13 -5
  119. msprobe/mindspore/dump/graph_mode_cell_dump.py +139 -0
  120. msprobe/mindspore/dump/graph_tensor_dump.py +123 -0
  121. msprobe/mindspore/dump/hook_cell/api_register.py +40 -6
  122. msprobe/mindspore/dump/hook_cell/hook_cell.py +18 -7
  123. msprobe/mindspore/dump/hook_cell/ms_hook_manager.py +88 -0
  124. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +8 -2
  125. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +18 -0
  126. msprobe/mindspore/dump/jit_dump.py +21 -18
  127. msprobe/mindspore/dump/kernel_kbyk_dump.py +6 -3
  128. msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +110 -0
  129. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +15 -15
  130. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +12 -6
  131. msprobe/mindspore/free_benchmark/common/utils.py +1 -1
  132. msprobe/mindspore/grad_probe/global_context.py +7 -2
  133. msprobe/mindspore/grad_probe/grad_stat_csv.py +3 -2
  134. msprobe/mindspore/mindspore_service.py +114 -0
  135. msprobe/mindspore/monitor/common_func.py +52 -0
  136. msprobe/mindspore/monitor/data_writers.py +237 -0
  137. msprobe/mindspore/monitor/features.py +20 -7
  138. msprobe/mindspore/monitor/module_hook.py +281 -209
  139. msprobe/mindspore/monitor/optimizer_collect.py +334 -0
  140. msprobe/mindspore/monitor/utils.py +25 -5
  141. msprobe/mindspore/ms_config.py +16 -15
  142. msprobe/mindspore/task_handler_factory.py +5 -2
  143. msprobe/msprobe.py +19 -0
  144. msprobe/nan_analyze/__init__.py +14 -0
  145. msprobe/nan_analyze/analyzer.py +255 -0
  146. msprobe/nan_analyze/graph.py +189 -0
  147. msprobe/nan_analyze/utils.py +211 -0
  148. msprobe/pytorch/api_accuracy_checker/common/config.py +2 -2
  149. msprobe/pytorch/api_accuracy_checker/compare/compare.py +36 -34
  150. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +20 -20
  151. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +4 -7
  152. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +204 -2
  153. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +12 -11
  154. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +1 -0
  155. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +8 -5
  156. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +2 -3
  157. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +29 -13
  158. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +12 -2
  159. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +45 -31
  160. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +156 -0
  161. msprobe/pytorch/attl_manager.py +65 -0
  162. msprobe/pytorch/bench_functions/npu_fusion_attention.py +27 -0
  163. msprobe/pytorch/common/utils.py +26 -14
  164. msprobe/pytorch/compare/distributed_compare.py +4 -36
  165. msprobe/pytorch/compare/pt_compare.py +13 -84
  166. msprobe/pytorch/compare/utils.py +47 -0
  167. msprobe/pytorch/debugger/debugger_config.py +34 -17
  168. msprobe/pytorch/debugger/precision_debugger.py +66 -118
  169. msprobe/pytorch/dump/module_dump/hook_wrapper.py +93 -0
  170. msprobe/pytorch/dump/module_dump/module_dump.py +11 -58
  171. msprobe/pytorch/dump/module_dump/module_processer.py +143 -113
  172. msprobe/pytorch/grad_probe/grad_stat_csv.py +3 -2
  173. msprobe/pytorch/hook_module/api_register.py +29 -5
  174. msprobe/pytorch/hook_module/hook_module.py +9 -18
  175. msprobe/pytorch/hook_module/jit_script_wrapper.py +33 -0
  176. msprobe/pytorch/hook_module/pt_hook_manager.py +68 -0
  177. msprobe/pytorch/hook_module/support_wrap_ops.yaml +22 -1
  178. msprobe/pytorch/hook_module/utils.py +28 -2
  179. msprobe/pytorch/monitor/csv2tb.py +6 -2
  180. msprobe/pytorch/monitor/data_writers.py +259 -0
  181. msprobe/pytorch/monitor/module_hook.py +227 -158
  182. msprobe/pytorch/monitor/module_metric.py +14 -0
  183. msprobe/pytorch/monitor/optimizer_collect.py +242 -270
  184. msprobe/pytorch/monitor/utils.py +16 -3
  185. msprobe/pytorch/online_dispatch/dispatch.py +4 -2
  186. msprobe/pytorch/online_dispatch/dump_compare.py +5 -2
  187. msprobe/pytorch/parse_tool/lib/utils.py +3 -3
  188. msprobe/pytorch/pt_config.py +8 -7
  189. msprobe/pytorch/pytorch_service.py +73 -0
  190. msprobe/visualization/builder/graph_builder.py +33 -13
  191. msprobe/visualization/builder/msprobe_adapter.py +24 -11
  192. msprobe/visualization/compare/graph_comparator.py +53 -45
  193. msprobe/visualization/compare/mode_adapter.py +31 -1
  194. msprobe/visualization/graph/base_node.py +3 -3
  195. msprobe/visualization/graph/graph.py +2 -2
  196. msprobe/visualization/graph_service.py +250 -103
  197. msprobe/visualization/utils.py +27 -11
  198. msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +0 -106
  199. msprobe/mindspore/monitor/anomaly_detect.py +0 -404
  200. msprobe/mindspore/monitor/module_spec_verifier.py +0 -94
  201. msprobe/mindspore/service.py +0 -549
  202. msprobe/pytorch/monitor/anomaly_analyse.py +0 -201
  203. msprobe/pytorch/monitor/anomaly_detect.py +0 -410
  204. msprobe/pytorch/monitor/module_spec_verifier.py +0 -95
  205. msprobe/pytorch/monitor/unittest/test_monitor.py +0 -160
  206. msprobe/pytorch/service.py +0 -473
  207. {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/LICENSE +0 -0
  208. {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/WHEEL +0 -0
  209. {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/entry_points.txt +0 -0
  210. {mindstudio_probe-1.3.0.dist-info → mindstudio_probe-8.1.1.dist-info}/top_level.txt +0 -0
  211. /msprobe/{mindspore → core}/compare/ms_to_pt_api.yaml +0 -0
  212. /msprobe/{mindspore/dump → core}/kernel_dump/kernel_config.py +0 -0
  213. /msprobe/{pytorch/monitor/unittest → core/monitor}/__init__.py +0 -0
@@ -19,22 +19,33 @@ from collections import defaultdict, namedtuple
19
19
  import mindspore as ms
20
20
  from mindspore._c_expression import MSContext
21
21
 
22
- from msprobe.core.common.const import Const, FileCheckConst, MsgConst
23
- from msprobe.core.common.exceptions import MsprobeException
24
- from msprobe.core.common.file_utils import FileChecker
25
- from msprobe.core.common.utils import get_real_step_or_rank, check_init_step
22
+ from msprobe.core.common.const import Const, MsgConst
23
+ from msprobe.core.common.utils import check_token_range
24
+ from msprobe.core.common.runtime import Runtime
25
+ from msprobe.core.debugger.precision_debugger import BasePrecisionDebugger
26
26
  from msprobe.mindspore.cell_processor import CellProcessor
27
27
  from msprobe.mindspore.common.const import Const as MsConst
28
- from msprobe.mindspore.common.utils import set_register_backward_hook_functions, check_save_param
28
+ from msprobe.mindspore.common.utils import (
29
+ set_register_backward_hook_functions,
30
+ check_save_param,
31
+ is_graph_mode_cell_dump_allowed
32
+ )
29
33
  from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
34
+ from msprobe.mindspore.dump.graph_mode_cell_dump import GraphModeCellDump
30
35
  from msprobe.mindspore.dump.hook_cell.api_register import get_api_register
31
36
  from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell
32
37
  from msprobe.mindspore.grad_probe.grad_monitor import GradientMonitor
33
- from msprobe.mindspore.ms_config import parse_json_config
34
- from msprobe.mindspore.runtime import Runtime
35
- from msprobe.mindspore.service import Service
38
+ from msprobe.mindspore.ms_config import parse_task_config
39
+ from msprobe.mindspore.mindspore_service import MindsporeService
36
40
  from msprobe.mindspore.task_handler_factory import TaskHandlerFactory
37
41
 
42
+ try:
43
+ from mindspore._c_expression import _dump_start, _dump_stop, _dump_step, _set_init_iter, _dump_set_dynamic
44
+ except ImportError:
45
+ enable_dynamic_kbyk_dump = False
46
+ else:
47
+ enable_dynamic_kbyk_dump = True
48
+
38
49
  try:
39
50
  from msprobe.lib import _msprobe_c
40
51
  except ImportError:
@@ -44,9 +55,7 @@ except ImportError:
44
55
  ConfigParameters = namedtuple("ConfigParameters", ["config_path", "task", "dump_path", "level"])
45
56
 
46
57
 
47
- class PrecisionDebugger:
48
- _instance = None
49
- task_not_need_service = [Const.GRAD_PROBE]
58
+ class PrecisionDebugger(BasePrecisionDebugger):
50
59
 
51
60
  def __new__(cls, config_path=None, task=None, dump_path=None,
52
61
  level=None, step=None, opt=None):
@@ -62,61 +71,33 @@ class PrecisionDebugger:
62
71
  level=None, step=None):
63
72
  if self.initialized:
64
73
  return
65
- self.initialized = True
66
-
67
74
  set_register_backward_hook_functions()
75
+ super().__init__(config_path, task, dump_path, level, step)
68
76
 
69
- if not config_path:
70
- config_path = os.path.join(os.path.dirname(__file__), "../../config.json")
71
-
72
- config_params = ConfigParameters(config_path, task, dump_path, level)
73
- self.check_input_params(config_params)
74
-
75
- common_config, task_config = parse_json_config(config_path)
76
- common_config.task = task if task else common_config.task
77
- self.task = common_config.task
78
77
  if self.task == Const.GRAD_PROBE:
79
- self.gm = GradientMonitor(common_config, task_config)
78
+ self.gm = GradientMonitor(self.common_config, self.task_config)
80
79
  return
81
- common_config.step = get_real_step_or_rank(
82
- step, Const.STEP) if step is not None else common_config.step
83
- common_config.level = level if level else common_config.level
84
- common_config.dump_path = dump_path if dump_path else common_config.dump_path
85
- self.config = DebuggerConfig(common_config, task_config)
80
+ self.common_config.level = level if level else self.common_config.level
81
+ self.common_config.dump_path = dump_path if dump_path else self.common_config.dump_path
82
+ self.config = DebuggerConfig(self.common_config, self.task_config)
86
83
 
87
84
  if self._need_msprobe_c() and _msprobe_c:
85
+ os.environ["MS_HOOK_ENABLE"] = "on"
88
86
  _msprobe_c._PrecisionDebugger(framework="MindSpore", config_path=config_path)
89
87
 
90
88
  self.config.execution_mode = self._get_execution_mode()
91
89
  if self._need_service():
92
90
  self.config.check_config_with_l2()
93
- self.service = Service(self.config)
91
+ self.service = MindsporeService(self.config)
94
92
 
95
93
  Runtime.step_count = 0
96
94
  Runtime.is_running = False
95
+ if enable_dynamic_kbyk_dump:
96
+ _dump_set_dynamic()
97
97
 
98
98
  @staticmethod
99
- def check_input_params(args):
100
- if args.config_path is not None:
101
- if not isinstance(args.config_path, str):
102
- raise MsprobeException(
103
- MsprobeException.INVALID_PARAM_ERROR, f"config_path must be a string")
104
- file_checker = FileChecker(
105
- file_path=args.config_path, path_type=FileCheckConst.FILE, file_type=FileCheckConst.JSON_SUFFIX)
106
- file_checker.common_check()
107
-
108
- if args.task is not None and args.task not in Const.TASK_LIST:
109
- raise MsprobeException(
110
- MsprobeException.INVALID_PARAM_ERROR, f"task must be one of {Const.TASK_LIST}")
111
-
112
- if args.dump_path is not None:
113
- if not isinstance(args.dump_path, str):
114
- raise MsprobeException(
115
- MsprobeException.INVALID_PARAM_ERROR, f"dump_path must be a string")
116
-
117
- if args.level is not None and args.level not in Const.LEVEL_LIST:
118
- raise MsprobeException(
119
- MsprobeException.INVALID_PARAM_ERROR, f"level must be one of {Const.LEVEL_LIST}")
99
+ def _get_task_config(task, json_config):
100
+ return parse_task_config(task, json_config)
120
101
 
121
102
  @staticmethod
122
103
  def _get_execution_mode():
@@ -137,7 +118,7 @@ class PrecisionDebugger:
137
118
  return MsConst.PYNATIVE_MODE
138
119
 
139
120
  @staticmethod
140
- def _is_graph_dump(config):
121
+ def _is_graph_dump(config: DebuggerConfig):
141
122
  if config.level != MsConst.KERNEL:
142
123
  return False
143
124
  if not config.list:
@@ -147,59 +128,68 @@ class PrecisionDebugger:
147
128
  return is_graph
148
129
 
149
130
  @classmethod
150
- def start(cls, model=None):
151
- instance = cls._instance
152
- if not instance:
153
- raise Exception(MsgConst.NOT_CREATED_INSTANCE)
131
+ def start(cls, model=None, token_range=None):
132
+ instance = cls._get_instance()
133
+ if instance is None:
134
+ return
154
135
  if cls._need_msprobe_c() and _msprobe_c:
155
136
  _msprobe_c._PrecisionDebugger().start()
156
- if instance.task in PrecisionDebugger.task_not_need_service:
157
- return
158
-
137
+ check_token_range(token_range)
159
138
  instance.config.execution_mode = cls._get_execution_mode()
160
139
  if cls._need_service():
161
140
  if not instance.service:
162
- instance.service = Service(instance.config)
163
- instance.service.start(model)
141
+ instance.service = MindsporeService(instance.config)
142
+ instance.config.check_model(model, token_range)
143
+ instance.service.start(model, token_range)
164
144
  else:
165
145
  if not instance.first_start:
166
146
  get_api_register().restore_all_api()
167
- handler = TaskHandlerFactory.create(instance.config)
147
+ handler = TaskHandlerFactory.create(instance.config, model)
168
148
  handler.handle()
169
-
149
+ if enable_dynamic_kbyk_dump:
150
+ _set_init_iter(0)
151
+ if enable_dynamic_kbyk_dump:
152
+ is_valid_rank = (not instance.config.rank or Runtime.rank_id in instance.config.rank)
153
+ is_valid_step = (not instance.config.step or Runtime.step_count in instance.config.step)
154
+ if is_valid_rank and is_valid_step:
155
+ _dump_start()
156
+ Runtime.is_running = True
170
157
  instance.first_start = True
171
- Runtime.is_running = True
172
-
173
- @classmethod
174
- def forward_backward_dump_end(cls):
175
- instance = cls._instance
176
- instance.stop()
177
158
 
178
159
  @classmethod
179
160
  def stop(cls):
180
- instance = cls._instance
181
- if not instance:
182
- raise Exception(MsgConst.NOT_CREATED_INSTANCE)
161
+ instance = cls._get_instance()
162
+ if instance is None:
163
+ return
164
+
183
165
  if instance.task == Const.GRAD_PROBE:
184
166
  instance.gm.stop()
185
- if instance.task in PrecisionDebugger.task_not_need_service:
186
- return
187
167
  if instance.service:
188
168
  instance.service.stop()
189
- Runtime.is_running = False
190
-
169
+ else:
170
+ Runtime.is_running = False
171
+ if enable_dynamic_kbyk_dump:
172
+ _dump_stop()
173
+ if cls._need_msprobe_c() and _msprobe_c:
174
+ _msprobe_c._PrecisionDebugger().stop()
175
+
191
176
  @classmethod
192
177
  def step(cls):
193
- instance = cls._instance
194
- if not instance:
195
- raise Exception(MsgConst.NOT_CREATED_INSTANCE)
196
- if instance.task in PrecisionDebugger.task_not_need_service:
178
+ instance = cls._get_instance()
179
+ if instance is None:
197
180
  return
181
+
198
182
  if instance.service:
199
183
  instance.service.step()
184
+ if is_graph_mode_cell_dump_allowed(instance.config):
185
+ GraphModeCellDump.step()
186
+ if enable_dynamic_kbyk_dump:
187
+ _dump_step(1)
188
+ if cls._need_msprobe_c() and _msprobe_c:
189
+ _msprobe_c._PrecisionDebugger().step()
190
+
200
191
  HOOKCell.cell_count = defaultdict(int)
201
192
  CellProcessor.reset_cell_stats()
202
-
203
193
  Runtime.step_count += 1
204
194
 
205
195
  @classmethod
@@ -226,18 +216,9 @@ class PrecisionDebugger:
226
216
  instance.config.execution_mode = cls._get_execution_mode()
227
217
  if cls._need_service():
228
218
  if not instance.service:
229
- instance.service = Service(instance.config)
219
+ instance.service = MindsporeService(instance.config)
230
220
  instance.service.save(variable, name, save_backward)
231
221
 
232
- @classmethod
233
- def set_init_step(cls, step):
234
- instance = cls._instance
235
- if not instance:
236
- raise Exception(MsgConst.NOT_CREATED_INSTANCE)
237
- check_init_step(step)
238
- instance.service.init_step = step
239
- instance.service.loop = 0
240
-
241
222
  @classmethod
242
223
  def _need_service(cls):
243
224
  instance = cls._instance
@@ -247,7 +228,7 @@ class PrecisionDebugger:
247
228
  return False
248
229
  else:
249
230
  return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config)
250
-
231
+
251
232
  @classmethod
252
233
  def _need_msprobe_c(cls):
253
234
  instance = cls._instance