mindstudio-probe 1.1.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
  2. mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
  3. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
  4. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
  5. msprobe/README.md +39 -3
  6. msprobe/config.json +1 -3
  7. msprobe/core/advisor/advisor.py +8 -3
  8. msprobe/core/common/const.py +113 -13
  9. msprobe/core/common/exceptions.py +25 -3
  10. msprobe/core/common/file_utils.py +150 -26
  11. msprobe/core/common/inplace_op_checker.py +15 -0
  12. msprobe/core/common/log.py +27 -9
  13. msprobe/core/common/utils.py +182 -69
  14. msprobe/core/common_config.py +44 -15
  15. msprobe/core/compare/acc_compare.py +207 -142
  16. msprobe/core/compare/check.py +2 -5
  17. msprobe/core/compare/compare_cli.py +21 -4
  18. msprobe/core/compare/highlight.py +124 -55
  19. msprobe/core/compare/layer_mapping/__init__.py +19 -0
  20. msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
  21. msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
  22. msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
  23. msprobe/core/compare/npy_compare.py +52 -23
  24. msprobe/core/compare/utils.py +272 -247
  25. msprobe/core/data_dump/data_collector.py +13 -11
  26. msprobe/core/data_dump/data_processor/base.py +46 -16
  27. msprobe/core/data_dump/data_processor/mindspore_processor.py +4 -4
  28. msprobe/core/data_dump/data_processor/pytorch_processor.py +156 -59
  29. msprobe/core/data_dump/scope.py +113 -34
  30. msprobe/core/grad_probe/constant.py +27 -13
  31. msprobe/core/grad_probe/grad_compare.py +18 -1
  32. msprobe/core/grad_probe/utils.py +30 -2
  33. msprobe/core/overflow_check/abnormal_scene.py +185 -0
  34. msprobe/core/overflow_check/api_info.py +55 -0
  35. msprobe/core/overflow_check/checker.py +138 -0
  36. msprobe/core/overflow_check/filter.py +157 -0
  37. msprobe/core/overflow_check/ignore_rules.yaml +55 -0
  38. msprobe/core/overflow_check/level.py +22 -0
  39. msprobe/core/overflow_check/utils.py +28 -0
  40. msprobe/docs/01.installation.md +10 -0
  41. msprobe/docs/02.config_introduction.md +49 -22
  42. msprobe/docs/03.config_examples.md +2 -9
  43. msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
  44. msprobe/docs/05.data_dump_PyTorch.md +3 -1
  45. msprobe/docs/06.data_dump_MindSpore.md +157 -90
  46. msprobe/docs/07.accuracy_checker_PyTorch.md +12 -12
  47. msprobe/docs/08.accuracy_checker_online_PyTorch.md +1 -6
  48. msprobe/docs/09.accuracy_checker_MindSpore.md +44 -8
  49. msprobe/docs/10.accuracy_compare_PyTorch.md +19 -13
  50. msprobe/docs/11.accuracy_compare_MindSpore.md +104 -13
  51. msprobe/docs/12.overflow_check_PyTorch.md +1 -1
  52. msprobe/docs/13.overflow_check_MindSpore.md +6 -6
  53. msprobe/docs/15.free_benchmarking_PyTorch.md +4 -5
  54. msprobe/docs/16.free_benchmarking_MindSpore.md +56 -37
  55. msprobe/docs/17.grad_probe.md +5 -6
  56. msprobe/docs/19.monitor.md +468 -0
  57. msprobe/docs/20.monitor_performance_baseline.md +52 -0
  58. msprobe/docs/21.visualization_PyTorch.md +386 -0
  59. msprobe/docs/22.visualization_MindSpore.md +384 -0
  60. msprobe/docs/23.tool_function_introduction.md +28 -0
  61. msprobe/docs/FAQ.md +3 -0
  62. msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
  63. msprobe/docs/img/compare_result.png +0 -0
  64. msprobe/docs/img/monitor/cpu_info.png +0 -0
  65. msprobe/mindspore/__init__.py +15 -0
  66. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +113 -145
  67. msprobe/mindspore/api_accuracy_checker/api_info.py +21 -6
  68. msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
  69. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
  70. msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
  71. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
  72. msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
  73. msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
  74. msprobe/mindspore/api_accuracy_checker/main.py +27 -3
  75. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
  76. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
  77. msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
  78. msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
  79. msprobe/mindspore/cell_processor.py +33 -12
  80. msprobe/mindspore/common/const.py +33 -13
  81. msprobe/mindspore/common/log.py +5 -9
  82. msprobe/mindspore/common/utils.py +43 -4
  83. msprobe/mindspore/compare/distributed_compare.py +22 -22
  84. msprobe/mindspore/compare/ms_compare.py +271 -248
  85. msprobe/mindspore/compare/ms_graph_compare.py +81 -47
  86. msprobe/mindspore/debugger/debugger_config.py +4 -1
  87. msprobe/mindspore/debugger/precision_debugger.py +7 -1
  88. msprobe/mindspore/dump/dump_tool_factory.py +3 -1
  89. msprobe/mindspore/dump/hook_cell/api_registry.py +12 -2
  90. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +13 -16
  91. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +25 -0
  92. msprobe/mindspore/dump/jit_dump.py +17 -5
  93. msprobe/mindspore/dump/kernel_graph_dump.py +2 -4
  94. msprobe/mindspore/dump/kernel_kbyk_dump.py +2 -4
  95. msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
  96. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
  97. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +145 -39
  98. msprobe/mindspore/free_benchmark/common/handler_params.py +1 -2
  99. msprobe/mindspore/free_benchmark/common/utils.py +19 -4
  100. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
  101. msprobe/mindspore/free_benchmark/handler/base_handler.py +3 -3
  102. msprobe/mindspore/free_benchmark/handler/check_handler.py +4 -5
  103. msprobe/mindspore/free_benchmark/handler/fix_handler.py +4 -4
  104. msprobe/mindspore/free_benchmark/handler/handler_factory.py +4 -4
  105. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +2 -2
  106. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +15 -6
  107. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +4 -4
  108. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +2 -2
  109. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +13 -6
  110. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +2 -2
  111. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +2 -2
  112. msprobe/mindspore/grad_probe/global_context.py +28 -8
  113. msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
  114. msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
  115. msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
  116. msprobe/mindspore/grad_probe/hook.py +24 -10
  117. msprobe/mindspore/grad_probe/utils.py +18 -5
  118. msprobe/mindspore/ms_config.py +22 -15
  119. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +2 -4
  120. msprobe/mindspore/runtime.py +15 -0
  121. msprobe/mindspore/service.py +36 -30
  122. msprobe/mindspore/task_handler_factory.py +15 -0
  123. msprobe/msprobe.py +24 -7
  124. msprobe/pytorch/__init__.py +3 -2
  125. msprobe/pytorch/api_accuracy_checker/common/config.py +62 -0
  126. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +3 -4
  127. msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
  128. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
  129. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
  130. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +6 -1
  131. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +19 -14
  132. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +13 -9
  133. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +77 -53
  134. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +15 -4
  135. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +9 -24
  136. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +4 -12
  137. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +9 -4
  138. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +3 -11
  139. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -2
  140. msprobe/pytorch/bench_functions/confusion_transpose.py +5 -1
  141. msprobe/pytorch/bench_functions/matmul_backward.py +12 -0
  142. msprobe/pytorch/bench_functions/npu_fusion_attention.py +100 -6
  143. msprobe/pytorch/bench_functions/rotary_mul.py +4 -0
  144. msprobe/pytorch/bench_functions/swiglu.py +10 -2
  145. msprobe/pytorch/common/parse_json.py +6 -6
  146. msprobe/pytorch/common/utils.py +56 -5
  147. msprobe/pytorch/compare/distributed_compare.py +8 -9
  148. msprobe/pytorch/compare/pt_compare.py +8 -6
  149. msprobe/pytorch/debugger/debugger_config.py +19 -15
  150. msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
  151. msprobe/pytorch/free_benchmark/common/constant.py +15 -0
  152. msprobe/pytorch/free_benchmark/common/counter.py +15 -0
  153. msprobe/pytorch/free_benchmark/common/enums.py +15 -0
  154. msprobe/pytorch/free_benchmark/common/params.py +8 -1
  155. msprobe/pytorch/free_benchmark/common/utils.py +26 -4
  156. msprobe/pytorch/free_benchmark/compare/grad_saver.py +20 -3
  157. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +2 -0
  158. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +3 -1
  159. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +6 -4
  160. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +2 -0
  161. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +4 -0
  162. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +10 -0
  163. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +6 -5
  164. msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
  165. msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
  166. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
  167. msprobe/pytorch/hook_module/wrap_functional.py +14 -12
  168. msprobe/pytorch/module_processer.py +2 -5
  169. msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
  170. msprobe/pytorch/monitor/anomaly_detect.py +340 -0
  171. msprobe/pytorch/monitor/distributed/__init__.py +0 -0
  172. msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
  173. msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
  174. msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
  175. msprobe/pytorch/monitor/features.py +108 -0
  176. msprobe/pytorch/monitor/module_hook.py +870 -0
  177. msprobe/pytorch/monitor/module_metric.py +193 -0
  178. msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
  179. msprobe/pytorch/monitor/optimizer_collect.py +295 -0
  180. msprobe/pytorch/monitor/unittest/__init__.py +0 -0
  181. msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
  182. msprobe/pytorch/monitor/utils.py +250 -0
  183. msprobe/pytorch/monitor/visualizer.py +59 -0
  184. msprobe/pytorch/online_dispatch/__init__.py +2 -3
  185. msprobe/pytorch/online_dispatch/compare.py +29 -38
  186. msprobe/pytorch/online_dispatch/dispatch.py +50 -25
  187. msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
  188. msprobe/pytorch/online_dispatch/single_compare.py +53 -32
  189. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +1 -1
  190. msprobe/pytorch/online_dispatch/utils.py +49 -21
  191. msprobe/pytorch/parse_tool/lib/compare.py +12 -18
  192. msprobe/pytorch/parse_tool/lib/config.py +1 -1
  193. msprobe/pytorch/parse_tool/lib/parse_tool.py +1 -2
  194. msprobe/pytorch/parse_tool/lib/utils.py +16 -35
  195. msprobe/pytorch/parse_tool/lib/visualization.py +2 -0
  196. msprobe/pytorch/pt_config.py +31 -8
  197. msprobe/pytorch/service.py +15 -5
  198. msprobe/visualization/__init__.py +14 -0
  199. msprobe/visualization/builder/__init__.py +14 -0
  200. msprobe/visualization/builder/graph_builder.py +165 -0
  201. msprobe/visualization/builder/msprobe_adapter.py +205 -0
  202. msprobe/visualization/compare/__init__.py +14 -0
  203. msprobe/visualization/compare/graph_comparator.py +130 -0
  204. msprobe/visualization/compare/mode_adapter.py +211 -0
  205. msprobe/visualization/graph/__init__.py +14 -0
  206. msprobe/visualization/graph/base_node.py +124 -0
  207. msprobe/visualization/graph/graph.py +200 -0
  208. msprobe/visualization/graph/node_colors.py +95 -0
  209. msprobe/visualization/graph/node_op.py +39 -0
  210. msprobe/visualization/graph_service.py +214 -0
  211. msprobe/visualization/utils.py +232 -0
  212. mindstudio_probe-1.1.0.dist-info/RECORD +0 -287
  213. msprobe/docs/04.acl_config_examples.md +0 -78
  214. msprobe/mindspore/compare/layer_mapping.py +0 -146
  215. msprobe/mindspore/compare/modify_mapping.py +0 -107
  216. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -57
  217. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -122
  218. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
  219. {mindstudio_probe-1.1.0.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
  220. /msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
@@ -1,7 +1,7 @@
1
1
  # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
2
  # All rights reserved.
3
3
  #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
5
  # you may not use this file except in compliance with the License.
6
6
  # You may obtain a copy of the License at
7
7
  #
@@ -41,7 +41,7 @@ class ExchangeValuePerturbation(BasePerturbation):
41
41
  """
42
42
  params.fuzzed_value = self.exchange_value(params.args[params.index])
43
43
  if not self.is_fuzzed:
44
- logger.warning(f"{self.api_name} can not exchange value.")
44
+ logger.warning(f"{self.api_name_with_id} can not exchange value.")
45
45
  return False
46
46
  return self.get_fuzzed_result(params)
47
47
 
@@ -1,7 +1,7 @@
1
1
  # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
2
  # All rights reserved.
3
3
  #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
5
  # you may not use this file except in compliance with the License.
6
6
  # You may obtain a copy of the License at
7
7
  #
@@ -18,9 +18,11 @@ from typing import Any
18
18
  import mindspore as ms
19
19
  from mindspore import Tensor, ops
20
20
 
21
- from msprobe.mindspore.common.const import Const
21
+ from msprobe.core.common.const import Const
22
22
  from msprobe.mindspore.common.log import logger
23
+ from msprobe.mindspore.free_benchmark.common.config import Config
23
24
  from msprobe.mindspore.free_benchmark.common.handler_params import HandlerParams
25
+ from msprobe.mindspore.free_benchmark.common.utils import Tools
24
26
  from msprobe.mindspore.free_benchmark.perturbation.base_perturbation import BasePerturbation
25
27
 
26
28
 
@@ -40,10 +42,15 @@ class ImprovePrecisionPerturbation(BasePerturbation):
40
42
  def handle(self, params: HandlerParams) -> Any:
41
43
  args = self.improve_tensor_precision(params.args)
42
44
  kwargs = self.improve_tensor_precision(params.kwargs)
43
- fuzzed_value = args
44
- if self.api_name in Const.COMMUNICATION_API_LIST:
45
- params.fuzzed_value = fuzzed_value
46
45
  if not self.is_fuzzed:
47
- logger.warning(f"{self.api_name} can not improve precision.")
46
+ logger.warning(f"{self.api_name_with_id} can not improve precision.")
48
47
  return False
48
+
49
+ if Config.stage == Const.BACKWARD:
50
+ fuzzed_result = Tools.get_grad(params.original_func, *args, **kwargs)
51
+ if fuzzed_result is not None:
52
+ return fuzzed_result
53
+ else:
54
+ return False
55
+
49
56
  return params.original_func(*args, **kwargs)
@@ -36,9 +36,9 @@ class PerturbationFactory:
36
36
  }
37
37
 
38
38
  @staticmethod
39
- def create(api_name: str):
39
+ def create(api_name_with_id: str):
40
40
  perturbation = PerturbationFactory.perturbations.get(Config.pert_type)
41
41
  if perturbation:
42
- return perturbation(api_name)
42
+ return perturbation(api_name_with_id)
43
43
  else:
44
44
  raise Exception(f'{Config.pert_type} is a invalid perturbation type')
@@ -15,7 +15,7 @@
15
15
 
16
16
  from msprobe.mindspore.common.const import Const
17
17
  from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
18
- from msprobe.mindspore.free_benchmark.api_pynative_self_check import ApiPyNativeSelFCheck
18
+ from msprobe.mindspore.free_benchmark.api_pynative_self_check import ApiPyNativeSelfCheck
19
19
 
20
20
 
21
21
  class SelfCheckToolFactory:
@@ -28,7 +28,7 @@ class SelfCheckToolFactory:
28
28
  Const.API: {
29
29
  Const.GRAPH_KBYK_MODE: None,
30
30
  Const.GRAPH_GE_MODE: None,
31
- Const.PYNATIVE_MODE: ApiPyNativeSelFCheck
31
+ Const.PYNATIVE_MODE: ApiPyNativeSelfCheck
32
32
  },
33
33
  Const.KERNEL: {
34
34
  Const.GRAPH_KBYK_MODE: None,
@@ -1,15 +1,30 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
1
16
  import os
2
17
  import threading
3
18
  from typing import Dict, Union, Tuple
4
19
 
5
- from msprobe.core.grad_probe.utils import check_str, check_bounds_element
20
+ from msprobe.core.common.utils import is_int
21
+ from msprobe.core.common.file_utils import create_directory, check_path_before_create
6
22
  from msprobe.core.grad_probe.constant import GradConst
23
+ from msprobe.core.grad_probe.utils import check_str, check_bounds_element, check_param_element
7
24
  from msprobe.mindspore.common.log import logger
8
- from msprobe.core.common.file_utils import create_directory, check_path_before_create
9
25
 
10
26
 
11
27
  class GlobalContext:
12
-
13
28
  _instance = None
14
29
  _instance_lock = threading.Lock()
15
30
  _setting = {
@@ -37,10 +52,10 @@ class GlobalContext:
37
52
  else:
38
53
  raise ValueError("Invalid level set in config yaml file, level option: L0, L1, L2")
39
54
 
40
- self._set_input_list(config_dict, GradConst.PARAM_LIST, str)
55
+ self._set_input_list(config_dict, GradConst.PARAM_LIST, (str,), element_check=check_param_element)
41
56
  self._set_input_list(config_dict, GradConst.BOUNDS, (float, int), element_check=check_bounds_element)
42
- self._set_input_list(config_dict, GradConst.STEP, int)
43
- self._set_input_list(config_dict, GradConst.RANK, int)
57
+ self._set_input_list(config_dict, GradConst.STEP, (int,))
58
+ self._set_input_list(config_dict, GradConst.RANK, (int,))
44
59
 
45
60
  output_path = config_dict.get(GradConst.OUTPUT_PATH)
46
61
  check_str(output_path, variable_name="output_path in yaml")
@@ -88,13 +103,18 @@ class GlobalContext:
88
103
  if value and isinstance(value, list):
89
104
  for val in value:
90
105
  if not isinstance(val, dtype):
91
- logger.warning(f"Invalid {name} which must be None or list of {type_str}")
106
+ logger.warning(f"Invalid {name} which must be None or list of {type_str}, use default value.")
107
+ return
108
+ elif isinstance(val, int) and not is_int(val):
109
+ logger.warning(f"Invalid {name} which must be None or list of int, use default value.")
92
110
  return
93
111
  if element_check and not element_check(val):
94
- logger.warning(f"Given {name} violates some rules.")
112
+ logger.warning(f"Given {name} violates some rules, use default value.")
95
113
  return
114
+
96
115
  self._setting[name] = value
97
116
  else:
98
117
  logger.warning(f"{name} is None or not a list with valid items, use default value.")
99
118
 
119
+
100
120
  grad_context = GlobalContext()
@@ -1,20 +1,33 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import multiprocessing
1
17
  import os
2
18
  import time
3
- from typing import List, Tuple
4
- import multiprocessing
5
19
  from multiprocessing import Process
20
+ from typing import List
6
21
 
7
- import numpy as np
8
22
  import mindspore as ms
9
- from mindspore.communication import get_rank
10
- from mindspore.ops import operations as P
23
+ import numpy as np
11
24
  from mindspore.common.parameter import Parameter
12
-
13
- from msprobe.core.grad_probe.utils import ListCache
14
- from msprobe.core.grad_probe.constant import GradConst
15
- from msprobe.mindspore.common.log import logger
25
+ from mindspore.communication import get_rank
16
26
  from msprobe.core.common.file_utils import (create_directory, check_file_or_directory_path,
17
27
  write_csv, remove_path, move_file, load_npy)
28
+ from msprobe.core.grad_probe.constant import GradConst
29
+ from msprobe.core.grad_probe.utils import ListCache
30
+ from msprobe.mindspore.common.log import logger
18
31
  from msprobe.mindspore.grad_probe.global_context import grad_context, GlobalContext
19
32
 
20
33
 
@@ -28,12 +41,12 @@ def get_rank_id():
28
41
 
29
42
  @ms.jit
30
43
  def grad_dump(dump_dir: str, g_name: str, dump_step: Parameter, grad: ms.Tensor, level: str, bounds: List):
31
- '''
44
+ """
32
45
  Dump gradient statistic data.
33
46
  level0: [step, max, min, norm, shape_dim, shape]
34
47
  level1: [step, max, min, norm, shape_dim, shape] + grad_bool_data
35
48
  level2: [step, max, min, norm, shape_dim, shape, dist_dim, dist] + grad_bool_data
36
- '''
49
+ """
37
50
  dump_path = os.path.join(dump_dir, g_name)
38
51
  dump_dir_path = dump_path + "_dir"
39
52
  save_op = ms.ops.TensorDump()
@@ -182,7 +195,7 @@ class CSVGenerator(Process):
182
195
  shape_dim = int(stat_data[GradConst.SHAPE_DIM_IDX])
183
196
  file_name = os.path.basename(file_path)
184
197
  prefix_idx = len(file_name.split("_")[0])
185
- param_name = file_name[(prefix_idx + 1) : -(len(GradConst.NPY_SUFFIX) + 1)]
198
+ param_name = file_name[(prefix_idx + 1): -(len(GradConst.NPY_SUFFIX) + 1)]
186
199
  if not param_name:
187
200
  raise RuntimeError("Invalid gradient statistic file name.")
188
201
  csv_line = [param_name]
@@ -224,8 +237,9 @@ class CSVGenerator(Process):
224
237
  if i == 0:
225
238
  intervals.append(f"(-inf, {self.bounds[i]}]")
226
239
  else:
227
- intervals.append(f"({self.bounds[i-1]}, {self.bounds[i]}]")
240
+ intervals.append(f"({self.bounds[i - 1]}, {self.bounds[i]}]")
228
241
  intervals.extend([f"({self.bounds[-1]}, inf)", "=0"])
229
242
  return intervals
230
243
 
244
+
231
245
  csv_generator = CSVGenerator()
@@ -1,7 +1,22 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from msprobe.core.grad_probe.constant import GradConst
1
17
  from msprobe.mindspore.grad_probe.global_context import grad_context
2
18
  from msprobe.mindspore.grad_probe.grad_analyzer import csv_generator
3
19
  from msprobe.mindspore.grad_probe.hook import hook_optimizer
4
- from msprobe.core.grad_probe.constant import GradConst
5
20
 
6
21
 
7
22
  class GradientMonitor:
@@ -1,8 +1,23 @@
1
- from abc import ABC, abstractmethod
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
2
16
  import hashlib
17
+ from abc import ABC, abstractmethod
3
18
 
4
19
  import mindspore
5
- from mindspore import ops, Tensor
20
+ from mindspore import ops
6
21
  from msprobe.core.grad_probe.constant import GradConst
7
22
 
8
23
 
@@ -12,6 +27,7 @@ class CsvInput:
12
27
  self.grad = grad
13
28
  self.bounds = bounds
14
29
 
30
+
15
31
  class GradStatCsv:
16
32
  csv = {}
17
33
 
@@ -52,9 +68,11 @@ class CsvItem(ABC):
52
68
 
53
69
  @register_csv_item(GradConst.MD5)
54
70
  class CsvMd5(CsvItem):
71
+ @staticmethod
55
72
  def generate_csv_header(csv_input):
56
73
  return ["MD5"]
57
74
 
75
+ @staticmethod
58
76
  def generate_csv_content(csv_input):
59
77
  grad = csv_input.grad
60
78
  tensor_bytes = grad.float().numpy().tobytes()
@@ -64,19 +82,21 @@ class CsvMd5(CsvItem):
64
82
 
65
83
  @register_csv_item(GradConst.DISTRIBUTION)
66
84
  class CsvDistribution(CsvItem):
85
+ @staticmethod
67
86
  def generate_csv_header(csv_input):
68
87
  bounds = csv_input.bounds
69
88
  intervals = []
70
89
  if bounds:
71
90
  intervals.append(f"(-inf, {bounds[0]}]")
72
91
  for i in range(1, len(bounds)):
73
- intervals.append(f"({bounds[i-1]}, {bounds[i]}]")
92
+ intervals.append(f"({bounds[i - 1]}, {bounds[i]}]")
74
93
  if intervals:
75
94
  intervals.append(f"({bounds[-1]}, inf)")
76
95
  intervals.append("=0")
77
-
96
+
78
97
  return intervals
79
98
 
99
+ @staticmethod
80
100
  def generate_csv_content(csv_input):
81
101
  grad = csv_input.grad
82
102
  bounds = csv_input.bounds
@@ -94,9 +114,11 @@ class CsvDistribution(CsvItem):
94
114
 
95
115
  @register_csv_item(GradConst.MAX)
96
116
  class CsvMax(CsvItem):
117
+ @staticmethod
97
118
  def generate_csv_header(csv_input):
98
119
  return ["max"]
99
120
 
121
+ @staticmethod
100
122
  def generate_csv_content(csv_input):
101
123
  grad = csv_input.grad
102
124
  return [ops.amax(grad).float().numpy().tolist()]
@@ -104,9 +126,11 @@ class CsvMax(CsvItem):
104
126
 
105
127
  @register_csv_item(GradConst.MIN)
106
128
  class CsvMin(CsvItem):
129
+ @staticmethod
107
130
  def generate_csv_header(csv_input):
108
131
  return ["min"]
109
132
 
133
+ @staticmethod
110
134
  def generate_csv_content(csv_input):
111
135
  grad = csv_input.grad
112
136
  return [ops.amin(grad).float().numpy().tolist()]
@@ -114,9 +138,11 @@ class CsvMin(CsvItem):
114
138
 
115
139
  @register_csv_item(GradConst.NORM)
116
140
  class CsvNorm(CsvItem):
141
+ @staticmethod
117
142
  def generate_csv_header(csv_input):
118
143
  return ["norm"]
119
144
 
145
+ @staticmethod
120
146
  def generate_csv_content(csv_input):
121
147
  grad = csv_input.grad
122
148
  return [ops.norm(grad).float().numpy().tolist()]
@@ -124,9 +150,11 @@ class CsvNorm(CsvItem):
124
150
 
125
151
  @register_csv_item(GradConst.SHAPE)
126
152
  class CsvShape(CsvItem):
153
+ @staticmethod
127
154
  def generate_csv_header(csv_input):
128
155
  return ["shape"]
129
156
 
157
+ @staticmethod
130
158
  def generate_csv_content(csv_input):
131
159
  grad = csv_input.grad
132
- return [list(grad.shape)]
160
+ return [list(grad.shape)]
@@ -1,25 +1,37 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
1
15
 
2
16
  import os
3
17
 
4
18
  import mindspore
5
19
  import mindspore as ms
6
20
  from mindspore.common.api import jit
7
- from mindspore.nn.optim.optimizer import Optimizer
8
- from mindspore.common.parameter import Parameter
9
21
  from mindspore.common.initializer import initializer
10
-
22
+ from mindspore.common.parameter import Parameter
23
+ from mindspore.nn.optim.optimizer import Optimizer
24
+ from msprobe.core.common.file_utils import remove_path, write_csv, create_directory
11
25
  from msprobe.core.grad_probe.constant import GradConst
12
26
  from msprobe.mindspore.common.log import logger
13
-
14
- from msprobe.core.common.file_utils import remove_path, write_csv, create_directory
15
27
  from msprobe.mindspore.grad_probe.global_context import grad_context
16
- from msprobe.mindspore.grad_probe.grad_analyzer import grad_dump, get_rank_id
17
28
  from msprobe.mindspore.grad_probe.grad_analyzer import csv_generator
29
+ from msprobe.mindspore.grad_probe.grad_analyzer import grad_dump, get_rank_id
18
30
  from msprobe.mindspore.grad_probe.grad_stat_csv import GradStatCsv, CsvInput
19
31
  from msprobe.mindspore.grad_probe.utils import save_grad_direction, get_adapted_level
20
32
 
21
- class HookInput:
22
33
 
34
+ class HookInput:
23
35
  '''
24
36
  HookInput is a class wrapping all the variables used for hooking optimizer
25
37
  '''
@@ -40,6 +52,7 @@ class HookInput:
40
52
  self.bounds = grad_context.get_context(GradConst.BOUNDS)
41
53
  self.mode = mindspore.get_context("mode")
42
54
 
55
+
43
56
  def hook_graph_mode_optimizer(opt, hook_input):
44
57
  @jit
45
58
  def new_construct(self, gradients):
@@ -47,7 +60,7 @@ def hook_graph_mode_optimizer(opt, hook_input):
47
60
  if hook_input.param_list and hook_input.g_names[index] not in hook_input.param_list:
48
61
  continue
49
62
  grad_dump(hook_input.dump_dir, hook_input.g_names[index], self.dump_step,
50
- grad_value, hook_input.level, hook_input.bounds)
63
+ grad_value, hook_input.level, hook_input.bounds)
51
64
  ms.ops.TensorDump()(hook_input.step_finish_flag, self.dump_step)
52
65
  self.assignadd(self.dump_step, self.global_step_increase_tensor)
53
66
  out = hook_input.func(gradients)
@@ -57,11 +70,12 @@ def hook_graph_mode_optimizer(opt, hook_input):
57
70
  opt.construct = new_construct.__get__(opt, type(opt))
58
71
  csv_generator.start()
59
72
 
73
+
60
74
  def hook_pynative_optimizer(opt, hook_input):
61
75
  level_adapted = get_adapted_level(hook_input.level)
62
76
 
63
- def hook_fn(cell, input):
64
- gradients, = input
77
+ def hook_fn(cell, input_data):
78
+ gradients, = input_data
65
79
  cur_step = grad_context.get_context(GradConst.CURRENT_STEP)
66
80
  if grad_context.step_need_dump(cur_step) and grad_context.rank_need_dump(hook_input.rank_id):
67
81
  create_directory(hook_input.save_dir)
@@ -1,12 +1,26 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
1
16
  import os
2
17
 
3
18
  import mindspore
4
- from msprobe.core.grad_probe.constant import level_adp
5
- from msprobe.core.grad_probe.utils import check_param
6
19
  from msprobe.core.common.file_utils import (create_directory,
7
- check_path_before_create,
8
20
  check_file_or_directory_path,
9
21
  save_npy)
22
+ from msprobe.core.grad_probe.constant import level_adp
23
+ from msprobe.core.grad_probe.utils import check_param
10
24
 
11
25
 
12
26
  def save_grad_direction(param_name, grad, save_path):
@@ -15,7 +29,6 @@ def save_grad_direction(param_name, grad, save_path):
15
29
  check_file_or_directory_path(save_path, isdir=True)
16
30
  check_param(param_name)
17
31
  save_filepath = os.path.join(save_path, f"{param_name}.npy")
18
- check_path_before_create(save_filepath)
19
32
 
20
33
  if grad.dtype == mindspore.bfloat16:
21
34
  grad = grad.to(mindspore.float32)
@@ -27,4 +40,4 @@ def save_grad_direction(param_name, grad, save_path):
27
40
 
28
41
  def get_adapted_level(level: str):
29
42
  level_adapted = level_adp.get(level)
30
- return level_adapted
43
+ return level_adapted
@@ -1,12 +1,26 @@
1
- import json
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
2
15
 
3
- from msprobe.core.common_config import CommonConfig, BaseConfig
4
- from msprobe.core.common.file_utils import FileOpen
5
16
  from msprobe.core.common.const import Const
6
- from msprobe.mindspore.common.const import FreeBenchmarkConst
7
- from msprobe.mindspore.common.log import logger
17
+ from msprobe.core.common.file_utils import load_json
18
+ from msprobe.core.common.utils import is_int
19
+ from msprobe.core.common_config import BaseConfig, CommonConfig
8
20
  from msprobe.core.grad_probe.constant import level_adp
9
21
  from msprobe.core.grad_probe.utils import check_numeral_list_ascend
22
+ from msprobe.mindspore.common.const import FreeBenchmarkConst
23
+ from msprobe.mindspore.common.log import logger
10
24
 
11
25
 
12
26
  class TensorConfig(BaseConfig):
@@ -18,9 +32,6 @@ class TensorConfig(BaseConfig):
18
32
  self._check_config()
19
33
 
20
34
  def _check_config(self):
21
- if self.data_mode is not None and len(self.data_mode) > 0:
22
- if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
23
- raise Exception("data_mode must be all, input or output")
24
35
  if self.file_format and self.file_format not in ["npy", "bin"]:
25
36
  raise Exception("file_format is invalid")
26
37
 
@@ -34,9 +45,6 @@ class StatisticsConfig(BaseConfig):
34
45
  self._check_config()
35
46
 
36
47
  def _check_config(self):
37
- if self.data_mode is not None and len(self.data_mode) > 0:
38
- if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
39
- raise Exception("data_mode must be all, input or output")
40
48
  if self.summary_mode and self.summary_mode not in ["statistics", "md5"]:
41
49
  raise Exception("summary_mode is invalid")
42
50
 
@@ -48,7 +56,7 @@ class OverflowCheckConfig(BaseConfig):
48
56
  self._check_config()
49
57
 
50
58
  def _check_config(self):
51
- if self.overflow_nums is not None and not isinstance(self.overflow_nums, int):
59
+ if self.overflow_nums is not None and not is_int(self.overflow_nums):
52
60
  raise Exception("overflow_nums is invalid, it should be an integer")
53
61
  if self.overflow_nums is not None and self.overflow_nums != -1 and self.overflow_nums <= 0:
54
62
  raise Exception("overflow_nums should be -1 or positive integer")
@@ -72,7 +80,7 @@ class FreeBenchmarkConfig(BaseConfig):
72
80
  if self.fuzz_level and self.fuzz_level not in FreeBenchmarkConst.DUMP_LEVEL_LIST:
73
81
  raise Exception("fuzz_level must be L1 or empty")
74
82
  if self.fuzz_stage and self.fuzz_stage not in FreeBenchmarkConst.STAGE_LIST:
75
- raise Exception("fuzz_stage must be forward or empty")
83
+ raise Exception("fuzz_stage must be forward, backward or empty")
76
84
  if self.if_preheat or self.preheat_step or self.max_sample:
77
85
  logger.warning("'if_preheat', 'preheat_step' and 'max_sample' settings "
78
86
  "are not supported for mindspore free benchmark task.")
@@ -119,8 +127,7 @@ def parse_task_config(task, json_config):
119
127
  def parse_json_config(json_file_path):
120
128
  if not json_file_path:
121
129
  raise Exception("json file path is None")
122
- with FileOpen(json_file_path, 'r') as file:
123
- json_config = json.load(file)
130
+ json_config = load_json(json_file_path)
124
131
  common_config = parse_common_config(json_config)
125
132
  if not common_config.task:
126
133
  common_config.task = Const.STATISTICS
@@ -13,10 +13,9 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- import json
17
16
  import os
18
17
 
19
- from msprobe.core.common.file_utils import FileOpen, create_directory
18
+ from msprobe.core.common.file_utils import create_directory, save_json
20
19
  from msprobe.mindspore.common.log import logger
21
20
  from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
22
21
 
@@ -52,8 +51,7 @@ class KernelGraphOverflowCheck:
52
51
  json_path = self.dump_json["common_dump_settings"]["path"]
53
52
  create_directory(json_path)
54
53
  json_path = os.path.join(json_path, "kernel_graph_overflow_check.json")
55
- with FileOpen(json_path, 'w') as f:
56
- json.dump(self.dump_json, f)
54
+ save_json(json_path, self.dump_json, indent=4)
57
55
  logger.info(json_path + " has been created.")
58
56
  os.environ["MINDSPORE_DUMP_CONFIG"] = json_path
59
57
  if "MS_ACL_DUMP_CFG_PATH" in os.environ:
@@ -1,3 +1,18 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
1
16
  class Runtime:
2
17
  step_count: int = 0
3
18
  rank_id: int = -1