mindstudio-probe 1.0.4__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/METADATA +5 -5
  2. mindstudio_probe-1.1.1.dist-info/RECORD +341 -0
  3. {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/WHEEL +1 -1
  4. {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/entry_points.txt +0 -1
  5. msprobe/README.md +84 -18
  6. msprobe/__init__.py +16 -1
  7. msprobe/config.json +1 -5
  8. msprobe/core/advisor/advisor.py +16 -11
  9. msprobe/core/advisor/advisor_const.py +6 -7
  10. msprobe/core/advisor/advisor_result.py +12 -12
  11. msprobe/core/common/const.py +164 -3
  12. msprobe/core/common/exceptions.py +26 -4
  13. msprobe/core/common/file_utils.py +196 -27
  14. msprobe/core/common/inplace_op_checker.py +53 -0
  15. msprobe/core/common/inplace_ops.yaml +251 -0
  16. msprobe/core/common/log.py +46 -18
  17. msprobe/core/common/utils.py +308 -209
  18. msprobe/core/common_config.py +60 -38
  19. msprobe/core/compare/acc_compare.py +332 -94
  20. msprobe/core/compare/check.py +104 -22
  21. msprobe/core/compare/compare_cli.py +42 -5
  22. msprobe/core/compare/highlight.py +162 -57
  23. msprobe/core/compare/layer_mapping/__init__.py +19 -0
  24. msprobe/core/compare/layer_mapping/data_scope_parser.py +235 -0
  25. msprobe/core/compare/layer_mapping/layer_mapping.py +242 -0
  26. msprobe/core/compare/layer_mapping/postprocess_pass.py +94 -0
  27. msprobe/core/compare/multiprocessing_compute.py +33 -8
  28. msprobe/core/compare/npy_compare.py +73 -29
  29. msprobe/core/compare/utils.py +306 -247
  30. msprobe/core/data_dump/data_collector.py +44 -43
  31. msprobe/core/data_dump/data_processor/base.py +88 -35
  32. msprobe/core/data_dump/data_processor/factory.py +20 -3
  33. msprobe/core/data_dump/data_processor/mindspore_processor.py +14 -8
  34. msprobe/core/data_dump/data_processor/pytorch_processor.py +180 -66
  35. msprobe/core/data_dump/json_writer.py +63 -42
  36. msprobe/core/data_dump/scope.py +143 -48
  37. msprobe/core/grad_probe/constant.py +31 -13
  38. msprobe/core/grad_probe/grad_compare.py +20 -4
  39. msprobe/core/grad_probe/utils.py +44 -3
  40. msprobe/core/overflow_check/abnormal_scene.py +185 -0
  41. msprobe/core/overflow_check/api_info.py +55 -0
  42. msprobe/core/overflow_check/checker.py +138 -0
  43. msprobe/core/overflow_check/filter.py +157 -0
  44. msprobe/core/overflow_check/ignore_rules.yaml +55 -0
  45. msprobe/core/overflow_check/level.py +22 -0
  46. msprobe/core/overflow_check/utils.py +28 -0
  47. msprobe/docs/01.installation.md +29 -9
  48. msprobe/docs/02.config_introduction.md +83 -84
  49. msprobe/docs/03.config_examples.md +3 -20
  50. msprobe/docs/04.kernel_dump_PyTorch.md +73 -0
  51. msprobe/docs/05.data_dump_PyTorch.md +143 -13
  52. msprobe/docs/06.data_dump_MindSpore.md +197 -88
  53. msprobe/docs/07.accuracy_checker_PyTorch.md +69 -46
  54. msprobe/docs/08.accuracy_checker_online_PyTorch.md +52 -17
  55. msprobe/docs/09.accuracy_checker_MindSpore.md +51 -15
  56. msprobe/docs/10.accuracy_compare_PyTorch.md +187 -99
  57. msprobe/docs/11.accuracy_compare_MindSpore.md +253 -31
  58. msprobe/docs/12.overflow_check_PyTorch.md +1 -1
  59. msprobe/docs/13.overflow_check_MindSpore.md +6 -6
  60. msprobe/docs/15.free_benchmarking_PyTorch.md +60 -55
  61. msprobe/docs/16.free_benchmarking_MindSpore.md +159 -0
  62. msprobe/docs/17.grad_probe.md +19 -22
  63. msprobe/docs/18.online_dispatch.md +89 -0
  64. msprobe/docs/19.monitor.md +468 -0
  65. msprobe/docs/20.monitor_performance_baseline.md +52 -0
  66. msprobe/docs/21.visualization_PyTorch.md +386 -0
  67. msprobe/docs/22.visualization_MindSpore.md +384 -0
  68. msprobe/docs/23.tool_function_introduction.md +28 -0
  69. msprobe/docs/{FAQ_PyTorch.md → FAQ.md} +25 -10
  70. msprobe/docs/data_dump_Mindspore/dynamic_graph_quick_start_example.md +211 -0
  71. msprobe/docs/img/compare_result.png +0 -0
  72. msprobe/docs/img/monitor/cpu_info.png +0 -0
  73. msprobe/docs/img/ms_dump.png +0 -0
  74. msprobe/docs/img/ms_layer.png +0 -0
  75. msprobe/docs/img/pt_dump.png +0 -0
  76. msprobe/mindspore/__init__.py +16 -0
  77. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +130 -138
  78. msprobe/mindspore/api_accuracy_checker/api_info.py +27 -5
  79. msprobe/mindspore/api_accuracy_checker/api_runner.py +43 -18
  80. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +21 -7
  81. msprobe/mindspore/api_accuracy_checker/checker_support_api.yaml +77 -0
  82. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +63 -1
  83. msprobe/mindspore/api_accuracy_checker/compute_element.py +59 -24
  84. msprobe/mindspore/api_accuracy_checker/data_manager.py +264 -0
  85. msprobe/mindspore/api_accuracy_checker/main.py +27 -3
  86. msprobe/mindspore/api_accuracy_checker/multi_api_accuracy_checker.py +206 -0
  87. msprobe/mindspore/api_accuracy_checker/multi_data_manager.py +58 -0
  88. msprobe/mindspore/api_accuracy_checker/type_mapping.py +22 -5
  89. msprobe/mindspore/api_accuracy_checker/utils.py +34 -17
  90. msprobe/mindspore/cell_processor.py +58 -13
  91. msprobe/mindspore/common/const.py +35 -13
  92. msprobe/mindspore/common/log.py +5 -9
  93. msprobe/mindspore/common/utils.py +60 -5
  94. msprobe/mindspore/compare/distributed_compare.py +15 -28
  95. msprobe/mindspore/compare/ms_compare.py +319 -158
  96. msprobe/mindspore/compare/ms_graph_compare.py +99 -49
  97. msprobe/mindspore/debugger/debugger_config.py +20 -14
  98. msprobe/mindspore/debugger/precision_debugger.py +43 -13
  99. msprobe/mindspore/dump/dump_tool_factory.py +18 -1
  100. msprobe/mindspore/dump/hook_cell/api_registry.py +23 -3
  101. msprobe/mindspore/dump/hook_cell/primitive_hooks.py +203 -0
  102. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +107 -10
  103. msprobe/mindspore/dump/hook_cell/wrap_api.py +21 -13
  104. msprobe/mindspore/dump/jit_dump.py +56 -20
  105. msprobe/mindspore/dump/kernel_graph_dump.py +19 -5
  106. msprobe/mindspore/dump/kernel_kbyk_dump.py +19 -6
  107. msprobe/mindspore/dym_loader/hook_dynamic_loader.cc +140 -0
  108. msprobe/mindspore/dym_loader/hook_dynamic_loader.h +53 -0
  109. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +162 -41
  110. msprobe/mindspore/free_benchmark/common/config.py +15 -0
  111. msprobe/mindspore/free_benchmark/common/handler_params.py +15 -1
  112. msprobe/mindspore/free_benchmark/common/utils.py +37 -8
  113. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +0 -204
  114. msprobe/mindspore/free_benchmark/handler/base_handler.py +20 -5
  115. msprobe/mindspore/free_benchmark/handler/check_handler.py +21 -7
  116. msprobe/mindspore/free_benchmark/handler/fix_handler.py +18 -3
  117. msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -6
  118. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +23 -8
  119. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +29 -5
  120. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +25 -10
  121. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +45 -19
  122. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +29 -8
  123. msprobe/mindspore/free_benchmark/perturbation/no_change.py +16 -1
  124. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +22 -7
  125. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +17 -2
  126. msprobe/mindspore/grad_probe/global_context.py +44 -14
  127. msprobe/mindspore/grad_probe/grad_analyzer.py +27 -13
  128. msprobe/mindspore/grad_probe/grad_monitor.py +16 -1
  129. msprobe/mindspore/grad_probe/grad_stat_csv.py +33 -5
  130. msprobe/mindspore/grad_probe/hook.py +24 -10
  131. msprobe/mindspore/grad_probe/utils.py +18 -5
  132. msprobe/mindspore/ms_config.py +22 -15
  133. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +20 -6
  134. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +15 -0
  135. msprobe/mindspore/runtime.py +15 -0
  136. msprobe/mindspore/service.py +75 -150
  137. msprobe/mindspore/task_handler_factory.py +15 -0
  138. msprobe/msprobe.py +24 -7
  139. msprobe/pytorch/__init__.py +23 -3
  140. msprobe/pytorch/api_accuracy_checker/common/config.py +81 -2
  141. msprobe/pytorch/api_accuracy_checker/common/utils.py +53 -21
  142. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +19 -2
  143. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +50 -25
  144. msprobe/pytorch/api_accuracy_checker/compare/compare.py +51 -21
  145. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +23 -6
  146. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +28 -8
  147. msprobe/pytorch/api_accuracy_checker/config.yaml +1 -1
  148. msprobe/pytorch/api_accuracy_checker/generate_op_script/config_op.json +9 -0
  149. msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +454 -0
  150. msprobe/pytorch/api_accuracy_checker/generate_op_script/operator_replication.template +365 -0
  151. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +73 -33
  152. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +44 -18
  153. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +32 -11
  154. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +122 -172
  155. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +158 -4
  156. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +30 -24
  157. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +68 -31
  158. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +27 -4
  159. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/dump_dispatch.py +115 -0
  160. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +26 -9
  161. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/torch_ops_config.yaml +63 -0
  162. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +44 -0
  163. msprobe/pytorch/bench_functions/__init__.py +18 -3
  164. msprobe/pytorch/bench_functions/apply_adam_w.py +15 -0
  165. msprobe/pytorch/bench_functions/confusion_transpose.py +20 -1
  166. msprobe/pytorch/bench_functions/fast_gelu.py +15 -0
  167. msprobe/pytorch/bench_functions/layer_norm_eval.py +15 -0
  168. msprobe/pytorch/bench_functions/linear.py +15 -0
  169. msprobe/pytorch/bench_functions/matmul_backward.py +33 -6
  170. msprobe/pytorch/bench_functions/npu_fusion_attention.py +280 -157
  171. msprobe/pytorch/bench_functions/rms_norm.py +15 -0
  172. msprobe/pytorch/bench_functions/rotary_mul.py +32 -9
  173. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +15 -0
  174. msprobe/pytorch/bench_functions/swiglu.py +29 -6
  175. msprobe/pytorch/common/__init__.py +15 -0
  176. msprobe/pytorch/common/log.py +18 -6
  177. msprobe/pytorch/common/parse_json.py +31 -16
  178. msprobe/pytorch/common/utils.py +96 -40
  179. msprobe/pytorch/compare/distributed_compare.py +13 -14
  180. msprobe/pytorch/compare/match.py +15 -0
  181. msprobe/pytorch/compare/pt_compare.py +44 -10
  182. msprobe/pytorch/debugger/debugger_config.py +69 -52
  183. msprobe/pytorch/debugger/precision_debugger.py +72 -24
  184. msprobe/pytorch/dump/kernel_dump/kernel_config.py +33 -0
  185. msprobe/pytorch/free_benchmark/__init__.py +20 -5
  186. msprobe/pytorch/free_benchmark/common/constant.py +15 -0
  187. msprobe/pytorch/free_benchmark/common/counter.py +15 -0
  188. msprobe/pytorch/free_benchmark/common/enums.py +43 -0
  189. msprobe/pytorch/free_benchmark/common/params.py +23 -1
  190. msprobe/pytorch/free_benchmark/common/utils.py +43 -5
  191. msprobe/pytorch/free_benchmark/compare/grad_saver.py +47 -9
  192. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +17 -0
  193. msprobe/pytorch/free_benchmark/main.py +19 -4
  194. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +15 -0
  195. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +19 -4
  196. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +18 -1
  197. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +21 -4
  198. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +28 -2
  199. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +19 -0
  200. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +15 -0
  201. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +15 -0
  202. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +15 -0
  203. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +65 -16
  204. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +15 -0
  205. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +21 -5
  206. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +15 -0
  207. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +19 -4
  208. msprobe/pytorch/function_factory.py +17 -2
  209. msprobe/pytorch/functional/module_dump.py +84 -0
  210. msprobe/pytorch/grad_probe/grad_monitor.py +23 -6
  211. msprobe/pytorch/grad_probe/grad_stat_csv.py +40 -10
  212. msprobe/pytorch/hook_module/__init__.py +16 -1
  213. msprobe/pytorch/hook_module/api_registry.py +13 -8
  214. msprobe/pytorch/hook_module/hook_module.py +17 -19
  215. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1 -0
  216. msprobe/pytorch/hook_module/utils.py +4 -6
  217. msprobe/pytorch/hook_module/wrap_aten.py +12 -11
  218. msprobe/pytorch/hook_module/wrap_distributed.py +6 -7
  219. msprobe/pytorch/hook_module/wrap_functional.py +21 -20
  220. msprobe/pytorch/hook_module/wrap_npu_custom.py +9 -17
  221. msprobe/pytorch/hook_module/wrap_tensor.py +4 -6
  222. msprobe/pytorch/hook_module/wrap_torch.py +4 -6
  223. msprobe/pytorch/hook_module/wrap_vf.py +4 -6
  224. msprobe/pytorch/module_processer.py +18 -6
  225. msprobe/pytorch/monitor/anomaly_analyse.py +201 -0
  226. msprobe/pytorch/monitor/anomaly_detect.py +340 -0
  227. msprobe/pytorch/monitor/distributed/distributed_ops.yaml +19 -0
  228. msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +5 -0
  229. msprobe/pytorch/monitor/distributed/wrap_distributed.py +272 -0
  230. msprobe/pytorch/monitor/features.py +108 -0
  231. msprobe/pytorch/monitor/module_hook.py +870 -0
  232. msprobe/pytorch/monitor/module_metric.py +193 -0
  233. msprobe/pytorch/monitor/module_spec_verifier.py +93 -0
  234. msprobe/pytorch/monitor/optimizer_collect.py +295 -0
  235. msprobe/pytorch/monitor/unittest/__init__.py +0 -0
  236. msprobe/pytorch/monitor/unittest/test_monitor.py +145 -0
  237. msprobe/pytorch/monitor/utils.py +250 -0
  238. msprobe/pytorch/monitor/visualizer.py +59 -0
  239. msprobe/pytorch/online_dispatch/__init__.py +2 -3
  240. msprobe/pytorch/online_dispatch/compare.py +38 -48
  241. msprobe/pytorch/online_dispatch/dispatch.py +50 -25
  242. msprobe/pytorch/online_dispatch/dump_compare.py +21 -9
  243. msprobe/pytorch/online_dispatch/single_compare.py +60 -39
  244. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +9 -1
  245. msprobe/pytorch/online_dispatch/utils.py +48 -23
  246. msprobe/pytorch/parse.py +15 -0
  247. msprobe/pytorch/parse_tool/cli.py +5 -6
  248. msprobe/pytorch/parse_tool/lib/compare.py +19 -26
  249. msprobe/pytorch/parse_tool/lib/config.py +1 -1
  250. msprobe/pytorch/parse_tool/lib/parse_tool.py +4 -2
  251. msprobe/pytorch/parse_tool/lib/utils.py +40 -55
  252. msprobe/pytorch/parse_tool/lib/visualization.py +3 -1
  253. msprobe/pytorch/pt_config.py +192 -40
  254. msprobe/pytorch/service.py +110 -35
  255. msprobe/visualization/__init__.py +14 -0
  256. msprobe/visualization/builder/__init__.py +14 -0
  257. msprobe/visualization/builder/graph_builder.py +165 -0
  258. msprobe/visualization/builder/msprobe_adapter.py +205 -0
  259. msprobe/visualization/compare/__init__.py +14 -0
  260. msprobe/visualization/compare/graph_comparator.py +130 -0
  261. msprobe/visualization/compare/mode_adapter.py +211 -0
  262. msprobe/visualization/graph/__init__.py +14 -0
  263. msprobe/visualization/graph/base_node.py +124 -0
  264. msprobe/visualization/graph/graph.py +200 -0
  265. msprobe/visualization/graph/node_colors.py +95 -0
  266. msprobe/visualization/graph/node_op.py +39 -0
  267. msprobe/visualization/graph_service.py +214 -0
  268. msprobe/visualization/utils.py +232 -0
  269. mindstudio_probe-1.0.4.dist-info/RECORD +0 -276
  270. msprobe/docs/04.acl_config_examples.md +0 -76
  271. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +0 -43
  272. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +0 -107
  273. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +0 -10
  274. msprobe/pytorch/functional/dump_module.py +0 -39
  275. {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/LICENSE +0 -0
  276. {mindstudio_probe-1.0.4.dist-info → mindstudio_probe-1.1.1.dist-info}/top_level.txt +0 -0
  277. /msprobe/{mindspore/free_benchmark/decorator → pytorch/monitor}/__init__.py +0 -0
  278. /msprobe/pytorch/{functional/data_processor.py → monitor/distributed/__init__.py} +0 -0
@@ -1,8 +1,23 @@
1
- from abc import ABC, abstractmethod
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
2
16
  import hashlib
17
+ from abc import ABC, abstractmethod
3
18
 
4
19
  import mindspore
5
- from mindspore import ops, Tensor
20
+ from mindspore import ops
6
21
  from msprobe.core.grad_probe.constant import GradConst
7
22
 
8
23
 
@@ -12,6 +27,7 @@ class CsvInput:
12
27
  self.grad = grad
13
28
  self.bounds = bounds
14
29
 
30
+
15
31
  class GradStatCsv:
16
32
  csv = {}
17
33
 
@@ -52,9 +68,11 @@ class CsvItem(ABC):
52
68
 
53
69
  @register_csv_item(GradConst.MD5)
54
70
  class CsvMd5(CsvItem):
71
+ @staticmethod
55
72
  def generate_csv_header(csv_input):
56
73
  return ["MD5"]
57
74
 
75
+ @staticmethod
58
76
  def generate_csv_content(csv_input):
59
77
  grad = csv_input.grad
60
78
  tensor_bytes = grad.float().numpy().tobytes()
@@ -64,19 +82,21 @@ class CsvMd5(CsvItem):
64
82
 
65
83
  @register_csv_item(GradConst.DISTRIBUTION)
66
84
  class CsvDistribution(CsvItem):
85
+ @staticmethod
67
86
  def generate_csv_header(csv_input):
68
87
  bounds = csv_input.bounds
69
88
  intervals = []
70
89
  if bounds:
71
90
  intervals.append(f"(-inf, {bounds[0]}]")
72
91
  for i in range(1, len(bounds)):
73
- intervals.append(f"({bounds[i-1]}, {bounds[i]}]")
92
+ intervals.append(f"({bounds[i - 1]}, {bounds[i]}]")
74
93
  if intervals:
75
94
  intervals.append(f"({bounds[-1]}, inf)")
76
95
  intervals.append("=0")
77
-
96
+
78
97
  return intervals
79
98
 
99
+ @staticmethod
80
100
  def generate_csv_content(csv_input):
81
101
  grad = csv_input.grad
82
102
  bounds = csv_input.bounds
@@ -94,9 +114,11 @@ class CsvDistribution(CsvItem):
94
114
 
95
115
  @register_csv_item(GradConst.MAX)
96
116
  class CsvMax(CsvItem):
117
+ @staticmethod
97
118
  def generate_csv_header(csv_input):
98
119
  return ["max"]
99
120
 
121
+ @staticmethod
100
122
  def generate_csv_content(csv_input):
101
123
  grad = csv_input.grad
102
124
  return [ops.amax(grad).float().numpy().tolist()]
@@ -104,9 +126,11 @@ class CsvMax(CsvItem):
104
126
 
105
127
  @register_csv_item(GradConst.MIN)
106
128
  class CsvMin(CsvItem):
129
+ @staticmethod
107
130
  def generate_csv_header(csv_input):
108
131
  return ["min"]
109
132
 
133
+ @staticmethod
110
134
  def generate_csv_content(csv_input):
111
135
  grad = csv_input.grad
112
136
  return [ops.amin(grad).float().numpy().tolist()]
@@ -114,9 +138,11 @@ class CsvMin(CsvItem):
114
138
 
115
139
  @register_csv_item(GradConst.NORM)
116
140
  class CsvNorm(CsvItem):
141
+ @staticmethod
117
142
  def generate_csv_header(csv_input):
118
143
  return ["norm"]
119
144
 
145
+ @staticmethod
120
146
  def generate_csv_content(csv_input):
121
147
  grad = csv_input.grad
122
148
  return [ops.norm(grad).float().numpy().tolist()]
@@ -124,9 +150,11 @@ class CsvNorm(CsvItem):
124
150
 
125
151
  @register_csv_item(GradConst.SHAPE)
126
152
  class CsvShape(CsvItem):
153
+ @staticmethod
127
154
  def generate_csv_header(csv_input):
128
155
  return ["shape"]
129
156
 
157
+ @staticmethod
130
158
  def generate_csv_content(csv_input):
131
159
  grad = csv_input.grad
132
- return [list(grad.shape)]
160
+ return [list(grad.shape)]
@@ -1,25 +1,37 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
1
15
 
2
16
  import os
3
17
 
4
18
  import mindspore
5
19
  import mindspore as ms
6
20
  from mindspore.common.api import jit
7
- from mindspore.nn.optim.optimizer import Optimizer
8
- from mindspore.common.parameter import Parameter
9
21
  from mindspore.common.initializer import initializer
10
-
22
+ from mindspore.common.parameter import Parameter
23
+ from mindspore.nn.optim.optimizer import Optimizer
24
+ from msprobe.core.common.file_utils import remove_path, write_csv, create_directory
11
25
  from msprobe.core.grad_probe.constant import GradConst
12
26
  from msprobe.mindspore.common.log import logger
13
-
14
- from msprobe.core.common.file_utils import remove_path, write_csv, create_directory
15
27
  from msprobe.mindspore.grad_probe.global_context import grad_context
16
- from msprobe.mindspore.grad_probe.grad_analyzer import grad_dump, get_rank_id
17
28
  from msprobe.mindspore.grad_probe.grad_analyzer import csv_generator
29
+ from msprobe.mindspore.grad_probe.grad_analyzer import grad_dump, get_rank_id
18
30
  from msprobe.mindspore.grad_probe.grad_stat_csv import GradStatCsv, CsvInput
19
31
  from msprobe.mindspore.grad_probe.utils import save_grad_direction, get_adapted_level
20
32
 
21
- class HookInput:
22
33
 
34
+ class HookInput:
23
35
  '''
24
36
  HookInput is a class wrapping all the variables used for hooking optimizer
25
37
  '''
@@ -40,6 +52,7 @@ class HookInput:
40
52
  self.bounds = grad_context.get_context(GradConst.BOUNDS)
41
53
  self.mode = mindspore.get_context("mode")
42
54
 
55
+
43
56
  def hook_graph_mode_optimizer(opt, hook_input):
44
57
  @jit
45
58
  def new_construct(self, gradients):
@@ -47,7 +60,7 @@ def hook_graph_mode_optimizer(opt, hook_input):
47
60
  if hook_input.param_list and hook_input.g_names[index] not in hook_input.param_list:
48
61
  continue
49
62
  grad_dump(hook_input.dump_dir, hook_input.g_names[index], self.dump_step,
50
- grad_value, hook_input.level, hook_input.bounds)
63
+ grad_value, hook_input.level, hook_input.bounds)
51
64
  ms.ops.TensorDump()(hook_input.step_finish_flag, self.dump_step)
52
65
  self.assignadd(self.dump_step, self.global_step_increase_tensor)
53
66
  out = hook_input.func(gradients)
@@ -57,11 +70,12 @@ def hook_graph_mode_optimizer(opt, hook_input):
57
70
  opt.construct = new_construct.__get__(opt, type(opt))
58
71
  csv_generator.start()
59
72
 
73
+
60
74
  def hook_pynative_optimizer(opt, hook_input):
61
75
  level_adapted = get_adapted_level(hook_input.level)
62
76
 
63
- def hook_fn(cell, input):
64
- gradients, = input
77
+ def hook_fn(cell, input_data):
78
+ gradients, = input_data
65
79
  cur_step = grad_context.get_context(GradConst.CURRENT_STEP)
66
80
  if grad_context.step_need_dump(cur_step) and grad_context.rank_need_dump(hook_input.rank_id):
67
81
  create_directory(hook_input.save_dir)
@@ -1,12 +1,26 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
1
16
  import os
2
17
 
3
18
  import mindspore
4
- from msprobe.core.grad_probe.constant import level_adp
5
- from msprobe.core.grad_probe.utils import check_param
6
19
  from msprobe.core.common.file_utils import (create_directory,
7
- check_path_before_create,
8
20
  check_file_or_directory_path,
9
21
  save_npy)
22
+ from msprobe.core.grad_probe.constant import level_adp
23
+ from msprobe.core.grad_probe.utils import check_param
10
24
 
11
25
 
12
26
  def save_grad_direction(param_name, grad, save_path):
@@ -15,7 +29,6 @@ def save_grad_direction(param_name, grad, save_path):
15
29
  check_file_or_directory_path(save_path, isdir=True)
16
30
  check_param(param_name)
17
31
  save_filepath = os.path.join(save_path, f"{param_name}.npy")
18
- check_path_before_create(save_filepath)
19
32
 
20
33
  if grad.dtype == mindspore.bfloat16:
21
34
  grad = grad.to(mindspore.float32)
@@ -27,4 +40,4 @@ def save_grad_direction(param_name, grad, save_path):
27
40
 
28
41
  def get_adapted_level(level: str):
29
42
  level_adapted = level_adp.get(level)
30
- return level_adapted
43
+ return level_adapted
@@ -1,12 +1,26 @@
1
- import json
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
2
15
 
3
- from msprobe.core.common_config import CommonConfig, BaseConfig
4
- from msprobe.core.common.file_utils import FileOpen
5
16
  from msprobe.core.common.const import Const
6
- from msprobe.mindspore.common.const import FreeBenchmarkConst
7
- from msprobe.mindspore.common.log import logger
17
+ from msprobe.core.common.file_utils import load_json
18
+ from msprobe.core.common.utils import is_int
19
+ from msprobe.core.common_config import BaseConfig, CommonConfig
8
20
  from msprobe.core.grad_probe.constant import level_adp
9
21
  from msprobe.core.grad_probe.utils import check_numeral_list_ascend
22
+ from msprobe.mindspore.common.const import FreeBenchmarkConst
23
+ from msprobe.mindspore.common.log import logger
10
24
 
11
25
 
12
26
  class TensorConfig(BaseConfig):
@@ -18,9 +32,6 @@ class TensorConfig(BaseConfig):
18
32
  self._check_config()
19
33
 
20
34
  def _check_config(self):
21
- if self.data_mode is not None and len(self.data_mode) > 0:
22
- if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
23
- raise Exception("data_mode must be all, input or output")
24
35
  if self.file_format and self.file_format not in ["npy", "bin"]:
25
36
  raise Exception("file_format is invalid")
26
37
 
@@ -34,9 +45,6 @@ class StatisticsConfig(BaseConfig):
34
45
  self._check_config()
35
46
 
36
47
  def _check_config(self):
37
- if self.data_mode is not None and len(self.data_mode) > 0:
38
- if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]:
39
- raise Exception("data_mode must be all, input or output")
40
48
  if self.summary_mode and self.summary_mode not in ["statistics", "md5"]:
41
49
  raise Exception("summary_mode is invalid")
42
50
 
@@ -48,7 +56,7 @@ class OverflowCheckConfig(BaseConfig):
48
56
  self._check_config()
49
57
 
50
58
  def _check_config(self):
51
- if self.overflow_nums is not None and not isinstance(self.overflow_nums, int):
59
+ if self.overflow_nums is not None and not is_int(self.overflow_nums):
52
60
  raise Exception("overflow_nums is invalid, it should be an integer")
53
61
  if self.overflow_nums is not None and self.overflow_nums != -1 and self.overflow_nums <= 0:
54
62
  raise Exception("overflow_nums should be -1 or positive integer")
@@ -72,7 +80,7 @@ class FreeBenchmarkConfig(BaseConfig):
72
80
  if self.fuzz_level and self.fuzz_level not in FreeBenchmarkConst.DUMP_LEVEL_LIST:
73
81
  raise Exception("fuzz_level must be L1 or empty")
74
82
  if self.fuzz_stage and self.fuzz_stage not in FreeBenchmarkConst.STAGE_LIST:
75
- raise Exception("fuzz_stage must be forward or empty")
83
+ raise Exception("fuzz_stage must be forward, backward or empty")
76
84
  if self.if_preheat or self.preheat_step or self.max_sample:
77
85
  logger.warning("'if_preheat', 'preheat_step' and 'max_sample' settings "
78
86
  "are not supported for mindspore free benchmark task.")
@@ -119,8 +127,7 @@ def parse_task_config(task, json_config):
119
127
  def parse_json_config(json_file_path):
120
128
  if not json_file_path:
121
129
  raise Exception("json file path is None")
122
- with FileOpen(json_file_path, 'r') as file:
123
- json_config = json.load(file)
130
+ json_config = load_json(json_file_path)
124
131
  common_config = parse_common_config(json_config)
125
132
  if not common_config.task:
126
133
  common_config.task = Const.STATISTICS
@@ -1,8 +1,23 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
1
16
  import os
2
- import json
3
- from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
17
+
18
+ from msprobe.core.common.file_utils import create_directory, save_json
4
19
  from msprobe.mindspore.common.log import logger
5
- from msprobe.core.common.file_utils import FileOpen, create_directory
20
+ from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
6
21
 
7
22
 
8
23
  class KernelGraphOverflowCheck:
@@ -16,7 +31,7 @@ class KernelGraphOverflowCheck:
16
31
  self.dump_json["common_dump_settings"]["saved_data"] = "full"
17
32
  self.dump_json["common_dump_settings"]["input_output"] = 0
18
33
  self.dump_json["common_dump_settings"]["kernels"] = []
19
- self.dump_json["common_dump_settings"]["support_device"] = [0,1,2,3,4,5,6,7]
34
+ self.dump_json["common_dump_settings"]["support_device"] = [0, 1, 2, 3, 4, 5, 6, 7]
20
35
  self.dump_json["common_dump_settings"]["op_debug_mode"] = 3
21
36
  self.dump_json["common_dump_settings"]["file_format"] = "npy"
22
37
 
@@ -36,8 +51,7 @@ class KernelGraphOverflowCheck:
36
51
  json_path = self.dump_json["common_dump_settings"]["path"]
37
52
  create_directory(json_path)
38
53
  json_path = os.path.join(json_path, "kernel_graph_overflow_check.json")
39
- with FileOpen(json_path, 'w') as f:
40
- json.dump(self.dump_json, f)
54
+ save_json(json_path, self.dump_json, indent=4)
41
55
  logger.info(json_path + " has been created.")
42
56
  os.environ["MINDSPORE_DUMP_CONFIG"] = json_path
43
57
  if "MS_ACL_DUMP_CFG_PATH" in os.environ:
@@ -1,3 +1,18 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
1
16
  from msprobe.mindspore.common.const import Const
2
17
  from msprobe.mindspore.debugger.debugger_config import DebuggerConfig
3
18
  from msprobe.mindspore.overflow_check.kernel_graph_overflow_check import KernelGraphOverflowCheck
@@ -1,3 +1,18 @@
1
+ # Copyright (c) 2024-2024, Huawei Technologies Co., Ltd.
2
+ # All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
1
16
  class Runtime:
2
17
  step_count: int = 0
3
18
  rank_id: int = -1